api.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. # Natural Language Toolkit: CCG Categories
  2. #
  3. # Copyright (C) 2001-2019 NLTK Project
  4. # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
  5. # URL: <http://nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. from __future__ import unicode_literals
  8. from functools import total_ordering
  9. from abc import ABCMeta, abstractmethod
  10. from six import add_metaclass
  11. from nltk.internals import raise_unorderable_types
  12. from nltk.compat import python_2_unicode_compatible, unicode_repr
  13. @add_metaclass(ABCMeta)
  14. @total_ordering
  15. class AbstractCCGCategory(object):
  16. '''
  17. Interface for categories in combinatory grammars.
  18. '''
  19. @abstractmethod
  20. def is_primitive(self):
  21. """
  22. Returns true if the category is primitive.
  23. """
  24. @abstractmethod
  25. def is_function(self):
  26. """
  27. Returns true if the category is a function application.
  28. """
  29. @abstractmethod
  30. def is_var(self):
  31. """
  32. Returns true if the category is a variable.
  33. """
  34. @abstractmethod
  35. def substitute(self, substitutions):
  36. """
  37. Takes a set of (var, category) substitutions, and replaces every
  38. occurrence of the variable with the corresponding category.
  39. """
  40. @abstractmethod
  41. def can_unify(self, other):
  42. """
  43. Determines whether two categories can be unified.
  44. - Returns None if they cannot be unified
  45. - Returns a list of necessary substitutions if they can.
  46. """
  47. # Utility functions: comparison, strings and hashing.
  48. @abstractmethod
  49. def __str__(self):
  50. pass
  51. def __eq__(self, other):
  52. return (
  53. self.__class__ is other.__class__
  54. and self._comparison_key == other._comparison_key
  55. )
  56. def __ne__(self, other):
  57. return not self == other
  58. def __lt__(self, other):
  59. if not isinstance(other, AbstractCCGCategory):
  60. raise_unorderable_types("<", self, other)
  61. if self.__class__ is other.__class__:
  62. return self._comparison_key < other._comparison_key
  63. else:
  64. return self.__class__.__name__ < other.__class__.__name__
  65. def __hash__(self):
  66. try:
  67. return self._hash
  68. except AttributeError:
  69. self._hash = hash(self._comparison_key)
  70. return self._hash
  71. @python_2_unicode_compatible
  72. class CCGVar(AbstractCCGCategory):
  73. '''
  74. Class representing a variable CCG category.
  75. Used for conjunctions (and possibly type-raising, if implemented as a
  76. unary rule).
  77. '''
  78. _maxID = 0
  79. def __init__(self, prim_only=False):
  80. """Initialize a variable (selects a new identifier)
  81. :param prim_only: a boolean that determines whether the variable is
  82. restricted to primitives
  83. :type prim_only: bool
  84. """
  85. self._id = self.new_id()
  86. self._prim_only = prim_only
  87. self._comparison_key = self._id
  88. @classmethod
  89. def new_id(cls):
  90. """
  91. A class method allowing generation of unique variable identifiers.
  92. """
  93. cls._maxID = cls._maxID + 1
  94. return cls._maxID - 1
  95. @classmethod
  96. def reset_id(cls):
  97. cls._maxID = 0
  98. def is_primitive(self):
  99. return False
  100. def is_function(self):
  101. return False
  102. def is_var(self):
  103. return True
  104. def substitute(self, substitutions):
  105. """If there is a substitution corresponding to this variable,
  106. return the substituted category.
  107. """
  108. for (var, cat) in substitutions:
  109. if var == self:
  110. return cat
  111. return self
  112. def can_unify(self, other):
  113. """ If the variable can be replaced with other
  114. a substitution is returned.
  115. """
  116. if other.is_primitive() or not self._prim_only:
  117. return [(self, other)]
  118. return None
  119. def id(self):
  120. return self._id
  121. def __str__(self):
  122. return "_var" + str(self._id)
  123. @total_ordering
  124. @python_2_unicode_compatible
  125. class Direction(object):
  126. '''
  127. Class representing the direction of a function application.
  128. Also contains maintains information as to which combinators
  129. may be used with the category.
  130. '''
  131. def __init__(self, dir, restrictions):
  132. self._dir = dir
  133. self._restrs = restrictions
  134. self._comparison_key = (dir, tuple(restrictions))
  135. # Testing the application direction
  136. def is_forward(self):
  137. return self._dir == '/'
  138. def is_backward(self):
  139. return self._dir == '\\'
  140. def dir(self):
  141. return self._dir
  142. def restrs(self):
  143. """A list of restrictions on the combinators.
  144. '.' denotes that permuting operations are disallowed
  145. ',' denotes that function composition is disallowed
  146. '_' denotes that the direction has variable restrictions.
  147. (This is redundant in the current implementation of type-raising)
  148. """
  149. return self._restrs
  150. def is_variable(self):
  151. return self._restrs == '_'
  152. # Unification and substitution of variable directions.
  153. # Used only if type-raising is implemented as a unary rule, as it
  154. # must inherit restrictions from the argument category.
  155. def can_unify(self, other):
  156. if other.is_variable():
  157. return [('_', self.restrs())]
  158. elif self.is_variable():
  159. return [('_', other.restrs())]
  160. else:
  161. if self.restrs() == other.restrs():
  162. return []
  163. return None
  164. def substitute(self, subs):
  165. if not self.is_variable():
  166. return self
  167. for (var, restrs) in subs:
  168. if var == '_':
  169. return Direction(self._dir, restrs)
  170. return self
  171. # Testing permitted combinators
  172. def can_compose(self):
  173. return ',' not in self._restrs
  174. def can_cross(self):
  175. return '.' not in self._restrs
  176. def __eq__(self, other):
  177. return (
  178. self.__class__ is other.__class__
  179. and self._comparison_key == other._comparison_key
  180. )
  181. def __ne__(self, other):
  182. return not self == other
  183. def __lt__(self, other):
  184. if not isinstance(other, Direction):
  185. raise_unorderable_types("<", self, other)
  186. if self.__class__ is other.__class__:
  187. return self._comparison_key < other._comparison_key
  188. else:
  189. return self.__class__.__name__ < other.__class__.__name__
  190. def __hash__(self):
  191. try:
  192. return self._hash
  193. except AttributeError:
  194. self._hash = hash(self._comparison_key)
  195. return self._hash
  196. def __str__(self):
  197. r_str = ""
  198. for r in self._restrs:
  199. r_str = r_str + "%s" % r
  200. return "%s%s" % (self._dir, r_str)
  201. # The negation operator reverses the direction of the application
  202. def __neg__(self):
  203. if self._dir == '/':
  204. return Direction('\\', self._restrs)
  205. else:
  206. return Direction('/', self._restrs)
  207. @python_2_unicode_compatible
  208. class PrimitiveCategory(AbstractCCGCategory):
  209. '''
  210. Class representing primitive categories.
  211. Takes a string representation of the category, and a
  212. list of strings specifying the morphological subcategories.
  213. '''
  214. def __init__(self, categ, restrictions=[]):
  215. self._categ = categ
  216. self._restrs = restrictions
  217. self._comparison_key = (categ, tuple(restrictions))
  218. def is_primitive(self):
  219. return True
  220. def is_function(self):
  221. return False
  222. def is_var(self):
  223. return False
  224. def restrs(self):
  225. return self._restrs
  226. def categ(self):
  227. return self._categ
  228. # Substitution does nothing to a primitive category
  229. def substitute(self, subs):
  230. return self
  231. # A primitive can be unified with a class of the same
  232. # base category, given that the other category shares all
  233. # of its subclasses, or with a variable.
  234. def can_unify(self, other):
  235. if not other.is_primitive():
  236. return None
  237. if other.is_var():
  238. return [(other, self)]
  239. if other.categ() == self.categ():
  240. for restr in self._restrs:
  241. if restr not in other.restrs():
  242. return None
  243. return []
  244. return None
  245. def __str__(self):
  246. if self._restrs == []:
  247. return "%s" % self._categ
  248. restrictions = "[%s]" % ",".join(unicode_repr(r) for r in self._restrs)
  249. return "%s%s" % (self._categ, restrictions)
  250. @python_2_unicode_compatible
  251. class FunctionalCategory(AbstractCCGCategory):
  252. '''
  253. Class that represents a function application category.
  254. Consists of argument and result categories, together with
  255. an application direction.
  256. '''
  257. def __init__(self, res, arg, dir):
  258. self._res = res
  259. self._arg = arg
  260. self._dir = dir
  261. self._comparison_key = (arg, dir, res)
  262. def is_primitive(self):
  263. return False
  264. def is_function(self):
  265. return True
  266. def is_var(self):
  267. return False
  268. # Substitution returns the category consisting of the
  269. # substitution applied to each of its constituents.
  270. def substitute(self, subs):
  271. sub_res = self._res.substitute(subs)
  272. sub_dir = self._dir.substitute(subs)
  273. sub_arg = self._arg.substitute(subs)
  274. return FunctionalCategory(sub_res, sub_arg, self._dir)
  275. # A function can unify with another function, so long as its
  276. # constituents can unify, or with an unrestricted variable.
  277. def can_unify(self, other):
  278. if other.is_var():
  279. return [(other, self)]
  280. if other.is_function():
  281. sa = self._res.can_unify(other.res())
  282. sd = self._dir.can_unify(other.dir())
  283. if sa is not None and sd is not None:
  284. sb = self._arg.substitute(sa).can_unify(other.arg().substitute(sa))
  285. if sb is not None:
  286. return sa + sb
  287. return None
  288. # Constituent accessors
  289. def arg(self):
  290. return self._arg
  291. def res(self):
  292. return self._res
  293. def dir(self):
  294. return self._dir
  295. def __str__(self):
  296. return "(%s%s%s)" % (self._res, self._dir, self._arg)