# Natural Language Toolkit: Lexical Functional Grammar
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2019 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
  8. from __future__ import print_function, division, unicode_literals
  9. from itertools import chain
  10. from nltk.internals import Counter
  11. from nltk.compat import python_2_unicode_compatible
  12. @python_2_unicode_compatible
  13. class FStructure(dict):
  14. def safeappend(self, key, item):
  15. """
  16. Append 'item' to the list at 'key'. If no list exists for 'key', then
  17. construct one.
  18. """
  19. if key not in self:
  20. self[key] = []
  21. self[key].append(item)
  22. def __setitem__(self, key, value):
  23. dict.__setitem__(self, key.lower(), value)
  24. def __getitem__(self, key):
  25. return dict.__getitem__(self, key.lower())
  26. def __contains__(self, key):
  27. return dict.__contains__(self, key.lower())
  28. def to_glueformula_list(self, glue_dict):
  29. depgraph = self.to_depgraph()
  30. return glue_dict.to_glueformula_list(depgraph)
  31. def to_depgraph(self, rel=None):
  32. from nltk.parse.dependencygraph import DependencyGraph
  33. depgraph = DependencyGraph()
  34. nodes = depgraph.nodes
  35. self._to_depgraph(nodes, 0, 'ROOT')
  36. # Add all the dependencies for all the nodes
  37. for address, node in nodes.items():
  38. for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'):
  39. if n2['head'] == address:
  40. relation = n2['rel']
  41. node['deps'].setdefault(relation, [])
  42. node['deps'][relation].append(n2['address'])
  43. depgraph.root = nodes[1]
  44. return depgraph
  45. def _to_depgraph(self, nodes, head, rel):
  46. index = len(nodes)
  47. nodes[index].update(
  48. {
  49. 'address': index,
  50. 'word': self.pred[0],
  51. 'tag': self.pred[1],
  52. 'head': head,
  53. 'rel': rel,
  54. }
  55. )
  56. for feature in sorted(self):
  57. for item in sorted(self[feature]):
  58. if isinstance(item, FStructure):
  59. item._to_depgraph(nodes, index, feature)
  60. elif isinstance(item, tuple):
  61. new_index = len(nodes)
  62. nodes[new_index].update(
  63. {
  64. 'address': new_index,
  65. 'word': item[0],
  66. 'tag': item[1],
  67. 'head': index,
  68. 'rel': feature,
  69. }
  70. )
  71. elif isinstance(item, list):
  72. for n in item:
  73. n._to_depgraph(nodes, index, feature)
  74. else:
  75. raise Exception(
  76. 'feature %s is not an FStruct, a list, or a tuple' % feature
  77. )
  78. @staticmethod
  79. def read_depgraph(depgraph):
  80. return FStructure._read_depgraph(depgraph.root, depgraph)
  81. @staticmethod
  82. def _read_depgraph(node, depgraph, label_counter=None, parent=None):
  83. if not label_counter:
  84. label_counter = Counter()
  85. if node['rel'].lower() in ['spec', 'punct']:
  86. # the value of a 'spec' entry is a word, not an FStructure
  87. return (node['word'], node['tag'])
  88. else:
  89. fstruct = FStructure()
  90. fstruct.pred = None
  91. fstruct.label = FStructure._make_label(label_counter.get())
  92. fstruct.parent = parent
  93. word, tag = node['word'], node['tag']
  94. if tag[:2] == 'VB':
  95. if tag[2:3] == 'D':
  96. fstruct.safeappend('tense', ('PAST', 'tense'))
  97. fstruct.pred = (word, tag[:2])
  98. if not fstruct.pred:
  99. fstruct.pred = (word, tag)
  100. children = [depgraph.nodes[idx] for idx in chain(*node['deps'].values())]
  101. for child in children:
  102. fstruct.safeappend(
  103. child['rel'],
  104. FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
  105. )
  106. return fstruct
  107. @staticmethod
  108. def _make_label(value):
  109. """
  110. Pick an alphabetic character as identifier for an entity in the model.
  111. :param value: where to index into the list of characters
  112. :type value: int
  113. """
  114. letter = [
  115. 'f',
  116. 'g',
  117. 'h',
  118. 'i',
  119. 'j',
  120. 'k',
  121. 'l',
  122. 'm',
  123. 'n',
  124. 'o',
  125. 'p',
  126. 'q',
  127. 'r',
  128. 's',
  129. 't',
  130. 'u',
  131. 'v',
  132. 'w',
  133. 'x',
  134. 'y',
  135. 'z',
  136. 'a',
  137. 'b',
  138. 'c',
  139. 'd',
  140. 'e',
  141. ][value - 1]
  142. num = int(value) // 26
  143. if num > 0:
  144. return letter + str(num)
  145. else:
  146. return letter
  147. def __repr__(self):
  148. return self.__unicode__().replace('\n', '')
  149. def __str__(self):
  150. return self.pretty_format()
  151. def pretty_format(self, indent=3):
  152. try:
  153. accum = '%s:[' % self.label
  154. except NameError:
  155. accum = '['
  156. try:
  157. accum += 'pred \'%s\'' % (self.pred[0])
  158. except NameError:
  159. pass
  160. for feature in sorted(self):
  161. for item in self[feature]:
  162. if isinstance(item, FStructure):
  163. next_indent = indent + len(feature) + 3 + len(self.label)
  164. accum += '\n%s%s %s' % (
  165. ' ' * (indent),
  166. feature,
  167. item.pretty_format(next_indent),
  168. )
  169. elif isinstance(item, tuple):
  170. accum += '\n%s%s \'%s\'' % (' ' * (indent), feature, item[0])
  171. elif isinstance(item, list):
  172. accum += '\n%s%s {%s}' % (
  173. ' ' * (indent),
  174. feature,
  175. ('\n%s' % (' ' * (indent + len(feature) + 2))).join(item),
  176. )
  177. else: # ERROR
  178. raise Exception(
  179. 'feature %s is not an FStruct, a list, or a tuple' % feature
  180. )
  181. return accum + ']'
  182. def demo_read_depgraph():
  183. from nltk.parse.dependencygraph import DependencyGraph
  184. dg1 = DependencyGraph(
  185. """\
  186. Esso NNP 2 SUB
  187. said VBD 0 ROOT
  188. the DT 5 NMOD
  189. Whiting NNP 5 NMOD
  190. field NN 6 SUB
  191. started VBD 2 VMOD
  192. production NN 6 OBJ
  193. Tuesday NNP 6 VMOD
  194. """
  195. )
  196. dg2 = DependencyGraph(
  197. """\
  198. John NNP 2 SUB
  199. sees VBP 0 ROOT
  200. Mary NNP 2 OBJ
  201. """
  202. )
  203. dg3 = DependencyGraph(
  204. """\
  205. a DT 2 SPEC
  206. man NN 3 SUBJ
  207. walks VB 0 ROOT
  208. """
  209. )
  210. dg4 = DependencyGraph(
  211. """\
  212. every DT 2 SPEC
  213. girl NN 3 SUBJ
  214. chases VB 0 ROOT
  215. a DT 5 SPEC
  216. dog NN 3 OBJ
  217. """
  218. )
  219. depgraphs = [dg1, dg2, dg3, dg4]
  220. for dg in depgraphs:
  221. print(FStructure.read_depgraph(dg))
  222. if __name__ == '__main__':
  223. demo_read_depgraph()