dependency.doctest 7.3 KB

  1. .. Copyright (C) 2001-2019 NLTK Project
  2. .. For license information, see LICENSE.TXT
  3. ===================
  4. Dependency Grammars
  5. ===================
  6. >>> from nltk.grammar import DependencyGrammar
  7. >>> from nltk.parse import (
  8. ... DependencyGraph,
  9. ... ProjectiveDependencyParser,
  10. ... NonprojectiveDependencyParser,
  11. ... )
  12. CoNLL Data
  13. ----------
  14. >>> treebank_data = """Pierre NNP 2 NMOD
  15. ... Vinken NNP 8 SUB
  16. ... , , 2 P
  17. ... 61 CD 5 NMOD
  18. ... years NNS 6 AMOD
  19. ... old JJ 2 NMOD
  20. ... , , 2 P
  21. ... will MD 0 ROOT
  22. ... join VB 8 VC
  23. ... the DT 11 NMOD
  24. ... board NN 9 OBJ
  25. ... as IN 9 VMOD
  26. ... a DT 15 NMOD
  27. ... nonexecutive JJ 15 NMOD
  28. ... director NN 12 PMOD
  29. ... Nov. NNP 9 VMOD
  30. ... 29 CD 16 NMOD
  31. ... . . 9 VMOD
  32. ... """
  33. >>> dg = DependencyGraph(treebank_data)
  34. >>> dg.tree().pprint()
  35. (will
  36.   (Vinken Pierre , (old (years 61)) ,)
  37.   (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
  38. >>> for head, rel, dep in dg.triples():
  39. ...     print(
  40. ...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
  41. ...         .format(h=head, r=rel, d=dep)
  42. ...     )
  43. (will, MD), SUB, (Vinken, NNP)
  44. (Vinken, NNP), NMOD, (Pierre, NNP)
  45. (Vinken, NNP), P, (,, ,)
  46. (Vinken, NNP), NMOD, (old, JJ)
  47. (old, JJ), AMOD, (years, NNS)
  48. (years, NNS), NMOD, (61, CD)
  49. (Vinken, NNP), P, (,, ,)
  50. (will, MD), VC, (join, VB)
  51. (join, VB), OBJ, (board, NN)
  52. (board, NN), NMOD, (the, DT)
  53. (join, VB), VMOD, (as, IN)
  54. (as, IN), PMOD, (director, NN)
  55. (director, NN), NMOD, (a, DT)
  56. (director, NN), NMOD, (nonexecutive, JJ)
  57. (join, VB), VMOD, (Nov., NNP)
  58. (Nov., NNP), NMOD, (29, CD)
  59. (join, VB), VMOD, (., .)
  60. Using a custom cell extractor.
  61. >>> def custom_extractor(cells):
  62. ...     _, tag, head, rel = cells
  63. ...     return 'spam', 'spam', tag, tag, '', head, rel
  64. >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
  65. >>> dg.tree().pprint()
  66. (spam
  67.   (spam spam spam (spam (spam spam)) spam)
  68.   (spam (spam spam) (spam (spam spam spam)) (spam spam) spam))
  69. Custom cell extractors can take in and return an index.
  70. >>> def custom_extractor(cells, index):
  71. ...     word, tag, head, rel = cells
  72. ...     return (index, '{}-{}'.format(word, index), word,
  73. ...             tag, tag, '', head, rel)
  74. >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
  75. >>> dg.tree().pprint()
  76. (will-8
  77.   (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)
  78.   (join-9
  79.     (board-11 the-10)
  80.     (as-12 (director-15 a-13 nonexecutive-14))
  81.     (Nov.-16 29-17)
  82.     .-18))
  83. Using the dependency-parsed version of the Penn Treebank corpus sample.
  84. >>> from nltk.corpus import dependency_treebank
  85. >>> t = dependency_treebank.parsed_sents()[0]
  86. >>> print(t.to_conll(3)) # doctest: +NORMALIZE_WHITESPACE
  87. Pierre NNP 2
  88. Vinken NNP 8
  89. , , 2
  90. 61 CD 5
  91. years NNS 6
  92. old JJ 2
  93. , , 2
  94. will MD 0
  95. join VB 8
  96. the DT 11
  97. board NN 9
  98. as IN 9
  99. a DT 15
  100. nonexecutive JJ 15
  101. director NN 12
  102. Nov. NNP 9
  103. 29 CD 16
  104. . . 8
  105. Using the output of zpar (like Malt-TAB but with zero-based indexing).
  106. >>> zpar_data = """
  107. ... Pierre NNP 1 NMOD
  108. ... Vinken NNP 7 SUB
  109. ... , , 1 P
  110. ... 61 CD 4 NMOD
  111. ... years NNS 5 AMOD
  112. ... old JJ 1 NMOD
  113. ... , , 1 P
  114. ... will MD -1 ROOT
  115. ... join VB 7 VC
  116. ... the DT 10 NMOD
  117. ... board NN 8 OBJ
  118. ... as IN 8 VMOD
  119. ... a DT 14 NMOD
  120. ... nonexecutive JJ 14 NMOD
  121. ... director NN 11 PMOD
  122. ... Nov. NNP 8 VMOD
  123. ... 29 CD 15 NMOD
  124. ... . . 7 P
  125. ... """
  126. >>> zdg = DependencyGraph(zpar_data, zero_based=True)
  127. >>> print(zdg.tree())
  128. (will
  129.   (Vinken Pierre , (old (years 61)) ,)
  130.   (join (board the) (as (director a nonexecutive)) (Nov. 29))
  131.   .)
  132. Projective Dependency Parsing
  133. -----------------------------
  134. >>> grammar = DependencyGrammar.fromstring("""
  135. ... 'fell' -> 'price' | 'stock'
  136. ... 'price' -> 'of' 'the'
  137. ... 'of' -> 'stock'
  138. ... 'stock' -> 'the'
  139. ... """)
  140. >>> print(grammar)
  141. Dependency grammar with 5 productions
  142. 'fell' -> 'price'
  143. 'fell' -> 'stock'
  144. 'price' -> 'of' 'the'
  145. 'of' -> 'stock'
  146. 'stock' -> 'the'
  147. >>> dp = ProjectiveDependencyParser(grammar)
  148. >>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
  149. ...     print(t)
  150. (fell (price the (of (stock the))))
  151. (fell (price the of) (stock the))
  152. (fell (price the of the) stock)
  153. Non-Projective Dependency Parsing
  154. ---------------------------------
  155. >>> grammar = DependencyGrammar.fromstring("""
  156. ... 'taught' -> 'play' | 'man'
  157. ... 'man' -> 'the'
  158. ... 'play' -> 'golf' | 'dog' | 'to'
  159. ... 'dog' -> 'his'
  160. ... """)
  161. >>> print(grammar)
  162. Dependency grammar with 7 productions
  163. 'taught' -> 'play'
  164. 'taught' -> 'man'
  165. 'man' -> 'the'
  166. 'play' -> 'golf'
  167. 'play' -> 'dog'
  168. 'play' -> 'to'
  169. 'dog' -> 'his'
  170. >>> dp = NonprojectiveDependencyParser(grammar)
  171. >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
  172. >>> print(g.root['word'])
  173. taught
  174. >>> for _, node in sorted(g.nodes.items()):
  175. ...     if node['word'] is not None:
  176. ...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
  177. 1 the: []
  178. 2 man: [1]
  179. 3 taught: [2, 7]
  180. 4 his: []
  181. 5 dog: [4]
  182. 6 to: []
  183. 7 play: [5, 6, 8]
  184. 8 golf: []
  185. >>> print(g.tree())
  186. (taught (man the) (play (dog his) to golf))
  187. Integration with MALT parser
  188. ============================
  189. If the top relation label differs from the default, it can be set explicitly
  190. with `top_relation_label`. For the MALT parser, the label is `'null'`.
  191. >>> dg_str = """1 I _ NN NN _ 2 nn _ _
  192. ... 2 shot _ NN NN _ 0 null _ _
  193. ... 3 an _ AT AT _ 2 dep _ _
  194. ... 4 elephant _ NN NN _ 7 nn _ _
  195. ... 5 in _ NN NN _ 7 nn _ _
  196. ... 6 my _ NN NN _ 7 nn _ _
  197. ... 7 pajamas _ NNS NNS _ 3 dobj _ _
  198. ... """
  199. >>> dg = DependencyGraph(dg_str, top_relation_label='null')
  200. >>> len(dg.nodes)
  201. 8
  202. >>> dg.root['word'], dg.root['address']
  203. ('shot', 2)
  204. >>> print(dg.to_conll(10)) # doctest: +NORMALIZE_WHITESPACE
  205. 1 I _ NN NN _ 2 nn _ _
  206. 2 shot _ NN NN _ 0 null _ _
  207. 3 an _ AT AT _ 2 dep _ _
  208. 4 elephant _ NN NN _ 7 nn _ _
  209. 5 in _ NN NN _ 7 nn _ _
  210. 6 my _ NN NN _ 7 nn _ _
  211. 7 pajamas _ NNS NNS _ 3 dobj _ _