  1. .. Copyright (C) 2001-2019 NLTK Project
  2. .. For license information, see LICENSE.TXT
  3. =========
  4. Parsing
  5. =========
  6. Unit tests for the Context Free Grammar class
  7. ---------------------------------------------
  8. >>> from nltk import Nonterminal, nonterminals, Production, CFG
  9. >>> nt1 = Nonterminal('NP')
  10. >>> nt2 = Nonterminal('VP')
  11. >>> nt1.symbol()
  12. 'NP'
  13. >>> nt1 == Nonterminal('NP')
  14. True
  15. >>> nt1 == nt2
  16. False
  17. >>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
  18. >>> N, V, P, DT = nonterminals('N, V, P, DT')
  19. >>> prod1 = Production(S, [NP, VP])
  20. >>> prod2 = Production(NP, [DT, NP])
  21. >>> prod1.lhs()
  22. S
  23. >>> prod1.rhs()
  24. (NP, VP)
  25. >>> prod1 == Production(S, [NP, VP])
  26. True
  27. >>> prod1 == prod2
  28. False
  29. >>> grammar = CFG.fromstring("""
  30. ... S -> NP VP
  31. ... PP -> P NP
  32. ... NP -> 'the' N | N PP | 'the' N PP
  33. ... VP -> V NP | V PP | V NP PP
  34. ... N -> 'cat'
  35. ... N -> 'dog'
  36. ... N -> 'rug'
  37. ... V -> 'chased'
  38. ... V -> 'sat'
  39. ... P -> 'in'
  40. ... P -> 'on'
  41. ... """)
  42. Unit tests for the rd (Recursive Descent Parser) class
  43. ------------------------------------------------------
  44. Create and run a recursive descent parser over both a syntactically ambiguous
  45. and a syntactically unambiguous sentence.
  46. >>> from nltk.parse import RecursiveDescentParser
  47. >>> rd = RecursiveDescentParser(grammar)
  48. >>> sentence1 = 'the cat chased the dog'.split()
  49. >>> sentence2 = 'the cat chased the dog on the rug'.split()
  50. >>> for t in rd.parse(sentence1):
  51. ... print(t)
  52. (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
  53. >>> for t in rd.parse(sentence2):
  54. ... print(t)
  55. (S
  56. (NP the (N cat))
  57. (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
  58. (S
  59. (NP the (N cat))
  60. (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
  66. Unit tests for the sr (Shift Reduce Parser) class
  67. -------------------------------------------------
  68. Create and run a shift reduce parser over both a syntactically ambiguous
  69. and unambiguous sentence. Note that unlike the recursive descent parser, one
  70. and only one parse is ever returned.
  71. >>> from nltk.parse import ShiftReduceParser
  72. >>> sr = ShiftReduceParser(grammar)
  73. >>> sentence1 = 'the cat chased the dog'.split()
  74. >>> sentence2 = 'the cat chased the dog on the rug'.split()
  75. >>> for t in sr.parse(sentence1):
  76. ... print(t)
  77. (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
  78. The shift reduce parser uses heuristics to decide what to do when there are
  79. multiple possible shift or reduce operations available. For the supplied
  80. grammar the wrong operation is selected, so no parse is found.
  81. >>> for t in sr.parse(sentence2):
  82. ... print(t)
  83. Unit tests for the Chart Parser class
  84. -------------------------------------
  85. We use the demo() function for testing.
  86. We must turn off showing of times.
  87. >>> import nltk
  88. First we test tracing with a short sentence
  89. >>> nltk.parse.chart.demo(2, print_times=False, trace=1,
  90. ... sent='I saw a dog', numparses=1)
  91. * Sentence:
  92. I saw a dog
  93. ['I', 'saw', 'a', 'dog']
  94. <BLANKLINE>
  95. * Strategy: Bottom-up
  96. <BLANKLINE>
  97. |. I . saw . a . dog .|
  98. |[---------] . . .| [0:1] 'I'
  99. |. [---------] . .| [1:2] 'saw'
  100. |. . [---------] .| [2:3] 'a'
  101. |. . . [---------]| [3:4] 'dog'
  102. |> . . . .| [0:0] NP -> * 'I'
  103. |[---------] . . .| [0:1] NP -> 'I' *
  104. |> . . . .| [0:0] S -> * NP VP
  105. |> . . . .| [0:0] NP -> * NP PP
  106. |[---------> . . .| [0:1] S -> NP * VP
  107. |[---------> . . .| [0:1] NP -> NP * PP
  108. |. > . . .| [1:1] Verb -> * 'saw'
  109. |. [---------] . .| [1:2] Verb -> 'saw' *
  110. |. > . . .| [1:1] VP -> * Verb NP
  111. |. > . . .| [1:1] VP -> * Verb
  112. |. [---------> . .| [1:2] VP -> Verb * NP
  113. |. [---------] . .| [1:2] VP -> Verb *
  114. |. > . . .| [1:1] VP -> * VP PP
  115. |[-------------------] . .| [0:2] S -> NP VP *
  116. |. [---------> . .| [1:2] VP -> VP * PP
  117. |. . > . .| [2:2] Det -> * 'a'
  118. |. . [---------] .| [2:3] Det -> 'a' *
  119. |. . > . .| [2:2] NP -> * Det Noun
  120. |. . [---------> .| [2:3] NP -> Det * Noun
  121. |. . . > .| [3:3] Noun -> * 'dog'
  122. |. . . [---------]| [3:4] Noun -> 'dog' *
  123. |. . [-------------------]| [2:4] NP -> Det Noun *
  124. |. . > . .| [2:2] S -> * NP VP
  125. |. . > . .| [2:2] NP -> * NP PP
  126. |. [-----------------------------]| [1:4] VP -> Verb NP *
  127. |. . [------------------->| [2:4] S -> NP * VP
  128. |. . [------------------->| [2:4] NP -> NP * PP
  129. |[=======================================]| [0:4] S -> NP VP *
  130. |. [----------------------------->| [1:4] VP -> VP * PP
  131. Nr edges in chart: 33
  132. (S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
  133. <BLANKLINE>
  134. Then we test the different parsing strategies.
  135. Note that the number of edges differs between the strategies.
  136. Top-down
  137. >>> nltk.parse.chart.demo(1, print_times=False, trace=0,
  138. ... sent='I saw John with a dog', numparses=2)
  139. * Sentence:
  140. I saw John with a dog
  141. ['I', 'saw', 'John', 'with', 'a', 'dog']
  142. <BLANKLINE>
  143. * Strategy: Top-down
  144. <BLANKLINE>
  145. Nr edges in chart: 48
  146. (S
  147. (NP I)
  148. (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
  149. (S
  150. (NP I)
  151. (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
  152. <BLANKLINE>
  153. Bottom-up
  154. >>> nltk.parse.chart.demo(2, print_times=False, trace=0,
  155. ... sent='I saw John with a dog', numparses=2)
  156. * Sentence:
  157. I saw John with a dog
  158. ['I', 'saw', 'John', 'with', 'a', 'dog']
  159. <BLANKLINE>
  160. * Strategy: Bottom-up
  161. <BLANKLINE>
  162. Nr edges in chart: 53
  163. (S
  164. (NP I)
  165. (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
  166. (S
  167. (NP I)
  168. (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
  169. <BLANKLINE>
  170. Bottom-up Left-Corner
  171. >>> nltk.parse.chart.demo(3, print_times=False, trace=0,
  172. ... sent='I saw John with a dog', numparses=2)
  173. * Sentence:
  174. I saw John with a dog
  175. ['I', 'saw', 'John', 'with', 'a', 'dog']
  176. <BLANKLINE>
  177. * Strategy: Bottom-up left-corner
  178. <BLANKLINE>
  179. Nr edges in chart: 36
  180. (S
  181. (NP I)
  182. (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
  183. (S
  184. (NP I)
  185. (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
  186. <BLANKLINE>
  187. Left-Corner with Bottom-Up Filter
  188. >>> nltk.parse.chart.demo(4, print_times=False, trace=0,
  189. ... sent='I saw John with a dog', numparses=2)
  190. * Sentence:
  191. I saw John with a dog
  192. ['I', 'saw', 'John', 'with', 'a', 'dog']
  193. <BLANKLINE>
  194. * Strategy: Filtered left-corner
  195. <BLANKLINE>
  196. Nr edges in chart: 28
  197. (S
  198. (NP I)
  199. (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
  200. (S
  201. (NP I)
  202. (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
  203. <BLANKLINE>
  204. The stepping chart parser
  205. >>> nltk.parse.chart.demo(5, print_times=False, trace=1,
  206. ... sent='I saw John with a dog', numparses=2)
  207. * Sentence:
  208. I saw John with a dog
  209. ['I', 'saw', 'John', 'with', 'a', 'dog']
  210. <BLANKLINE>
  211. * Strategy: Stepping (top-down vs bottom-up)
  212. <BLANKLINE>
  213. *** SWITCH TO TOP DOWN
  214. |[------] . . . . .| [0:1] 'I'
  215. |. [------] . . . .| [1:2] 'saw'
  216. |. . [------] . . .| [2:3] 'John'
  217. |. . . [------] . .| [3:4] 'with'
  218. |. . . . [------] .| [4:5] 'a'
  219. |. . . . . [------]| [5:6] 'dog'
  220. |> . . . . . .| [0:0] S -> * NP VP
  221. |> . . . . . .| [0:0] NP -> * NP PP
  222. |> . . . . . .| [0:0] NP -> * Det Noun
  223. |> . . . . . .| [0:0] NP -> * 'I'
  224. |[------] . . . . .| [0:1] NP -> 'I' *
  225. |[------> . . . . .| [0:1] S -> NP * VP
  226. |[------> . . . . .| [0:1] NP -> NP * PP
  227. |. > . . . . .| [1:1] VP -> * VP PP
  228. |. > . . . . .| [1:1] VP -> * Verb NP
  229. |. > . . . . .| [1:1] VP -> * Verb
  230. |. > . . . . .| [1:1] Verb -> * 'saw'
  231. |. [------] . . . .| [1:2] Verb -> 'saw' *
  232. |. [------> . . . .| [1:2] VP -> Verb * NP
  233. |. [------] . . . .| [1:2] VP -> Verb *
  234. |[-------------] . . . .| [0:2] S -> NP VP *
  235. |. [------> . . . .| [1:2] VP -> VP * PP
  236. *** SWITCH TO BOTTOM UP
  237. |. . > . . . .| [2:2] NP -> * 'John'
  238. |. . . > . . .| [3:3] PP -> * 'with' NP
  239. |. . . > . . .| [3:3] Prep -> * 'with'
  240. |. . . . > . .| [4:4] Det -> * 'a'
  241. |. . . . . > .| [5:5] Noun -> * 'dog'
  242. |. . [------] . . .| [2:3] NP -> 'John' *
  243. |. . . [------> . .| [3:4] PP -> 'with' * NP
  244. |. . . [------] . .| [3:4] Prep -> 'with' *
  245. |. . . . [------] .| [4:5] Det -> 'a' *
  246. |. . . . . [------]| [5:6] Noun -> 'dog' *
  247. |. [-------------] . . .| [1:3] VP -> Verb NP *
  248. |[--------------------] . . .| [0:3] S -> NP VP *
  249. |. [-------------> . . .| [1:3] VP -> VP * PP
  250. |. . > . . . .| [2:2] S -> * NP VP
  251. |. . > . . . .| [2:2] NP -> * NP PP
  252. |. . . . > . .| [4:4] NP -> * Det Noun
  253. |. . [------> . . .| [2:3] S -> NP * VP
  254. |. . [------> . . .| [2:3] NP -> NP * PP
  255. |. . . . [------> .| [4:5] NP -> Det * Noun
  256. |. . . . [-------------]| [4:6] NP -> Det Noun *
  257. |. . . [--------------------]| [3:6] PP -> 'with' NP *
  258. |. [----------------------------------]| [1:6] VP -> VP PP *
  259. *** SWITCH TO TOP DOWN
  260. |. . > . . . .| [2:2] NP -> * Det Noun
  261. |. . . . > . .| [4:4] NP -> * NP PP
  262. |. . . > . . .| [3:3] VP -> * VP PP
  263. |. . . > . . .| [3:3] VP -> * Verb NP
  264. |. . . > . . .| [3:3] VP -> * Verb
  265. |[=========================================]| [0:6] S -> NP VP *
  266. |. [---------------------------------->| [1:6] VP -> VP * PP
  267. |. . [---------------------------]| [2:6] NP -> NP PP *
  268. |. . . . [------------->| [4:6] NP -> NP * PP
  269. |. [----------------------------------]| [1:6] VP -> Verb NP *
  270. |. . [--------------------------->| [2:6] S -> NP * VP
  271. |. . [--------------------------->| [2:6] NP -> NP * PP
  272. |[=========================================]| [0:6] S -> NP VP *
  273. |. [---------------------------------->| [1:6] VP -> VP * PP
  274. |. . . . . . >| [6:6] VP -> * VP PP
  275. |. . . . . . >| [6:6] VP -> * Verb NP
  276. |. . . . . . >| [6:6] VP -> * Verb
  277. *** SWITCH TO BOTTOM UP
  278. |. . . . > . .| [4:4] S -> * NP VP
  279. |. . . . [------------->| [4:6] S -> NP * VP
  280. *** SWITCH TO TOP DOWN
  281. *** SWITCH TO BOTTOM UP
  282. *** SWITCH TO TOP DOWN
  283. *** SWITCH TO BOTTOM UP
  284. *** SWITCH TO TOP DOWN
  285. *** SWITCH TO BOTTOM UP
  286. Nr edges in chart: 61
  287. (S
  288. (NP I)
  289. (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
  290. (S
  291. (NP I)
  292. (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
  293. <BLANKLINE>
  294. Unit tests for the Incremental Chart Parser class
  295. -------------------------------------------------
  296. The incremental chart parsers are defined in earleychart.py.
  297. We use the demo() function for testing. We must turn off showing of times.
  298. >>> import nltk
  299. Earley Chart Parser
  300. >>> nltk.parse.earleychart.demo(print_times=False, trace=1,
  301. ... sent='I saw John with a dog', numparses=2)
  302. * Sentence:
  303. I saw John with a dog
  304. ['I', 'saw', 'John', 'with', 'a', 'dog']
  305. <BLANKLINE>
  306. |. I . saw . John . with . a . dog .|
  307. |[------] . . . . .| [0:1] 'I'
  308. |. [------] . . . .| [1:2] 'saw'
  309. |. . [------] . . .| [2:3] 'John'
  310. |. . . [------] . .| [3:4] 'with'
  311. |. . . . [------] .| [4:5] 'a'
  312. |. . . . . [------]| [5:6] 'dog'
  313. |> . . . . . .| [0:0] S -> * NP VP
  314. |> . . . . . .| [0:0] NP -> * NP PP
  315. |> . . . . . .| [0:0] NP -> * Det Noun
  316. |> . . . . . .| [0:0] NP -> * 'I'
  317. |[------] . . . . .| [0:1] NP -> 'I' *
  318. |[------> . . . . .| [0:1] S -> NP * VP
  319. |[------> . . . . .| [0:1] NP -> NP * PP
  320. |. > . . . . .| [1:1] VP -> * VP PP
  321. |. > . . . . .| [1:1] VP -> * Verb NP
  322. |. > . . . . .| [1:1] VP -> * Verb
  323. |. > . . . . .| [1:1] Verb -> * 'saw'
  324. |. [------] . . . .| [1:2] Verb -> 'saw' *
  325. |. [------> . . . .| [1:2] VP -> Verb * NP
  326. |. [------] . . . .| [1:2] VP -> Verb *
  327. |[-------------] . . . .| [0:2] S -> NP VP *
  328. |. [------> . . . .| [1:2] VP -> VP * PP
  329. |. . > . . . .| [2:2] NP -> * NP PP
  330. |. . > . . . .| [2:2] NP -> * Det Noun
  331. |. . > . . . .| [2:2] NP -> * 'John'
  332. |. . [------] . . .| [2:3] NP -> 'John' *
  333. |. [-------------] . . .| [1:3] VP -> Verb NP *
  334. |. . [------> . . .| [2:3] NP -> NP * PP
  335. |. . . > . . .| [3:3] PP -> * 'with' NP
  336. |[--------------------] . . .| [0:3] S -> NP VP *
  337. |. [-------------> . . .| [1:3] VP -> VP * PP
  338. |. . . [------> . .| [3:4] PP -> 'with' * NP
  339. |. . . . > . .| [4:4] NP -> * NP PP
  340. |. . . . > . .| [4:4] NP -> * Det Noun
  341. |. . . . > . .| [4:4] Det -> * 'a'
  342. |. . . . [------] .| [4:5] Det -> 'a' *
  343. |. . . . [------> .| [4:5] NP -> Det * Noun
  344. |. . . . . > .| [5:5] Noun -> * 'dog'
  345. |. . . . . [------]| [5:6] Noun -> 'dog' *
  346. |. . . . [-------------]| [4:6] NP -> Det Noun *
  347. |. . . [--------------------]| [3:6] PP -> 'with' NP *
  348. |. . . . [------------->| [4:6] NP -> NP * PP
  349. |. . [---------------------------]| [2:6] NP -> NP PP *
  350. |. [----------------------------------]| [1:6] VP -> VP PP *
  351. |[=========================================]| [0:6] S -> NP VP *
  352. |. [---------------------------------->| [1:6] VP -> VP * PP
  353. |. [----------------------------------]| [1:6] VP -> Verb NP *
  354. |. . [--------------------------->| [2:6] NP -> NP * PP
  355. |[=========================================]| [0:6] S -> NP VP *
  356. |. [---------------------------------->| [1:6] VP -> VP * PP
  357. (S
  358. (NP I)
  359. (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
  360. (S
  361. (NP I)
  362. (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
  363. Unit tests for LARGE context-free grammars
  364. ------------------------------------------
  365. Reading the ATIS grammar.
  366. >>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
  367. >>> grammar
  368. <Grammar with 5517 productions>
  369. Reading the test sentences.
  370. >>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
  371. >>> sentences = nltk.parse.util.extract_test_sentences(sentences)
  372. >>> len(sentences)
  373. 98
  374. >>> testsentence = sentences[22]
  375. >>> testsentence[0]
  376. ['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
  377. >>> testsentence[1]
  378. 17
  379. >>> sentence = testsentence[0]
  380. Now we test all the different parsing strategies.
  381. Note that the number of edges differs between the strategies.
  382. Bottom-up parsing.
  383. >>> parser = nltk.parse.BottomUpChartParser(grammar)
  384. >>> chart = parser.chart_parse(sentence)
  385. >>> print((chart.num_edges()))
  386. 7661
  387. >>> print((len(list(chart.parses(grammar.start())))))
  388. 17
  389. Bottom-up Left-corner parsing.
  390. >>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
  391. >>> chart = parser.chart_parse(sentence)
  392. >>> print((chart.num_edges()))
  393. 4986
  394. >>> print((len(list(chart.parses(grammar.start())))))
  395. 17
  396. Left-corner parsing with bottom-up filter.
  397. >>> parser = nltk.parse.LeftCornerChartParser(grammar)
  398. >>> chart = parser.chart_parse(sentence)
  399. >>> print((chart.num_edges()))
  400. 1342
  401. >>> print((len(list(chart.parses(grammar.start())))))
  402. 17
  403. Top-down parsing.
  404. >>> parser = nltk.parse.TopDownChartParser(grammar)
  405. >>> chart = parser.chart_parse(sentence)
  406. >>> print((chart.num_edges()))
  407. 28352
  408. >>> print((len(list(chart.parses(grammar.start())))))
  409. 17
  410. Incremental Bottom-up parsing.
  411. >>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar)
  412. >>> chart = parser.chart_parse(sentence)
  413. >>> print((chart.num_edges()))
  414. 7661
  415. >>> print((len(list(chart.parses(grammar.start())))))
  416. 17
  417. Incremental Bottom-up Left-corner parsing.
  418. >>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar)
  419. >>> chart = parser.chart_parse(sentence)
  420. >>> print((chart.num_edges()))
  421. 4986
  422. >>> print((len(list(chart.parses(grammar.start())))))
  423. 17
  424. Incremental Left-corner parsing with bottom-up filter.
  425. >>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar)
  426. >>> chart = parser.chart_parse(sentence)
  427. >>> print((chart.num_edges()))
  428. 1342
  429. >>> print((len(list(chart.parses(grammar.start())))))
  430. 17
  431. Incremental Top-down parsing.
  432. >>> parser = nltk.parse.IncrementalTopDownChartParser(grammar)
  433. >>> chart = parser.chart_parse(sentence)
  434. >>> print((chart.num_edges()))
  435. 28352
  436. >>> print((len(list(chart.parses(grammar.start())))))
  437. 17
  438. Earley parsing. This is similar to the incremental top-down algorithm.
  439. >>> parser = nltk.parse.EarleyChartParser(grammar)
  440. >>> chart = parser.chart_parse(sentence)
  441. >>> print((chart.num_edges()))
  442. 28352
  443. >>> print((len(list(chart.parses(grammar.start())))))
  444. 17
  445. Unit tests for the Probabilistic CFG class
  446. ------------------------------------------
  447. >>> from nltk.corpus import treebank
  448. >>> from itertools import islice
  449. >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2
  450. Create a set of PCFG productions.
  451. >>> grammar = PCFG.fromstring("""
  452. ... A -> B B [.3] | C B C [.7]
  453. ... B -> B D [.5] | C [.5]
  454. ... C -> 'a' [.1] | 'b' [0.9]
  455. ... D -> 'b' [1.0]
  456. ... """)
  457. >>> prod = grammar.productions()[0]
  458. >>> prod
  459. A -> B B [0.3]
  460. >>> prod.lhs()
  461. A
  462. >>> prod.rhs()
  463. (B, B)
  464. >>> print((prod.prob()))
  465. 0.3
  466. >>> grammar.start()
  467. A
  468. >>> grammar.productions()
  469. [A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
  470. Induce some productions using parsed Treebank data.
  471. >>> productions = []
  472. >>> for fileid in treebank.fileids()[:2]:
  473. ... for t in treebank.parsed_sents(fileid):
  474. ... productions += t.productions()
  475. >>> grammar = induce_pcfg(S, productions)
  476. >>> grammar
  477. <Grammar with 71 productions>
  478. >>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
  479. [PP -> IN NP [1.0]]
  480. >>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
  481. [NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
  482. >>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
  483. [JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]
  484. >>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]
  485. [NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]
  486. Unit tests for the Probabilistic Chart Parse classes
  487. ----------------------------------------------------
  488. >>> tokens = "Jack saw Bob with my cookie".split()
  489. >>> grammar = toy_pcfg2
  490. >>> print(grammar)
  491. Grammar with 23 productions (start state = S)
  492. S -> NP VP [1.0]
  493. VP -> V NP [0.59]
  494. VP -> V [0.4]
  495. VP -> VP PP [0.01]
  496. NP -> Det N [0.41]
  497. NP -> Name [0.28]
  498. NP -> NP PP [0.31]
  499. PP -> P NP [1.0]
  500. V -> 'saw' [0.21]
  501. V -> 'ate' [0.51]
  502. V -> 'ran' [0.28]
  503. N -> 'boy' [0.11]
  504. N -> 'cookie' [0.12]
  505. N -> 'table' [0.13]
  506. N -> 'telescope' [0.14]
  507. N -> 'hill' [0.5]
  508. Name -> 'Jack' [0.52]
  509. Name -> 'Bob' [0.48]
  510. P -> 'with' [0.61]
  511. P -> 'under' [0.39]
  512. Det -> 'the' [0.41]
  513. Det -> 'a' [0.31]
  514. Det -> 'my' [0.28]
  515. Create several parsers using different queuing strategies and show the
  516. resulting parses.
  517. >>> from nltk.parse import pchart
  518. >>> parser = pchart.InsideChartParser(grammar)
  519. >>> for t in parser.parse(tokens):
  520. ... print(t)
  521. (S
  522. (NP (Name Jack))
  523. (VP
  524. (V saw)
  525. (NP
  526. (NP (Name Bob))
  527. (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
  528. (S
  529. (NP (Name Jack))
  530. (VP
  531. (VP (V saw) (NP (Name Bob)))
  532. (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
  533. >>> parser = pchart.RandomChartParser(grammar)
  534. >>> for t in parser.parse(tokens):
  535. ... print(t)
  536. (S
  537. (NP (Name Jack))
  538. (VP
  539. (V saw)
  540. (NP
  541. (NP (Name Bob))
  542. (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
  543. (S
  544. (NP (Name Jack))
  545. (VP
  546. (VP (V saw) (NP (Name Bob)))
  547. (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
  548. >>> parser = pchart.UnsortedChartParser(grammar)
  549. >>> for t in parser.parse(tokens):
  550. ... print(t)
  551. (S
  552. (NP (Name Jack))
  553. (VP
  554. (V saw)
  555. (NP
  556. (NP (Name Bob))
  557. (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
  558. (S
  559. (NP (Name Jack))
  560. (VP
  561. (VP (V saw) (NP (Name Bob)))
  562. (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
  563. >>> parser = pchart.LongestChartParser(grammar)
  564. >>> for t in parser.parse(tokens):
  565. ... print(t)
  566. (S
  567. (NP (Name Jack))
  568. (VP
  569. (V saw)
  570. (NP
  571. (NP (Name Bob))
  572. (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
  573. (S
  574. (NP (Name Jack))
  575. (VP
  576. (VP (V saw) (NP (Name Bob)))
  577. (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
  578. >>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)
  579. >>> for t in parser.parse(tokens):
  580. ... print(t)
  581. Unit tests for the Viterbi Parse classes
  582. ----------------------------------------
  583. >>> from nltk.parse import ViterbiParser
  584. >>> tokens = "Jack saw Bob with my cookie".split()
  585. >>> grammar = toy_pcfg2
  586. Parse the tokenized sentence.
  587. >>> parser = ViterbiParser(grammar)
  588. >>> for t in parser.parse(tokens):
  589. ... print(t)
  590. (S
  591. (NP (Name Jack))
  592. (VP
  593. (V saw)
  594. (NP
  595. (NP (Name Bob))
  596. (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
  597. Unit tests for the FeatStructNonterminal class
  598. ----------------------------------------------
  599. >>> from nltk.grammar import FeatStructNonterminal
  600. >>> FeatStructNonterminal(
  601. ... pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
  602. [agr=[gender='f', number='pl'], pos='n']
  603. >>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
  604. VP[+fin]/NP[+pl]
  605. Tracing the Feature Chart Parser
  606. --------------------------------
  607. We use the featurechart.demo() function for tracing the Feature Chart Parser.
  608. >>> nltk.parse.featurechart.demo(print_times=False,
  609. ... print_grammar=True,
  610. ... parser=nltk.parse.featurechart.FeatureChartParser,
  611. ... sent='I saw John with a dog')
  612. <BLANKLINE>
  613. Grammar with 18 productions (start state = S[])
  614. S[] -> NP[] VP[]
  615. PP[] -> Prep[] NP[]
  616. NP[] -> NP[] PP[]
  617. VP[] -> VP[] PP[]
  618. VP[] -> Verb[] NP[]
  619. VP[] -> Verb[]
  620. NP[] -> Det[pl=?x] Noun[pl=?x]
  621. NP[] -> 'John'
  622. NP[] -> 'I'
  623. Det[] -> 'the'
  624. Det[] -> 'my'
  625. Det[-pl] -> 'a'
  626. Noun[-pl] -> 'dog'
  627. Noun[-pl] -> 'cookie'
  628. Verb[] -> 'ate'
  629. Verb[] -> 'saw'
  630. Prep[] -> 'with'
  631. Prep[] -> 'under'
  632. <BLANKLINE>
  633. * FeatureChartParser
  634. Sentence: I saw John with a dog
  635. |.I.s.J.w.a.d.|
  636. |[-] . . . . .| [0:1] 'I'
  637. |. [-] . . . .| [1:2] 'saw'
  638. |. . [-] . . .| [2:3] 'John'
  639. |. . . [-] . .| [3:4] 'with'
  640. |. . . . [-] .| [4:5] 'a'
  641. |. . . . . [-]| [5:6] 'dog'
  642. |[-] . . . . .| [0:1] NP[] -> 'I' *
  643. |[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
  644. |[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
  645. |. [-] . . . .| [1:2] Verb[] -> 'saw' *
  646. |. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
  647. |. [-] . . . .| [1:2] VP[] -> Verb[] *
  648. |. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
  649. |[---] . . . .| [0:2] S[] -> NP[] VP[] *
  650. |. . [-] . . .| [2:3] NP[] -> 'John' *
  651. |. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
  652. |. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
  653. |. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
  654. |. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
  655. |[-----] . . .| [0:3] S[] -> NP[] VP[] *
  656. |. . . [-] . .| [3:4] Prep[] -> 'with' *
  657. |. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
  658. |. . . . [-] .| [4:5] Det[-pl] -> 'a' *
  659. |. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
  660. |. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
  661. |. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
  662. |. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
  663. |. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
  664. |. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
  665. |. . [-------]| [2:6] NP[] -> NP[] PP[] *
  666. |. [---------]| [1:6] VP[] -> VP[] PP[] *
  667. |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
  668. |[===========]| [0:6] S[] -> NP[] VP[] *
  669. |. . [------->| [2:6] S[] -> NP[] * VP[] {}
  670. |. . [------->| [2:6] NP[] -> NP[] * PP[] {}
  671. |. [---------]| [1:6] VP[] -> Verb[] NP[] *
  672. |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
  673. |[===========]| [0:6] S[] -> NP[] VP[] *
  674. (S[]
  675. (NP[] I)
  676. (VP[]
  677. (VP[] (Verb[] saw) (NP[] John))
  678. (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))
  679. (S[]
  680. (NP[] I)
  681. (VP[]
  682. (Verb[] saw)
  683. (NP[]
  684. (NP[] John)
  685. (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
  686. Unit tests for the Feature Chart Parser classes
  687. -----------------------------------------------
  688. The list of parsers we want to test.
  689. >>> parsers = [nltk.parse.featurechart.FeatureChartParser,
  690. ... nltk.parse.featurechart.FeatureTopDownChartParser,
  691. ... nltk.parse.featurechart.FeatureBottomUpChartParser,
  692. ... nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser,
  693. ... nltk.parse.earleychart.FeatureIncrementalChartParser,
  694. ... nltk.parse.earleychart.FeatureEarleyChartParser,
  695. ... nltk.parse.earleychart.FeatureIncrementalTopDownChartParser,
  696. ... nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser,
  697. ... nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser,
  698. ... ]
  699. A helper function that tests each parser on the given grammar and sentence.
  700. We check that the number of trees is correct, and that all parsers
  701. return the same trees. Otherwise an error is printed.
  702. >>> def unittest(grammar, sentence, nr_trees):
  703. ... sentence = sentence.split()
  704. ... trees = None
  705. ... for P in parsers:
  706. ... result = P(grammar).parse(sentence)
  707. ... result = set(tree.freeze() for tree in result)
  708. ... if len(result) != nr_trees:
  709. ... print("Wrong nr of trees:", len(result))
  710. ... elif trees is None:
  711. ... trees = result
  712. ... elif result != trees:
  713. ... print("Trees differ for parser:", P.__name__)
  714. The demo grammar from before, with an ambiguous sentence.
  715. >>> isawjohn = nltk.parse.featurechart.demo_grammar()
  716. >>> unittest(isawjohn, "I saw John with a dog with my cookie", 5)
  717. This grammar tests that variables in different grammar rules are renamed
  718. before unification. (The problematic variable in this case is ?X.)
  719. >>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring('''
  720. ... S[] -> NP[num=?N] VP[num=?N, slash=?X]
  721. ... NP[num=?X] -> "what"
  722. ... NP[num=?X] -> "that"
  723. ... VP[num=?P, slash=none] -> V[num=?P] NP[]
  724. ... V[num=sg] -> "was"
  725. ... ''')
  726. >>> unittest(whatwasthat, "what was that", 1)
  727. This grammar tests that the same rule can be used in different places
  728. in another rule, and that the variables are properly renamed.
  729. >>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring('''
  730. ... S[] -> NP[case=nom] V[] NP[case=acc]
  731. ... NP[case=?X] -> Pron[case=?X]
  732. ... Pron[] -> "this"
  733. ... Pron[] -> "that"
  734. ... V[] -> "loves"
  735. ... ''')
  736. >>> unittest(thislovesthat, "this loves that", 1)
  737. Tests for loading feature grammar files
  738. ---------------------------------------
  739. Alternative 1: first load the grammar, then create the parser.
  740. >>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg')
  741. >>> fcp1 = nltk.parse.FeatureChartParser(fcfg)
  742. >>> print((type(fcp1)))
  743. <class 'nltk.parse.featurechart.FeatureChartParser'>
  744. Alternative 2: directly load the parser.
  745. >>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg')
  746. >>> print((type(fcp2)))
  747. <class 'nltk.parse.featurechart.FeatureChartParser'>