ccg.doctest 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. .. Copyright (C) 2001-2019 NLTK Project
  2. .. For license information, see LICENSE.TXT
  3. ==============================
  4. Combinatory Categorial Grammar
  5. ==============================
  6. Relative Clauses
  7. ----------------
  8. >>> from nltk.ccg import chart, lexicon
  9. Construct a lexicon:
  10. >>> lex = lexicon.parseLexicon('''
  11. ... :- S, NP, N, VP
  12. ...
  13. ... Det :: NP/N
  14. ... Pro :: NP
  15. ... Modal :: S\\NP/VP
  16. ...
  17. ... TV :: VP/NP
  18. ... DTV :: TV/NP
  19. ...
  20. ... the => Det
  21. ...
  22. ... that => Det
  23. ... that => NP
  24. ...
  25. ... I => Pro
  26. ... you => Pro
  27. ... we => Pro
  28. ...
  29. ... chef => N
  30. ... cake => N
  31. ... children => N
  32. ... dough => N
  33. ...
  34. ... will => Modal
  35. ... should => Modal
  36. ... might => Modal
  37. ... must => Modal
  38. ...
  39. ... and => var\\.,var/.,var
  40. ...
  41. ... to => VP[to]/VP
  42. ...
  43. ... without => (VP\\VP)/VP[ing]
  44. ...
  45. ... be => TV
  46. ... cook => TV
  47. ... eat => TV
  48. ...
  49. ... cooking => VP[ing]/NP
  50. ...
  51. ... give => DTV
  52. ...
  53. ... is => (S\\NP)/NP
  54. ... prefer => (S\\NP)/NP
  55. ...
  56. ... which => (N\\N)/(S/NP)
  57. ...
  58. ... persuade => (VP/VP[to])/NP
  59. ... ''')
  60. >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
  61. >>> for parse in parser.parse("you prefer that cake".split()):
  62. ... chart.printCCGDerivation(parse)
  63. ... break
  64. ...
  65. you prefer that cake
  66. NP ((S\NP)/NP) (NP/N) N
  67. -------------->
  68. NP
  69. --------------------------->
  70. (S\NP)
  71. --------------------------------<
  72. S
  73. >>> for parse in parser.parse("that is the cake which you prefer".split()):
  74. ... chart.printCCGDerivation(parse)
  75. ... break
  76. ...
  77. that is the cake which you prefer
  78. NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/NP)
  79. ----->T
  80. (S/(S\NP))
  81. ------------------>B
  82. (S/NP)
  83. ---------------------------------->
  84. (N\N)
  85. ----------------------------------------<
  86. N
  87. ------------------------------------------------>
  88. NP
  89. ------------------------------------------------------------->
  90. (S\NP)
  91. -------------------------------------------------------------------<
  92. S
  93. Some other sentences to try:
  94. "that is the cake which we will persuade the chef to cook"
  95. "that is the cake which we will persuade the chef to give the children"
  96. >>> sent = "that is the dough which you will eat without cooking".split()
  97. >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
  98. ... chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
  99. Without Substitution (no parse is found, so nothing is printed):
  100. >>> for parse in nosub_parser.parse(sent):
  101. ... chart.printCCGDerivation(parse)
  102. With Substitution:
  103. >>> for parse in parser.parse(sent):
  104. ... chart.printCCGDerivation(parse)
  105. ... break
  106. ...
  107. that is the dough which you will eat without cooking
  108. NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
  109. ----->T
  110. (S/(S\NP))
  111. ------------------------------------->B
  112. ((VP\VP)/NP)
  113. ----------------------------------------------<Sx
  114. (VP/NP)
  115. ----------------------------------------------------------->B
  116. ((S\NP)/NP)
  117. ---------------------------------------------------------------->B
  118. (S/NP)
  119. -------------------------------------------------------------------------------->
  120. (N\N)
  121. ---------------------------------------------------------------------------------------<
  122. N
  123. ----------------------------------------------------------------------------------------------->
  124. NP
  125. ------------------------------------------------------------------------------------------------------------>
  126. (S\NP)
  127. ------------------------------------------------------------------------------------------------------------------<
  128. S
  129. Conjunction
  130. -----------
  131. >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
  132. >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
  133. >>> from nltk.ccg import lexicon
  134. Lexicons for the tests:
  135. >>> test1_lex = '''
  136. ... :- S,N,NP,VP
  137. ... I => NP
  138. ... you => NP
  139. ... will => S\\NP/VP
  140. ... cook => VP/NP
  141. ... which => (N\\N)/(S/NP)
  142. ... and => var\\.,var/.,var
  143. ... might => S\\NP/VP
  144. ... eat => VP/NP
  145. ... the => NP/N
  146. ... mushrooms => N
  147. ... parsnips => N'''
  148. >>> test2_lex = '''
  149. ... :- N, S, NP, VP
  150. ... articles => N
  151. ... the => NP/N
  152. ... and => var\\.,var/.,var
  153. ... which => (N\\N)/(S/NP)
  154. ... I => NP
  155. ... anyone => NP
  156. ... will => (S/VP)\\NP
  157. ... file => VP/NP
  158. ... without => (VP\\VP)/VP[ing]
  159. ... forget => VP/NP
  160. ... reading => VP[ing]/NP
  161. ... '''
  162. Tests handling of conjunctions.
  163. Note that while the two derivations are different, they are semantically equivalent.
  164. >>> lex = lexicon.parseLexicon(test1_lex)
  165. >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
  166. >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
  167. ... printCCGDerivation(parse)
  168. I will cook and might eat the mushrooms and parsnips
  169. NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N
  170. ---------------------->B
  171. ((S\NP)/NP)
  172. ---------------------->B
  173. ((S\NP)/NP)
  174. ------------------------------------------------->
  175. (((S\NP)/NP)\.,((S\NP)/NP))
  176. -----------------------------------------------------------------------<
  177. ((S\NP)/NP)
  178. ------------------------------------->
  179. (N\.,N)
  180. ------------------------------------------------<
  181. N
  182. -------------------------------------------------------->
  183. NP
  184. ------------------------------------------------------------------------------------------------------------------------------->
  185. (S\NP)
  186. -----------------------------------------------------------------------------------------------------------------------------------<
  187. S
  188. I will cook and might eat the mushrooms and parsnips
  189. NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N
  190. ---------------------->B
  191. ((S\NP)/NP)
  192. ---------------------->B
  193. ((S\NP)/NP)
  194. ------------------------------------------------->
  195. (((S\NP)/NP)\.,((S\NP)/NP))
  196. -----------------------------------------------------------------------<
  197. ((S\NP)/NP)
  198. ------------------------------------------------------------------------------->B
  199. ((S\NP)/N)
  200. ------------------------------------->
  201. (N\.,N)
  202. ------------------------------------------------<
  203. N
  204. ------------------------------------------------------------------------------------------------------------------------------->
  205. (S\NP)
  206. -----------------------------------------------------------------------------------------------------------------------------------<
  207. S
  208. Tests handling of subject extraction.
  209. It is interesting to note that the two parses are clearly semantically different.
  210. >>> lex = lexicon.parseLexicon(test2_lex)
  211. >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
  212. >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
  213. ... printCCGDerivation(parse)
  214. articles which I will file and forget without reading
  215. N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
  216. -----------------<
  217. (S/VP)
  218. ------------------------------------->B
  219. ((VP\VP)/NP)
  220. ----------------------------------------------<Sx
  221. (VP/NP)
  222. ------------------------------------------------------------------------->
  223. ((VP/NP)\.,(VP/NP))
  224. ----------------------------------------------------------------------------------<
  225. (VP/NP)
  226. --------------------------------------------------------------------------------------------------->B
  227. (S/NP)
  228. ------------------------------------------------------------------------------------------------------------------->
  229. (N\N)
  230. -----------------------------------------------------------------------------------------------------------------------------<
  231. N
  232. articles which I will file and forget without reading
  233. N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
  234. -----------------<
  235. (S/VP)
  236. ------------------------------------>
  237. ((VP/NP)\.,(VP/NP))
  238. ---------------------------------------------<
  239. (VP/NP)
  240. ------------------------------------->B
  241. ((VP\VP)/NP)
  242. ----------------------------------------------------------------------------------<Sx
  243. (VP/NP)
  244. --------------------------------------------------------------------------------------------------->B
  245. (S/NP)
  246. ------------------------------------------------------------------------------------------------------------------->
  247. (N\N)
  248. -----------------------------------------------------------------------------------------------------------------------------<
  249. N
  250. Unicode support
  251. ---------------
  252. Unicode words are supported.
  253. >>> from nltk.ccg import chart, lexicon
  254. Lexicons for the tests:
  255. >>> lex = lexicon.parseLexicon(u'''
  256. ... :- S, N, NP, PP
  257. ...
  258. ... AdjI :: N\\N
  259. ... AdjD :: N/N
  260. ... AdvD :: S/S
  261. ... AdvI :: S\\S
  262. ... Det :: NP/N
  263. ... PrepNPCompl :: PP/NP
  264. ... PrepNAdjN :: S\\S/N
  265. ... PrepNAdjNP :: S\\S/NP
  266. ... VPNP :: S\\NP/NP
  267. ... VPPP :: S\\NP/PP
  268. ... VPser :: S\\NP/AdjI
  269. ...
  270. ... auto => N
  271. ... bebidas => N
  272. ... cine => N
  273. ... ley => N
  274. ... libro => N
  275. ... ministro => N
  276. ... panadería => N
  277. ... presidente => N
  278. ... super => N
  279. ...
  280. ... el => Det
  281. ... la => Det
  282. ... las => Det
  283. ... un => Det
  284. ...
  285. ... Ana => NP
  286. ... Pablo => NP
  287. ...
  288. ... y => var\\.,var/.,var
  289. ...
  290. ... pero => (S/NP)\\(S/NP)/(S/NP)
  291. ...
  292. ... anunció => VPNP
  293. ... compró => VPNP
  294. ... cree => S\\NP/S[dep]
  295. ... desmintió => VPNP
  296. ... lee => VPNP
  297. ... fueron => VPPP
  298. ...
  299. ... es => VPser
  300. ...
  301. ... interesante => AdjD
  302. ... interesante => AdjI
  303. ... nueva => AdjD
  304. ... nueva => AdjI
  305. ...
  306. ... a => PrepNPCompl
  307. ... en => PrepNAdjN
  308. ... en => PrepNAdjNP
  309. ...
  310. ... ayer => AdvI
  311. ...
  312. ... que => (NP\\NP)/(S/NP)
  313. ... que => S[dep]/S
  314. ... ''')
  315. >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
  316. >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
  317. ... printCCGDerivation(parse) # doctest: +SKIP
  318. ... # it fails on python2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354
  319. ... break
  320. el ministro anunció pero el presidente desmintió la nueva ley
  321. (NP/N) N ((S\NP)/NP) (((S/NP)\(S/NP))/(S/NP)) (NP/N) N ((S\NP)/NP) (NP/N) (N/N) N
  322. ------------------>
  323. NP
  324. ------------------>T
  325. (S/(S\NP))
  326. -------------------->
  327. NP
  328. -------------------->T
  329. (S/(S\NP))
  330. --------------------------------->B
  331. (S/NP)
  332. ----------------------------------------------------------->
  333. ((S/NP)\(S/NP))
  334. ------------>
  335. N
  336. -------------------->
  337. NP
  338. --------------------<T
  339. (S\(S/NP))
  340. -------------------------------------------------------------------------------<B
  341. (S\(S/NP))
  342. --------------------------------------------------------------------------------------------<B
  343. (S/NP)
  344. -------------------------------------------------------------------------------------------------------------->
  345. S