- .. Copyright (C) 2001-2019 NLTK Project
- .. For license information, see LICENSE.TXT
- ==============================
- Combinatory Categorial Grammar
- ==============================
- Relative Clauses
- ----------------
- >>> from nltk.ccg import chart, lexicon
- Construct a lexicon:
- >>> lex = lexicon.parseLexicon('''
- ... :- S, NP, N, VP
- ...
- ... Det :: NP/N
- ... Pro :: NP
- ... Modal :: S\\NP/VP
- ...
- ... TV :: VP/NP
- ... DTV :: TV/NP
- ...
- ... the => Det
- ...
- ... that => Det
- ... that => NP
- ...
- ... I => Pro
- ... you => Pro
- ... we => Pro
- ...
- ... chef => N
- ... cake => N
- ... children => N
- ... dough => N
- ...
- ... will => Modal
- ... should => Modal
- ... might => Modal
- ... must => Modal
- ...
- ... and => var\\.,var/.,var
- ...
- ... to => VP[to]/VP
- ...
- ... without => (VP\\VP)/VP[ing]
- ...
- ... be => TV
- ... cook => TV
- ... eat => TV
- ...
- ... cooking => VP[ing]/NP
- ...
- ... give => DTV
- ...
- ... is => (S\\NP)/NP
- ... prefer => (S\\NP)/NP
- ...
- ... which => (N\\N)/(S/NP)
- ...
- ... persuade => (VP/VP[to])/NP
- ... ''')
- >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
- >>> for parse in parser.parse("you prefer that cake".split()):
- ... chart.printCCGDerivation(parse)
- ... break
- ...
- you prefer that cake
- NP ((S\NP)/NP) (NP/N) N
- -------------->
- NP
- --------------------------->
- (S\NP)
- --------------------------------<
- S
- >>> for parse in parser.parse("that is the cake which you prefer".split()):
- ... chart.printCCGDerivation(parse)
- ... break
- ...
- that is the cake which you prefer
- NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/NP)
- ----->T
- (S/(S\NP))
- ------------------>B
- (S/NP)
- ---------------------------------->
- (N\N)
- ----------------------------------------<
- N
- ------------------------------------------------>
- NP
- ------------------------------------------------------------->
- (S\NP)
- -------------------------------------------------------------------<
- S
- Some other sentences to try:
- "that is the cake which we will persuade the chef to cook"
- "that is the cake which we will persuade the chef to give the children"
- >>> sent = "that is the dough which you will eat without cooking".split()
- >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
- ... chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
- Without Substitution (no output):
- >>> for parse in nosub_parser.parse(sent):
- ... chart.printCCGDerivation(parse)
- With Substitution:
- >>> for parse in parser.parse(sent):
- ... chart.printCCGDerivation(parse)
- ... break
- ...
- that is the dough which you will eat without cooking
- NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
- ----->T
- (S/(S\NP))
- ------------------------------------->B
- ((VP\VP)/NP)
- ----------------------------------------------<Sx
- (VP/NP)
- ----------------------------------------------------------->B
- ((S\NP)/NP)
- ---------------------------------------------------------------->B
- (S/NP)
- -------------------------------------------------------------------------------->
- (N\N)
- ---------------------------------------------------------------------------------------<
- N
- ----------------------------------------------------------------------------------------------->
- NP
- ------------------------------------------------------------------------------------------------------------>
- (S\NP)
- ------------------------------------------------------------------------------------------------------------------<
- S
- Conjunction
- -----------
- >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
- >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
- >>> from nltk.ccg import lexicon
- Lexicons for the tests:
- >>> test1_lex = '''
- ... :- S,N,NP,VP
- ... I => NP
- ... you => NP
- ... will => S\\NP/VP
- ... cook => VP/NP
- ... which => (N\\N)/(S/NP)
- ... and => var\\.,var/.,var
- ... might => S\\NP/VP
- ... eat => VP/NP
- ... the => NP/N
- ... mushrooms => N
- ... parsnips => N'''
- >>> test2_lex = '''
- ... :- N, S, NP, VP
- ... articles => N
- ... the => NP/N
- ... and => var\\.,var/.,var
- ... which => (N\\N)/(S/NP)
- ... I => NP
- ... anyone => NP
- ... will => (S/VP)\\NP
- ... file => VP/NP
- ... without => (VP\\VP)/VP[ing]
- ... forget => VP/NP
- ... reading => VP[ing]/NP
- ... '''
- Tests handling of conjunctions.
- Note that while the two derivations are different, they are semantically equivalent.
- >>> lex = lexicon.parseLexicon(test1_lex)
- >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
- >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
- ... printCCGDerivation(parse)
- I will cook and might eat the mushrooms and parsnips
- NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N
- ---------------------->B
- ((S\NP)/NP)
- ---------------------->B
- ((S\NP)/NP)
- ------------------------------------------------->
- (((S\NP)/NP)\.,((S\NP)/NP))
- -----------------------------------------------------------------------<
- ((S\NP)/NP)
- ------------------------------------->
- (N\.,N)
- ------------------------------------------------<
- N
- -------------------------------------------------------->
- NP
- ------------------------------------------------------------------------------------------------------------------------------->
- (S\NP)
- -----------------------------------------------------------------------------------------------------------------------------------<
- S
- I will cook and might eat the mushrooms and parsnips
- NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N
- ---------------------->B
- ((S\NP)/NP)
- ---------------------->B
- ((S\NP)/NP)
- ------------------------------------------------->
- (((S\NP)/NP)\.,((S\NP)/NP))
- -----------------------------------------------------------------------<
- ((S\NP)/NP)
- ------------------------------------------------------------------------------->B
- ((S\NP)/N)
- ------------------------------------->
- (N\.,N)
- ------------------------------------------------<
- N
- ------------------------------------------------------------------------------------------------------------------------------->
- (S\NP)
- -----------------------------------------------------------------------------------------------------------------------------------<
- S
- Tests handling of subject extraction.
- It is interesting to note that the two parses are clearly semantically different.
- >>> lex = lexicon.parseLexicon(test2_lex)
- >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
- >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
- ... printCCGDerivation(parse)
- articles which I will file and forget without reading
- N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
- -----------------<
- (S/VP)
- ------------------------------------->B
- ((VP\VP)/NP)
- ----------------------------------------------<Sx
- (VP/NP)
- ------------------------------------------------------------------------->
- ((VP/NP)\.,(VP/NP))
- ----------------------------------------------------------------------------------<
- (VP/NP)
- --------------------------------------------------------------------------------------------------->B
- (S/NP)
- ------------------------------------------------------------------------------------------------------------------->
- (N\N)
- -----------------------------------------------------------------------------------------------------------------------------<
- N
- articles which I will file and forget without reading
- N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
- -----------------<
- (S/VP)
- ------------------------------------>
- ((VP/NP)\.,(VP/NP))
- ---------------------------------------------<
- (VP/NP)
- ------------------------------------->B
- ((VP\VP)/NP)
- ----------------------------------------------------------------------------------<Sx
- (VP/NP)
- --------------------------------------------------------------------------------------------------->B
- (S/NP)
- ------------------------------------------------------------------------------------------------------------------->
- (N\N)
- -----------------------------------------------------------------------------------------------------------------------------<
- N
- Unicode support
- ---------------
- Unicode words are supported.
- >>> from nltk.ccg import chart, lexicon
- Lexicon for the test:
- >>> lex = lexicon.parseLexicon(u'''
- ... :- S, N, NP, PP
- ...
- ... AdjI :: N\\N
- ... AdjD :: N/N
- ... AdvD :: S/S
- ... AdvI :: S\\S
- ... Det :: NP/N
- ... PrepNPCompl :: PP/NP
- ... PrepNAdjN :: S\\S/N
- ... PrepNAdjNP :: S\\S/NP
- ... VPNP :: S\\NP/NP
- ... VPPP :: S\\NP/PP
- ... VPser :: S\\NP/AdjI
- ...
- ... auto => N
- ... bebidas => N
- ... cine => N
- ... ley => N
- ... libro => N
- ... ministro => N
- ... panadería => N
- ... presidente => N
- ... super => N
- ...
- ... el => Det
- ... la => Det
- ... las => Det
- ... un => Det
- ...
- ... Ana => NP
- ... Pablo => NP
- ...
- ... y => var\\.,var/.,var
- ...
- ... pero => (S/NP)\\(S/NP)/(S/NP)
- ...
- ... anunció => VPNP
- ... compró => VPNP
- ... cree => S\\NP/S[dep]
- ... desmintió => VPNP
- ... lee => VPNP
- ... fueron => VPPP
- ...
- ... es => VPser
- ...
- ... interesante => AdjD
- ... interesante => AdjI
- ... nueva => AdjD
- ... nueva => AdjI
- ...
- ... a => PrepNPCompl
- ... en => PrepNAdjN
- ... en => PrepNAdjNP
- ...
- ... ayer => AdvI
- ...
- ... que => (NP\\NP)/(S/NP)
- ... que => S[dep]/S
- ... ''')
- >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
- >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
- ... printCCGDerivation(parse) # doctest: +SKIP
- ... # This fails on Python 2.7 because of the Unicode problem explained in https://github.com/nltk/nltk/pull/1354
- ... break
- el ministro anunció pero el presidente desmintió la nueva ley
- (NP/N) N ((S\NP)/NP) (((S/NP)\(S/NP))/(S/NP)) (NP/N) N ((S\NP)/NP) (NP/N) (N/N) N
- ------------------>
- NP
- ------------------>T
- (S/(S\NP))
- -------------------->
- NP
- -------------------->T
- (S/(S\NP))
- --------------------------------->B
- (S/NP)
- ----------------------------------------------------------->
- ((S/NP)\(S/NP))
- ------------>
- N
- -------------------->
- NP
- --------------------<T
- (S\(S/NP))
- -------------------------------------------------------------------------------<B
- (S\(S/NP))
- --------------------------------------------------------------------------------------------<B
- (S/NP)
- -------------------------------------------------------------------------------------------------------------->
- S
|