12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885 |
- .. Copyright (C) 2001-2019 NLTK Project
- .. For license information, see LICENSE.TXT
- =========
- Parsing
- =========
- Unit tests for the Context Free Grammar class
- ---------------------------------------------
- >>> from nltk import Nonterminal, nonterminals, Production, CFG
- >>> nt1 = Nonterminal('NP')
- >>> nt2 = Nonterminal('VP')
- >>> nt1.symbol()
- 'NP'
- >>> nt1 == Nonterminal('NP')
- True
- >>> nt1 == nt2
- False
- >>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
- >>> N, V, P, DT = nonterminals('N, V, P, DT')
- >>> prod1 = Production(S, [NP, VP])
- >>> prod2 = Production(NP, [DT, NP])
- >>> prod1.lhs()
- S
- >>> prod1.rhs()
- (NP, VP)
- >>> prod1 == Production(S, [NP, VP])
- True
- >>> prod1 == prod2
- False
- >>> grammar = CFG.fromstring("""
- ... S -> NP VP
- ... PP -> P NP
- ... NP -> 'the' N | N PP | 'the' N PP
- ... VP -> V NP | V PP | V NP PP
- ... N -> 'cat'
- ... N -> 'dog'
- ... N -> 'rug'
- ... V -> 'chased'
- ... V -> 'sat'
- ... P -> 'in'
- ... P -> 'on'
- ... """)
- Unit tests for the rd (Recursive Descent Parser) class
- ------------------------------------------------------
- Create and run a recursive descent parser over both a syntactically ambiguous
- and an unambiguous sentence.
- >>> from nltk.parse import RecursiveDescentParser
- >>> rd = RecursiveDescentParser(grammar)
- >>> sentence1 = 'the cat chased the dog'.split()
- >>> sentence2 = 'the cat chased the dog on the rug'.split()
- >>> for t in rd.parse(sentence1):
- ... print(t)
- (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
- >>> for t in rd.parse(sentence2):
- ... print(t)
- (S
- (NP the (N cat))
- (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
- (S
- (NP the (N cat))
- (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
- (dolist (expr doctest-font-lock-keywords)
- (add-to-list 'font-lock-keywords expr))
- font-lock-keywords
- (add-to-list 'font-lock-keywords
- (car doctest-font-lock-keywords))
- Unit tests for the sr (Shift Reduce Parser) class
- -------------------------------------------------
- Create and run a shift reduce parser over both a syntactically ambiguous
- and an unambiguous sentence. Note that, unlike the recursive descent parser,
- at most one parse is ever returned.
- >>> from nltk.parse import ShiftReduceParser
- >>> sr = ShiftReduceParser(grammar)
- >>> sentence1 = 'the cat chased the dog'.split()
- >>> sentence2 = 'the cat chased the dog on the rug'.split()
- >>> for t in sr.parse(sentence1):
- ... print(t)
- (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
- The shift reduce parser uses heuristics to decide what to do when there are
- multiple possible shift or reduce operations available. For the supplied
- grammar the wrong operation is clearly selected, so no parse is printed for
- the second sentence.
- >>> for t in sr.parse(sentence2):
- ... print(t)
- Unit tests for the Chart Parser class
- -------------------------------------
- We use the demo() function for testing.
- We must turn off the printing of times (print_times=False), so that the
- expected output below does not include timing information.
- >>> import nltk
- First we test tracing with a short sentence
- >>> nltk.parse.chart.demo(2, print_times=False, trace=1,
- ... sent='I saw a dog', numparses=1)
- * Sentence:
- I saw a dog
- ['I', 'saw', 'a', 'dog']
- <BLANKLINE>
- * Strategy: Bottom-up
- <BLANKLINE>
- |. I . saw . a . dog .|
- |[---------] . . .| [0:1] 'I'
- |. [---------] . .| [1:2] 'saw'
- |. . [---------] .| [2:3] 'a'
- |. . . [---------]| [3:4] 'dog'
- |> . . . .| [0:0] NP -> * 'I'
- |[---------] . . .| [0:1] NP -> 'I' *
- |> . . . .| [0:0] S -> * NP VP
- |> . . . .| [0:0] NP -> * NP PP
- |[---------> . . .| [0:1] S -> NP * VP
- |[---------> . . .| [0:1] NP -> NP * PP
- |. > . . .| [1:1] Verb -> * 'saw'
- |. [---------] . .| [1:2] Verb -> 'saw' *
- |. > . . .| [1:1] VP -> * Verb NP
- |. > . . .| [1:1] VP -> * Verb
- |. [---------> . .| [1:2] VP -> Verb * NP
- |. [---------] . .| [1:2] VP -> Verb *
- |. > . . .| [1:1] VP -> * VP PP
- |[-------------------] . .| [0:2] S -> NP VP *
- |. [---------> . .| [1:2] VP -> VP * PP
- |. . > . .| [2:2] Det -> * 'a'
- |. . [---------] .| [2:3] Det -> 'a' *
- |. . > . .| [2:2] NP -> * Det Noun
- |. . [---------> .| [2:3] NP -> Det * Noun
- |. . . > .| [3:3] Noun -> * 'dog'
- |. . . [---------]| [3:4] Noun -> 'dog' *
- |. . [-------------------]| [2:4] NP -> Det Noun *
- |. . > . .| [2:2] S -> * NP VP
- |. . > . .| [2:2] NP -> * NP PP
- |. [-----------------------------]| [1:4] VP -> Verb NP *
- |. . [------------------->| [2:4] S -> NP * VP
- |. . [------------------->| [2:4] NP -> NP * PP
- |[=======================================]| [0:4] S -> NP VP *
- |. [----------------------------->| [1:4] VP -> VP * PP
- Nr edges in chart: 33
- (S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
- <BLANKLINE>
- Then we test the different parsing Strategies.
- Note that the number of edges differs between the strategies.
- Top-down
- >>> nltk.parse.chart.demo(1, print_times=False, trace=0,
- ... sent='I saw John with a dog', numparses=2)
- * Sentence:
- I saw John with a dog
- ['I', 'saw', 'John', 'with', 'a', 'dog']
- <BLANKLINE>
- * Strategy: Top-down
- <BLANKLINE>
- Nr edges in chart: 48
- (S
- (NP I)
- (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
- (S
- (NP I)
- (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
- <BLANKLINE>
- Bottom-up
- >>> nltk.parse.chart.demo(2, print_times=False, trace=0,
- ... sent='I saw John with a dog', numparses=2)
- * Sentence:
- I saw John with a dog
- ['I', 'saw', 'John', 'with', 'a', 'dog']
- <BLANKLINE>
- * Strategy: Bottom-up
- <BLANKLINE>
- Nr edges in chart: 53
- (S
- (NP I)
- (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
- (S
- (NP I)
- (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
- <BLANKLINE>
- Bottom-up Left-Corner
- >>> nltk.parse.chart.demo(3, print_times=False, trace=0,
- ... sent='I saw John with a dog', numparses=2)
- * Sentence:
- I saw John with a dog
- ['I', 'saw', 'John', 'with', 'a', 'dog']
- <BLANKLINE>
- * Strategy: Bottom-up left-corner
- <BLANKLINE>
- Nr edges in chart: 36
- (S
- (NP I)
- (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
- (S
- (NP I)
- (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
- <BLANKLINE>
- Left-Corner with Bottom-Up Filter
- >>> nltk.parse.chart.demo(4, print_times=False, trace=0,
- ... sent='I saw John with a dog', numparses=2)
- * Sentence:
- I saw John with a dog
- ['I', 'saw', 'John', 'with', 'a', 'dog']
- <BLANKLINE>
- * Strategy: Filtered left-corner
- <BLANKLINE>
- Nr edges in chart: 28
- (S
- (NP I)
- (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
- (S
- (NP I)
- (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
- <BLANKLINE>
- The stepping chart parser
- >>> nltk.parse.chart.demo(5, print_times=False, trace=1,
- ... sent='I saw John with a dog', numparses=2)
- * Sentence:
- I saw John with a dog
- ['I', 'saw', 'John', 'with', 'a', 'dog']
- <BLANKLINE>
- * Strategy: Stepping (top-down vs bottom-up)
- <BLANKLINE>
- *** SWITCH TO TOP DOWN
- |[------] . . . . .| [0:1] 'I'
- |. [------] . . . .| [1:2] 'saw'
- |. . [------] . . .| [2:3] 'John'
- |. . . [------] . .| [3:4] 'with'
- |. . . . [------] .| [4:5] 'a'
- |. . . . . [------]| [5:6] 'dog'
- |> . . . . . .| [0:0] S -> * NP VP
- |> . . . . . .| [0:0] NP -> * NP PP
- |> . . . . . .| [0:0] NP -> * Det Noun
- |> . . . . . .| [0:0] NP -> * 'I'
- |[------] . . . . .| [0:1] NP -> 'I' *
- |[------> . . . . .| [0:1] S -> NP * VP
- |[------> . . . . .| [0:1] NP -> NP * PP
- |. > . . . . .| [1:1] VP -> * VP PP
- |. > . . . . .| [1:1] VP -> * Verb NP
- |. > . . . . .| [1:1] VP -> * Verb
- |. > . . . . .| [1:1] Verb -> * 'saw'
- |. [------] . . . .| [1:2] Verb -> 'saw' *
- |. [------> . . . .| [1:2] VP -> Verb * NP
- |. [------] . . . .| [1:2] VP -> Verb *
- |[-------------] . . . .| [0:2] S -> NP VP *
- |. [------> . . . .| [1:2] VP -> VP * PP
- *** SWITCH TO BOTTOM UP
- |. . > . . . .| [2:2] NP -> * 'John'
- |. . . > . . .| [3:3] PP -> * 'with' NP
- |. . . > . . .| [3:3] Prep -> * 'with'
- |. . . . > . .| [4:4] Det -> * 'a'
- |. . . . . > .| [5:5] Noun -> * 'dog'
- |. . [------] . . .| [2:3] NP -> 'John' *
- |. . . [------> . .| [3:4] PP -> 'with' * NP
- |. . . [------] . .| [3:4] Prep -> 'with' *
- |. . . . [------] .| [4:5] Det -> 'a' *
- |. . . . . [------]| [5:6] Noun -> 'dog' *
- |. [-------------] . . .| [1:3] VP -> Verb NP *
- |[--------------------] . . .| [0:3] S -> NP VP *
- |. [-------------> . . .| [1:3] VP -> VP * PP
- |. . > . . . .| [2:2] S -> * NP VP
- |. . > . . . .| [2:2] NP -> * NP PP
- |. . . . > . .| [4:4] NP -> * Det Noun
- |. . [------> . . .| [2:3] S -> NP * VP
- |. . [------> . . .| [2:3] NP -> NP * PP
- |. . . . [------> .| [4:5] NP -> Det * Noun
- |. . . . [-------------]| [4:6] NP -> Det Noun *
- |. . . [--------------------]| [3:6] PP -> 'with' NP *
- |. [----------------------------------]| [1:6] VP -> VP PP *
- *** SWITCH TO TOP DOWN
- |. . > . . . .| [2:2] NP -> * Det Noun
- |. . . . > . .| [4:4] NP -> * NP PP
- |. . . > . . .| [3:3] VP -> * VP PP
- |. . . > . . .| [3:3] VP -> * Verb NP
- |. . . > . . .| [3:3] VP -> * Verb
- |[=========================================]| [0:6] S -> NP VP *
- |. [---------------------------------->| [1:6] VP -> VP * PP
- |. . [---------------------------]| [2:6] NP -> NP PP *
- |. . . . [------------->| [4:6] NP -> NP * PP
- |. [----------------------------------]| [1:6] VP -> Verb NP *
- |. . [--------------------------->| [2:6] S -> NP * VP
- |. . [--------------------------->| [2:6] NP -> NP * PP
- |[=========================================]| [0:6] S -> NP VP *
- |. [---------------------------------->| [1:6] VP -> VP * PP
- |. . . . . . >| [6:6] VP -> * VP PP
- |. . . . . . >| [6:6] VP -> * Verb NP
- |. . . . . . >| [6:6] VP -> * Verb
- *** SWITCH TO BOTTOM UP
- |. . . . > . .| [4:4] S -> * NP VP
- |. . . . [------------->| [4:6] S -> NP * VP
- *** SWITCH TO TOP DOWN
- *** SWITCH TO BOTTOM UP
- *** SWITCH TO TOP DOWN
- *** SWITCH TO BOTTOM UP
- *** SWITCH TO TOP DOWN
- *** SWITCH TO BOTTOM UP
- Nr edges in chart: 61
- (S
- (NP I)
- (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
- (S
- (NP I)
- (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
- <BLANKLINE>
- Unit tests for the Incremental Chart Parser class
- -------------------------------------------------
- The incremental chart parsers are defined in earleychart.py.
- We use the demo() function for testing. We must turn off the printing of times (print_times=False).
- >>> import nltk
- Earley Chart Parser
- >>> nltk.parse.earleychart.demo(print_times=False, trace=1,
- ... sent='I saw John with a dog', numparses=2)
- * Sentence:
- I saw John with a dog
- ['I', 'saw', 'John', 'with', 'a', 'dog']
- <BLANKLINE>
- |. I . saw . John . with . a . dog .|
- |[------] . . . . .| [0:1] 'I'
- |. [------] . . . .| [1:2] 'saw'
- |. . [------] . . .| [2:3] 'John'
- |. . . [------] . .| [3:4] 'with'
- |. . . . [------] .| [4:5] 'a'
- |. . . . . [------]| [5:6] 'dog'
- |> . . . . . .| [0:0] S -> * NP VP
- |> . . . . . .| [0:0] NP -> * NP PP
- |> . . . . . .| [0:0] NP -> * Det Noun
- |> . . . . . .| [0:0] NP -> * 'I'
- |[------] . . . . .| [0:1] NP -> 'I' *
- |[------> . . . . .| [0:1] S -> NP * VP
- |[------> . . . . .| [0:1] NP -> NP * PP
- |. > . . . . .| [1:1] VP -> * VP PP
- |. > . . . . .| [1:1] VP -> * Verb NP
- |. > . . . . .| [1:1] VP -> * Verb
- |. > . . . . .| [1:1] Verb -> * 'saw'
- |. [------] . . . .| [1:2] Verb -> 'saw' *
- |. [------> . . . .| [1:2] VP -> Verb * NP
- |. [------] . . . .| [1:2] VP -> Verb *
- |[-------------] . . . .| [0:2] S -> NP VP *
- |. [------> . . . .| [1:2] VP -> VP * PP
- |. . > . . . .| [2:2] NP -> * NP PP
- |. . > . . . .| [2:2] NP -> * Det Noun
- |. . > . . . .| [2:2] NP -> * 'John'
- |. . [------] . . .| [2:3] NP -> 'John' *
- |. [-------------] . . .| [1:3] VP -> Verb NP *
- |. . [------> . . .| [2:3] NP -> NP * PP
- |. . . > . . .| [3:3] PP -> * 'with' NP
- |[--------------------] . . .| [0:3] S -> NP VP *
- |. [-------------> . . .| [1:3] VP -> VP * PP
- |. . . [------> . .| [3:4] PP -> 'with' * NP
- |. . . . > . .| [4:4] NP -> * NP PP
- |. . . . > . .| [4:4] NP -> * Det Noun
- |. . . . > . .| [4:4] Det -> * 'a'
- |. . . . [------] .| [4:5] Det -> 'a' *
- |. . . . [------> .| [4:5] NP -> Det * Noun
- |. . . . . > .| [5:5] Noun -> * 'dog'
- |. . . . . [------]| [5:6] Noun -> 'dog' *
- |. . . . [-------------]| [4:6] NP -> Det Noun *
- |. . . [--------------------]| [3:6] PP -> 'with' NP *
- |. . . . [------------->| [4:6] NP -> NP * PP
- |. . [---------------------------]| [2:6] NP -> NP PP *
- |. [----------------------------------]| [1:6] VP -> VP PP *
- |[=========================================]| [0:6] S -> NP VP *
- |. [---------------------------------->| [1:6] VP -> VP * PP
- |. [----------------------------------]| [1:6] VP -> Verb NP *
- |. . [--------------------------->| [2:6] NP -> NP * PP
- |[=========================================]| [0:6] S -> NP VP *
- |. [---------------------------------->| [1:6] VP -> VP * PP
- (S
- (NP I)
- (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
- (S
- (NP I)
- (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
- Unit tests for LARGE context-free grammars
- ------------------------------------------
- Reading the ATIS grammar.
- >>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
- >>> grammar
- <Grammar with 5517 productions>
- Reading the test sentences.
- >>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
- >>> sentences = nltk.parse.util.extract_test_sentences(sentences)
- >>> len(sentences)
- 98
- >>> testsentence = sentences[22]
- >>> testsentence[0]
- ['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
- >>> testsentence[1]
- 17
- >>> sentence = testsentence[0]
- Now we test all different parsing strategies.
- Note that the number of edges differs between the strategies.
- Bottom-up parsing.
- >>> parser = nltk.parse.BottomUpChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 7661
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Bottom-up Left-corner parsing.
- >>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 4986
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Left-corner parsing with bottom-up filter.
- >>> parser = nltk.parse.LeftCornerChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 1342
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Top-down parsing.
- >>> parser = nltk.parse.TopDownChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 28352
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Incremental Bottom-up parsing.
- >>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 7661
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Incremental Bottom-up Left-corner parsing.
- >>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 4986
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Incremental Left-corner parsing with bottom-up filter.
- >>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 1342
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Incremental Top-down parsing.
- >>> parser = nltk.parse.IncrementalTopDownChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 28352
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Earley parsing. This is similar to the incremental top-down algorithm.
- >>> parser = nltk.parse.EarleyChartParser(grammar)
- >>> chart = parser.chart_parse(sentence)
- >>> print((chart.num_edges()))
- 28352
- >>> print((len(list(chart.parses(grammar.start())))))
- 17
- Unit tests for the Probabilistic CFG class
- ------------------------------------------
- >>> from nltk.corpus import treebank
- >>> from itertools import islice
- >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2
- Create a set of PCFG productions.
- >>> grammar = PCFG.fromstring("""
- ... A -> B B [.3] | C B C [.7]
- ... B -> B D [.5] | C [.5]
- ... C -> 'a' [.1] | 'b' [0.9]
- ... D -> 'b' [1.0]
- ... """)
- >>> prod = grammar.productions()[0]
- >>> prod
- A -> B B [0.3]
- >>> prod.lhs()
- A
- >>> prod.rhs()
- (B, B)
- >>> print((prod.prob()))
- 0.3
- >>> grammar.start()
- A
- >>> grammar.productions()
- [A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
- Induce some productions using parsed Treebank data.
- >>> productions = []
- >>> for fileid in treebank.fileids()[:2]:
- ... for t in treebank.parsed_sents(fileid):
- ... productions += t.productions()
- >>> grammar = induce_pcfg(S, productions)
- >>> grammar
- <Grammar with 71 productions>
- >>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
- [PP -> IN NP [1.0]]
- >>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
- [NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
- >>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
- [JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]
- >>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]
- [NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]
- Unit tests for the Probabilistic Chart Parse classes
- ----------------------------------------------------
- >>> tokens = "Jack saw Bob with my cookie".split()
- >>> grammar = toy_pcfg2
- >>> print(grammar)
- Grammar with 23 productions (start state = S)
- S -> NP VP [1.0]
- VP -> V NP [0.59]
- VP -> V [0.4]
- VP -> VP PP [0.01]
- NP -> Det N [0.41]
- NP -> Name [0.28]
- NP -> NP PP [0.31]
- PP -> P NP [1.0]
- V -> 'saw' [0.21]
- V -> 'ate' [0.51]
- V -> 'ran' [0.28]
- N -> 'boy' [0.11]
- N -> 'cookie' [0.12]
- N -> 'table' [0.13]
- N -> 'telescope' [0.14]
- N -> 'hill' [0.5]
- Name -> 'Jack' [0.52]
- Name -> 'Bob' [0.48]
- P -> 'with' [0.61]
- P -> 'under' [0.39]
- Det -> 'the' [0.41]
- Det -> 'a' [0.31]
- Det -> 'my' [0.28]
- Create several parsers using different queuing strategies and show the
- resulting parses.
- >>> from nltk.parse import pchart
- >>> parser = pchart.InsideChartParser(grammar)
- >>> for t in parser.parse(tokens):
- ... print(t)
- (S
- (NP (Name Jack))
- (VP
- (V saw)
- (NP
- (NP (Name Bob))
- (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
- (S
- (NP (Name Jack))
- (VP
- (VP (V saw) (NP (Name Bob)))
- (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
- >>> parser = pchart.RandomChartParser(grammar)
- >>> for t in parser.parse(tokens):
- ... print(t)
- (S
- (NP (Name Jack))
- (VP
- (V saw)
- (NP
- (NP (Name Bob))
- (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
- (S
- (NP (Name Jack))
- (VP
- (VP (V saw) (NP (Name Bob)))
- (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
- >>> parser = pchart.UnsortedChartParser(grammar)
- >>> for t in parser.parse(tokens):
- ... print(t)
- (S
- (NP (Name Jack))
- (VP
- (V saw)
- (NP
- (NP (Name Bob))
- (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
- (S
- (NP (Name Jack))
- (VP
- (VP (V saw) (NP (Name Bob)))
- (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
- >>> parser = pchart.LongestChartParser(grammar)
- >>> for t in parser.parse(tokens):
- ... print(t)
- (S
- (NP (Name Jack))
- (VP
- (V saw)
- (NP
- (NP (Name Bob))
- (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
- (S
- (NP (Name Jack))
- (VP
- (VP (V saw) (NP (Name Bob)))
- (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
- >>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)
- >>> for t in parser.parse(tokens):
- ... print(t)
- Unit tests for the Viterbi Parse classes
- ----------------------------------------
- >>> from nltk.parse import ViterbiParser
- >>> tokens = "Jack saw Bob with my cookie".split()
- >>> grammar = toy_pcfg2
- Parse the tokenized sentence.
- >>> parser = ViterbiParser(grammar)
- >>> for t in parser.parse(tokens):
- ... print(t)
- (S
- (NP (Name Jack))
- (VP
- (V saw)
- (NP
- (NP (Name Bob))
- (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
- Unit tests for the FeatStructNonterminal class
- ----------------------------------------------
- >>> from nltk.grammar import FeatStructNonterminal
- >>> FeatStructNonterminal(
- ... pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
- [agr=[gender='f', number='pl'], pos='n']
- >>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
- VP[+fin]/NP[+pl]
- Tracing the Feature Chart Parser
- --------------------------------
- We use the featurechart.demo() function for tracing the Feature Chart Parser.
- >>> nltk.parse.featurechart.demo(print_times=False,
- ... print_grammar=True,
- ... parser=nltk.parse.featurechart.FeatureChartParser,
- ... sent='I saw John with a dog')
- <BLANKLINE>
- Grammar with 18 productions (start state = S[])
- S[] -> NP[] VP[]
- PP[] -> Prep[] NP[]
- NP[] -> NP[] PP[]
- VP[] -> VP[] PP[]
- VP[] -> Verb[] NP[]
- VP[] -> Verb[]
- NP[] -> Det[pl=?x] Noun[pl=?x]
- NP[] -> 'John'
- NP[] -> 'I'
- Det[] -> 'the'
- Det[] -> 'my'
- Det[-pl] -> 'a'
- Noun[-pl] -> 'dog'
- Noun[-pl] -> 'cookie'
- Verb[] -> 'ate'
- Verb[] -> 'saw'
- Prep[] -> 'with'
- Prep[] -> 'under'
- <BLANKLINE>
- * FeatureChartParser
- Sentence: I saw John with a dog
- |.I.s.J.w.a.d.|
- |[-] . . . . .| [0:1] 'I'
- |. [-] . . . .| [1:2] 'saw'
- |. . [-] . . .| [2:3] 'John'
- |. . . [-] . .| [3:4] 'with'
- |. . . . [-] .| [4:5] 'a'
- |. . . . . [-]| [5:6] 'dog'
- |[-] . . . . .| [0:1] NP[] -> 'I' *
- |[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
- |[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
- |. [-] . . . .| [1:2] Verb[] -> 'saw' *
- |. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
- |. [-] . . . .| [1:2] VP[] -> Verb[] *
- |. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
- |[---] . . . .| [0:2] S[] -> NP[] VP[] *
- |. . [-] . . .| [2:3] NP[] -> 'John' *
- |. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
- |. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
- |. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
- |. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
- |[-----] . . .| [0:3] S[] -> NP[] VP[] *
- |. . . [-] . .| [3:4] Prep[] -> 'with' *
- |. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
- |. . . . [-] .| [4:5] Det[-pl] -> 'a' *
- |. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
- |. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
- |. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
- |. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
- |. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
- |. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
- |. . [-------]| [2:6] NP[] -> NP[] PP[] *
- |. [---------]| [1:6] VP[] -> VP[] PP[] *
- |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
- |[===========]| [0:6] S[] -> NP[] VP[] *
- |. . [------->| [2:6] S[] -> NP[] * VP[] {}
- |. . [------->| [2:6] NP[] -> NP[] * PP[] {}
- |. [---------]| [1:6] VP[] -> Verb[] NP[] *
- |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
- |[===========]| [0:6] S[] -> NP[] VP[] *
- (S[]
- (NP[] I)
- (VP[]
- (VP[] (Verb[] saw) (NP[] John))
- (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))
- (S[]
- (NP[] I)
- (VP[]
- (Verb[] saw)
- (NP[]
- (NP[] John)
- (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
- Unit tests for the Feature Chart Parser classes
- -----------------------------------------------
- The list of parsers we want to test.
- >>> parsers = [nltk.parse.featurechart.FeatureChartParser,
- ... nltk.parse.featurechart.FeatureTopDownChartParser,
- ... nltk.parse.featurechart.FeatureBottomUpChartParser,
- ... nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser,
- ... nltk.parse.earleychart.FeatureIncrementalChartParser,
- ... nltk.parse.earleychart.FeatureEarleyChartParser,
- ... nltk.parse.earleychart.FeatureIncrementalTopDownChartParser,
- ... nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser,
- ... nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser,
- ... ]
- A helper function that tests each parser on the given grammar and sentence.
- We check that the number of trees is correct, and that all parsers
- return the same trees. Otherwise an error message is printed.
- >>> def unittest(grammar, sentence, nr_trees):
- ... sentence = sentence.split()
- ... trees = None
- ... for P in parsers:
- ... result = P(grammar).parse(sentence)
- ... result = set(tree.freeze() for tree in result)
- ... if len(result) != nr_trees:
- ... print("Wrong nr of trees:", len(result))
- ... elif trees is None:
- ... trees = result
- ... elif result != trees:
- ... print("Trees differ for parser:", P.__name__)
- The demo grammar from before, with an ambiguous sentence.
- >>> isawjohn = nltk.parse.featurechart.demo_grammar()
- >>> unittest(isawjohn, "I saw John with a dog with my cookie", 5)
- This grammar tests that variables in different grammar rules are renamed
- before unification. (The problematic variable is in this case ?X).
- >>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring('''
- ... S[] -> NP[num=?N] VP[num=?N, slash=?X]
- ... NP[num=?X] -> "what"
- ... NP[num=?X] -> "that"
- ... VP[num=?P, slash=none] -> V[num=?P] NP[]
- ... V[num=sg] -> "was"
- ... ''')
- >>> unittest(whatwasthat, "what was that", 1)
- This grammar tests that the same rule can be used in different places
- in another rule, and that the variables are properly renamed.
- >>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring('''
- ... S[] -> NP[case=nom] V[] NP[case=acc]
- ... NP[case=?X] -> Pron[case=?X]
- ... Pron[] -> "this"
- ... Pron[] -> "that"
- ... V[] -> "loves"
- ... ''')
- >>> unittest(thislovesthat, "this loves that", 1)
- Tests for loading feature grammar files
- ---------------------------------------
- Alternative 1: first load the grammar, then create the parser.
- >>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg')
- >>> fcp1 = nltk.parse.FeatureChartParser(fcfg)
- >>> print((type(fcp1)))
- <class 'nltk.parse.featurechart.FeatureChartParser'>
- Alternative 2: directly load the parser.
- >>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg')
- >>> print((type(fcp2)))
- <class 'nltk.parse.featurechart.FeatureChartParser'>
|