12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862 |
- # Natural Language Toolkit: CFG visualization
- #
- # Copyright (C) 2001-2019 NLTK Project
- # Author: Edward Loper <edloper@gmail.com>
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- """
- Visualization tools for CFGs.
- """
- # Idea for a nice demo:
- # - 3 panes: grammar, treelet, working area
- # - grammar is a list of productions
- # - when you select a production, the treelet that it licenses appears
- # in the treelet area
- # - the working area has the text on the bottom, and S at top. When
- # you select a production, it shows (ghosted) the locations where
- # that production's treelet could be attached to either the text
- # or the tree rooted at S.
- # - the user can drag the treelet onto one of those (or click on them?)
- # - the user can delete pieces of the tree from the working area
- # (right click?)
- # - connecting top to bottom? drag one NP onto another?
- #
# +-------------------------------------------------------------+
# | S -> NP VP   |                 S                            |
# |[NP -> Det N ]|                / \                           |
# |     ...      |              NP  VP                          |
# | N -> 'dog'   |                                              |
# | N -> 'cat'   |                                              |
# |     ...      |                                              |
# +--------------+                                              |
# |      NP      |                      Det     N               |
# |     /  \     |                       |      |               |
# |   Det    N   |  the    cat    saw   the    dog              |
# |              |                                              |
# +--------------+----------------------------------------------+
- #
- # Operations:
- # - connect a new treelet -- drag or click shadow
- # - delete a treelet -- right click
- # - if only connected to top, delete everything below
- # - if only connected to bottom, delete everything above
- # - connect top & bottom -- drag a leaf to a root or a root to a leaf
- # - disconnect top & bottom -- right click
- # - if connected to top & bottom, then disconnect
- import re
- from six import string_types
- from six.moves.tkinter import (
- Button,
- Canvas,
- Entry,
- Frame,
- IntVar,
- Label,
- Scrollbar,
- Text,
- Tk,
- Toplevel,
- )
- from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
- from nltk.tree import Tree
- from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
- from nltk.draw.util import (
- CanvasFrame,
- ColorizedList,
- ShowText,
- SymbolWidget,
- TextWidget,
- )
- ######################################################################
- # Production List
- ######################################################################
class ProductionList(ColorizedList):
    """
    A ``ColorizedList`` whose items are grammar productions.

    Each production is rendered as its left-hand side, an arrow
    symbol, and then its right-hand side elements; nonterminals and
    terminals are given different color tags.
    """

    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    def _init_colortags(self, textwidget, options):
        """Configure the tags that ``_item_repr`` attaches to text."""
        bold = ('helvetica', -12, 'bold')
        textwidget.tag_config('terminal', foreground='#006000')
        textwidget.tag_config('arrow', font='symbol', underline='0')
        textwidget.tag_config('nonterminal', foreground='blue', font=bold)

    def _item_repr(self, item):
        """
        Return a list of ``(text, tag)`` pairs describing how the
        production ``item`` should be displayed.
        """
        pieces = [
            ('%s\t' % item.lhs(), 'nonterminal'),
            (self.ARROW, 'arrow'),
        ]
        for symbol in item.rhs():
            if not isinstance(symbol, Nonterminal):
                # Terminals are shown quoted, via their repr.
                pieces.append((' %r' % symbol, 'terminal'))
            else:
                pieces.append((' %s' % symbol.symbol(), 'nonterminal'))
        return pieces
- ######################################################################
- # CFG Editor
- ######################################################################
# Help text shown by CFGEditor._help().  It is stripped of leading and
# trailing whitespace before being displayed.
_CFGEditor_HELP = """

The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions.  The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
is specified in the main text editing box.

Every non-blank line specifies a single production.  Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.

Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consist of alphanumeric characters and
underscores (_).  Nonterminals are colored blue.  If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.

Terminals must be surrounded by single quotes (') or double
quotes (\").  For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.

To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals.  Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol.  When you move your cursor to a
different line, your production will automatically be colorized.  If
there are any errors, they will be highlighted in red.

Note that the order of the productions is significant for some
algorithms.  To re-order the productions, use cut and paste to move
them.

Use the buttons at the bottom of the window when you are done editing
the CFG:
  - Ok: apply the new CFG, and exit the editor.
  - Apply: apply the new CFG, and do not exit the editor.
  - Reset: revert to the original CFG, and do not exit the editor.
  - Cancel: revert to the original CFG, and exit the editor.

"""
class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    ``CFGEditor`` imposes the following restrictions:

    - All nonterminals must be strings consisting of word
      characters.
    - All terminals must be strings consisting of word characters
      and space characters.

    The window contains an entry for the start symbol, a text box
    holding the productions (one or more alternatives per line), and
    Ok / Apply / Reset / Cancel / Help buttons.
    """

    # Regular expressions used by _analyze_line.  Precompile them, so
    # we can process the text faster.  All fragments containing regex
    # escapes are raw strings, so that ``\s`` and ``\w`` reach the regex
    # engine instead of being treated as (invalid) string escapes.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
    _ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
    _PRODUCTION_RE = re.compile(
        r"(^\s*\w+\s*)"  # LHS
        + r"(->|(" + ARROW + r"))\s*"  # arrow
        + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$"  # RHS
    )
    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
    _BOLD = ('helvetica', -12, 'bold')

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        """
        Create the editor window as a ``Toplevel`` of ``parent``.

        :param parent: The Tk widget that owns the editor window.
        :param cfg: The grammar to edit.  If None, an empty grammar
            whose start symbol is ``S`` is used.
        :param set_cfg_callback: If not None, a function that is
            called with a ``CFG`` whenever the grammar is applied or
            reset.
        """
        self._parent = parent
        if cfg is None:
            cfg = CFG(Nonterminal('S'), [])
        self._cfg = cfg
        self._set_cfg_callback = set_cfg_callback
        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """Build the frame holding the start-symbol entry and labels."""
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """Build the button row at the bottom of the window."""
        frame = self._buttonframe = Frame(self._top)
        left_buttons = (
            ('Ok', self._ok),
            ('Apply', self._apply),
            ('Reset', self._reset),
            ('Cancel', self._cancel),
        )
        for label, command in left_buttons:
            Button(
                frame, text=label, command=command, underline=0, takefocus=0
            ).pack(side='left')
        Button(
            frame, text='Help', command=self._help, underline=0, takefocus=0
        ).pack(side='right')

    def _init_bindings(self):
        """Set the window title and the keyboard shortcuts."""
        self._top.title('CFG Editor')
        # NOTE: <Control-x> and <Control-c> are intentionally left
        # unbound here (presumably so they stay available for cut and
        # copy in the text widget).
        shortcuts = (
            (
                self._cancel,
                (
                    '<Control-q>',
                    '<Alt-q>',
                    '<Control-d>',
                    '<Alt-x>',
                    '<Escape>',
                    '<Alt-c>',
                ),
            ),
            (self._ok, ('<Control-o>', '<Alt-o>')),
            (self._apply, ('<Control-a>', '<Alt-a>')),
            (self._reset, ('<Control-r>', '<Alt-r>')),
            (self._help, ('<Control-h>', '<Alt-h>', '<F1>')),
        )
        for handler, sequences in shortcuts:
            for sequence in sequences:
                self._top.bind(sequence, handler)

    def _init_prodframe(self):
        """
        Build the frame holding the production text box and its
        scrollbar, install the editing bindings, and fill the text box
        with the productions of the grammar being edited.
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(
            self._prodframe, background='#e0e0e0', exportselection=1
        )
        self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical')
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind('>', self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Tab cycles focus.
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
            # Stop event processing here; otherwise the Text widget's
            # own <Tab> binding runs afterwards, inserting a tab and
            # taking the focus back.
            return 'break'

        self._textwidget.bind('<Tab>', cycle)

        # Add the productions to the text widget, and colorize them.
        for lhs, rhss in self._grouped_productions():
            self._textwidget.insert('end', self._format_production_line(lhs, rhss))
        self._analyze()

    def _grouped_productions(self):
        """
        Return the grammar's productions as a list of ``(lhs, rhss)``
        pairs, where adjacent productions with the same left-hand side
        are merged into a single pair.  Productions with an empty
        right-hand side are never merged with their neighbors.
        """
        prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
        # Walk backwards so that deleting entry i doesn't disturb the
        # entries that are still to be visited.
        for i in range(len(prod_tuples) - 1, 0, -1):
            if prod_tuples[i][0] != prod_tuples[i - 1][0]:
                continue
            if () in prod_tuples[i][1] or () in prod_tuples[i - 1][1]:
                continue
            prod_tuples[i - 1][1].extend(prod_tuples[i][1])
            del prod_tuples[i]
        return prod_tuples

    @staticmethod
    def _format_production_line(lhs, rhss):
        """
        Return the line of editor text (newline-terminated) for the
        productions that rewrite ``lhs`` as each alternative in
        ``rhss``, e.g. ``"NP -> Det N | 'I'\\n"``.
        """
        alternatives = []
        for rhs in rhss:
            alternatives.append(
                ''.join(
                    ' %s' % elt if isinstance(elt, Nonterminal) else ' %r' % elt
                    for elt in rhs
                )
            )
        return '%s ->' % lhs + ' |'.join(alternatives) + '\n'

    def _clear_tags(self, linenum):
        """
        Remove all tags (except ``arrow`` and ``sel``) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0' % linenum
        end = '%d.end' % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any ``'->'`` text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # First pass: swap each "->" for a tab followed by the arrow
        # symbol.  Both inserts happen at the same index, so the tab
        # (inserted second) ends up in front of the arrow.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '':
                break
            self._textwidget.delete(arrow, arrow + '+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        # Second pass: make sure every arrow symbol carries the
        # 'arrow' tag.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search(self.ARROW, arrow + '+1char', 'end+1char')
            if arrow == '':
                break
            self._textwidget.tag_add('arrow', arrow, arrow + '+1char')

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        token = match.group()
        # What type of token is it?
        if token[0] in "'\"":
            tag = 'terminal'
        elif token in ('->', self.ARROW):
            tag = 'arrow'
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = 'nonterminal_' + token
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        """
        Configure the text tag used for one nonterminal and, if
        highlighting is enabled, make every occurrence light up while
        the mouse is over any of them.
        """
        self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return

        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')

        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')

        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get('%d.0' % linenum, '%d.end' % linenum)

        # If it's a valid production, then colorize each token.
        if CFGEditor._PRODUCTION_RE.match(line):
            for match in CFGEditor._TOKEN_RE.finditer(line):
                self._analyze_token(match, linenum)
        elif line.strip() != '':
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        whole_line = ('%d.0' % linenum, '%d.end' % linenum)
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start, end = whole_line
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start, end = whole_line
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace ``->`` with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines + 1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.

        Any exception raised by ``_read_cfg_production`` for a line it
        cannot parse is propagated to the caller.
        """
        # Get the text and normalize it: arrow symbols back to "->",
        # tabs to spaces.  Both are literal (non-regex) replacements.
        text = self._textwidget.get('1.0', 'end')
        text = text.replace(self.ARROW, '->').replace('\t', ' ')

        # Convert each non-blank line to one or more CFG productions.
        productions = []
        for line in text.split('\n'):
            line = line.strip()
            if line == '':
                continue
            productions += _read_cfg_production(line)
        return productions

    def _destroy(self, *e):
        """Close the editor window (safe to call more than once)."""
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """Apply the edited grammar, then close the editor."""
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """
        Build a ``CFG`` from the start-symbol entry and the text box,
        and pass it to the callback (if there is one).
        """
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = CFG(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Replace the text box contents with the productions of the
        original grammar (one per line), and pass the original grammar
        to the callback (if there is one).
        """
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """Revert to the original grammar, then close the editor."""
        try:
            self._reset()
        except Exception:
            # Reverting is best-effort; whatever goes wrong, we still
            # want the window to close.
            pass
        self._destroy()

    def _help(self, *e):
        """Show the editor's help text in a separate window."""
        title = 'Help: CFG Editor'
        text = _CFGEditor_HELP.strip()
        # The default font's not very legible; try using 'fixed' instead.
        try:
            ShowText(self._parent, title, text, width=75, font='fixed')
        except Exception:
            # Fall back to the default font if 'fixed' can't be used.
            ShowText(self._parent, title, text, width=75)
- ######################################################################
- # New Demo (built tree based on cfg)
- ######################################################################
class CFGDemo(object):
    """
    An (unfinished) graphical demo for building a tree from a grammar.

    The window shows the grammar's productions in a list on the left,
    a canvas below that list for the treelet of the selected
    production, and a workspace canvas on the right that holds the
    start symbol and the words of the text.
    """

    def __init__(self, grammar, text):
        """
        :param grammar: The grammar whose productions are listed, and
            whose start symbol becomes the root in the workspace.
        :param text: An iterable of words; each one becomes a leaf in
            the workspace.
        """
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    # //////////////////////////////////////////////////
    # Initialization
    # //////////////////////////////////////////////////

    def _init_bindings(self, top):
        """Bind Control-q to close the window."""
        top.bind('<Control-q>', self.destroy)

    def _init_menubar(self, parent):
        """Placeholder: no menu bar is created yet."""
        pass

    def _init_buttons(self, parent):
        """Placeholder: no buttons are created yet."""
        pass

    def _init_grammar(self, parent):
        """Create the production list and hook up its callbacks."""
        self._prodlist = ProductionList(parent, self._grammar, width=20)
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the (initially empty) canvas for the treelet."""
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas and fill it with a fresh tree."""
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    # //////////////////////////////////////////////////
    # Workspace
    # //////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Replace the workspace tree with a new one whose root is the
        grammar's start symbol and whose children are the words of the
        text, with the words moved down below the root.
        """
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = [
            TextWidget(c, word, font=leaf_font, draggable=1) for word in self._text
        ]

        # Put it all together into one tree.  The connecting lines are
        # drawn in white, the same color as the canvas background.
        self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves:
            leaf.move(0, 100)

    def workspace_markprod(self, production):
        """Placeholder: marking productions in the workspace is not implemented."""
        pass

    def _markproduction(self, prod, tree=None):
        """
        Find every position among the subtrees of ``tree`` (by
        default, the workspace tree) where the right-hand side of
        ``prod`` matches a run of adjacent subtrees, and report each
        match by printing its index.

        A nonterminal matches a tree segment whose label text is the
        nonterminal's symbol; a terminal matches a text widget with
        the same text.
        """
        if tree is None:
            tree = self._tree
        rhs = prod.rhs()
        widgets = tree.subtrees()

        # "+ 1" so that the window ending at the last subtree is
        # tried too.
        for i in range(len(widgets) - len(rhs) + 1):
            for node, widget in zip(rhs, widgets[i : i + len(rhs)]):
                if (
                    isinstance(node, Nonterminal)
                    and isinstance(widget, TreeSegmentWidget)
                    and node.symbol() == widget.label().text()
                ):
                    continue  # matching nonterminal
                if (
                    isinstance(node, string_types)
                    and isinstance(widget, TextWidget)
                    and node == widget.text()
                ):
                    continue  # matching terminal
                break
            else:
                # Everything matched!
                # TODO: mark the match in the workspace instead of
                # just printing it.
                print('MATCH AT', i)

    # //////////////////////////////////////////////////
    # Grammar
    # //////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for selecting a production in the production list:
        highlight it, draw the treelet it licenses in the treelet
        canvas, and look for places where it matches the workspace.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None:
            self._treelet.destroy()

        # Convert the production to a tree.  ``Tree`` takes a node
        # value and a list of children; nonterminals on the right-hand
        # side become childless subtrees, terminals become leaves.
        children = [
            Tree(elt.symbol(), []) if isinstance(elt, Nonterminal) else elt
            for elt in production.rhs()
        ]
        tree = Tree(production.lhs().symbol(), children)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))
        self._treelet = tree_to_treesegment(
            canvas, tree, node_font=node_font, leaf_font=leaf_font
        )
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        """Close the demo window."""
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        """Enter the Tk main loop of the demo window."""
        self._top.mainloop(*args, **kwargs)
def demo2():
    """
    Build a small example grammar and sentence, open a ``CFGDemo``
    window on them, and enter its main loop.
    """
    from nltk import Nonterminal, Production, CFG

    symbol_names = 'S VP NP PP P N Name V Det'.split()
    S, VP, NP, PP, P, N, Name, V, Det = [Nonterminal(s) for s in symbol_names]

    syntactic_productions = (
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),
    )
    lexical_productions = (
        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )

    grammar = CFG(S, syntactic_productions + lexical_productions)
    sentence = 'I saw a man in the park'.split()

    CFGDemo(grammar, sentence).mainloop()
- ######################################################################
- # Old Demo
- ######################################################################
def demo():
    """
    Open a ``CFGEditor`` on a small example grammar.  Every grammar
    that the editor applies or resets to is printed.  The function
    returns when the main window is closed.
    """
    from nltk import CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """
    )

    def cb(grammar):
        print(grammar)

    top = Tk()
    # The editor registers itself with Tk through its widgets and
    # bindings, so no local reference to it is needed.
    CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
def demo3():
    """
    Show a ``ProductionList`` for a small example grammar, with two
    of its productions marked.  Pressing ``q`` closes the window; the
    function returns once the window is closed.
    """
    from nltk import Production

    (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
        'S, VP, NP, PP, P, N, Name, V, Det'
    )

    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),
        # Lexical Productions
        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )

    t = Tk()

    def destroy(e, t=t):
        t.destroy()

    t.bind('q', destroy)
    p = ProductionList(t, productions)
    p.pack(expand=1, fill='both')
    p.add_callback('select', p.markonly)
    p.add_callback('move', p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])

    # Run the event loop, like the other demos do; without it the
    # window never processes the 'q' binding or the list callbacks
    # when this is run as a script.
    t.mainloop()
# When run as a script, launch the CFG editor demo.
if __name__ == "__main__":
    demo()
|