# cfg.py
  1. # Natural Language Toolkit: CFG visualization
  2. #
  3. # Copyright (C) 2001-2019 NLTK Project
  4. # Author: Edward Loper <edloper@gmail.com>
  5. # URL: <http://nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. """
  8. Visualization tools for CFGs.
  9. """
# Idea for a nice demo:
#   - 3 panes: grammar, treelet, working area
#     - grammar is a list of productions
#     - when you select a production, the treelet that it licenses appears
#       in the treelet area
#     - the working area has the text on the bottom, and S at top.  When
#       you select a production, it shows (ghosted) the locations where
#       that production's treelet could be attached to either the text
#       or the tree rooted at S.
#   - the user can drag the treelet onto one of those (or click on them?)
#   - the user can delete pieces of the tree from the working area
#     (right click?)
#   - connecting top to bottom? drag one NP onto another?
#
# +-------------------------------------------------------------+
# | S -> NP VP   |                 S                            |
# |[NP -> Det N ]|                / \                           |
# |     ...      |              NP  VP                          |
# | N -> 'dog'   |                                              |
# | N -> 'cat'   |                                              |
# |     ...      |                                              |
# +--------------+                                              |
# |      NP      |                      Det     N               |
# |     /  \     |                       |      |               |
# |   Det   N    |  the    cat    saw   the    dog              |
# |              |                                              |
# +--------------+----------------------------------------------+
#
# Operations:
#   - connect a new treelet -- drag or click shadow
#   - delete a treelet -- right click
#     - if only connected to top, delete everything below
#     - if only connected to bottom, delete everything above
#   - connect top & bottom -- drag a leaf to a root or a root to a leaf
#   - disconnect top & bottom -- right click
#     - if connected to top & bottom, then disconnect
  46. import re
  47. from six import string_types
  48. from six.moves.tkinter import (
  49. Button,
  50. Canvas,
  51. Entry,
  52. Frame,
  53. IntVar,
  54. Label,
  55. Scrollbar,
  56. Text,
  57. Tk,
  58. Toplevel,
  59. )
  60. from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
  61. from nltk.tree import Tree
  62. from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
  63. from nltk.draw.util import (
  64. CanvasFrame,
  65. ColorizedList,
  66. ShowText,
  67. SymbolWidget,
  68. TextWidget,
  69. )
  70. ######################################################################
  71. # Production List
  72. ######################################################################
  73. class ProductionList(ColorizedList):
  74. ARROW = SymbolWidget.SYMBOLS['rightarrow']
  75. def _init_colortags(self, textwidget, options):
  76. textwidget.tag_config('terminal', foreground='#006000')
  77. textwidget.tag_config('arrow', font='symbol', underline='0')
  78. textwidget.tag_config(
  79. 'nonterminal', foreground='blue', font=('helvetica', -12, 'bold')
  80. )
  81. def _item_repr(self, item):
  82. contents = []
  83. contents.append(('%s\t' % item.lhs(), 'nonterminal'))
  84. contents.append((self.ARROW, 'arrow'))
  85. for elt in item.rhs():
  86. if isinstance(elt, Nonterminal):
  87. contents.append((' %s' % elt.symbol(), 'nonterminal'))
  88. else:
  89. contents.append((' %r' % elt, 'terminal'))
  90. return contents
  91. ######################################################################
  92. # CFG Editor
  93. ######################################################################
# Help text for the CFG editor.  ``CFGEditor._help`` strips the
# surrounding whitespace and displays it in a ``ShowText`` window.
_CFGEditor_HELP = """
The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
are specified in the main text editing box.
Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.
Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consists of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.
Terminals must be surrounded by single quotes (') or double
quotes(\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.
To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.
Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.
Use the buttons at the bottom of the window when you are done editing
the CFG:
- Ok: apply the new CFG, and exit the editor.
- Apply: apply the new CFG, and do not exit the editor.
- Reset: revert to the original CFG, and do not exit the editor.
- Cancel: revert to the original CFG, and exit the editor.
"""
  128. class CFGEditor(object):
  129. """
  130. A dialog window for creating and editing context free grammars.
  131. ``CFGEditor`` imposes the following restrictions:
  132. - All nonterminals must be strings consisting of word
  133. characters.
  134. - All terminals must be strings consisting of word characters
  135. and space characters.
  136. """
  137. # Regular expressions used by _analyze_line. Precompile them, so
  138. # we can process the text faster.
  139. ARROW = SymbolWidget.SYMBOLS['rightarrow']
  140. _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
  141. _ARROW_RE = re.compile("\s*(->|(" + ARROW + "))\s*")
  142. _PRODUCTION_RE = re.compile(
  143. r"(^\s*\w+\s*)"
  144. + "(->|(" # LHS
  145. + ARROW
  146. + "))\s*"
  147. + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$" # arrow
  148. ) # RHS
  149. _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
  150. _BOLD = ('helvetica', -12, 'bold')
  151. def __init__(self, parent, cfg=None, set_cfg_callback=None):
  152. self._parent = parent
  153. if cfg is not None:
  154. self._cfg = cfg
  155. else:
  156. self._cfg = CFG(Nonterminal('S'), [])
  157. self._set_cfg_callback = set_cfg_callback
  158. self._highlight_matching_nonterminals = 1
  159. # Create the top-level window.
  160. self._top = Toplevel(parent)
  161. self._init_bindings()
  162. self._init_startframe()
  163. self._startframe.pack(side='top', fill='x', expand=0)
  164. self._init_prodframe()
  165. self._prodframe.pack(side='top', fill='both', expand=1)
  166. self._init_buttons()
  167. self._buttonframe.pack(side='bottom', fill='x', expand=0)
  168. self._textwidget.focus()
  169. def _init_startframe(self):
  170. frame = self._startframe = Frame(self._top)
  171. self._start = Entry(frame)
  172. self._start.pack(side='right')
  173. Label(frame, text='Start Symbol:').pack(side='right')
  174. Label(frame, text='Productions:').pack(side='left')
  175. self._start.insert(0, self._cfg.start().symbol())
  176. def _init_buttons(self):
  177. frame = self._buttonframe = Frame(self._top)
  178. Button(frame, text='Ok', command=self._ok, underline=0, takefocus=0).pack(
  179. side='left'
  180. )
  181. Button(frame, text='Apply', command=self._apply, underline=0, takefocus=0).pack(
  182. side='left'
  183. )
  184. Button(frame, text='Reset', command=self._reset, underline=0, takefocus=0).pack(
  185. side='left'
  186. )
  187. Button(
  188. frame, text='Cancel', command=self._cancel, underline=0, takefocus=0
  189. ).pack(side='left')
  190. Button(frame, text='Help', command=self._help, underline=0, takefocus=0).pack(
  191. side='right'
  192. )
  193. def _init_bindings(self):
  194. self._top.title('CFG Editor')
  195. self._top.bind('<Control-q>', self._cancel)
  196. self._top.bind('<Alt-q>', self._cancel)
  197. self._top.bind('<Control-d>', self._cancel)
  198. # self._top.bind('<Control-x>', self._cancel)
  199. self._top.bind('<Alt-x>', self._cancel)
  200. self._top.bind('<Escape>', self._cancel)
  201. # self._top.bind('<Control-c>', self._cancel)
  202. self._top.bind('<Alt-c>', self._cancel)
  203. self._top.bind('<Control-o>', self._ok)
  204. self._top.bind('<Alt-o>', self._ok)
  205. self._top.bind('<Control-a>', self._apply)
  206. self._top.bind('<Alt-a>', self._apply)
  207. self._top.bind('<Control-r>', self._reset)
  208. self._top.bind('<Alt-r>', self._reset)
  209. self._top.bind('<Control-h>', self._help)
  210. self._top.bind('<Alt-h>', self._help)
  211. self._top.bind('<F1>', self._help)
  212. def _init_prodframe(self):
  213. self._prodframe = Frame(self._top)
  214. # Create the basic Text widget & scrollbar.
  215. self._textwidget = Text(
  216. self._prodframe, background='#e0e0e0', exportselection=1
  217. )
  218. self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical')
  219. self._textwidget.config(yscrollcommand=self._textscroll.set)
  220. self._textscroll.config(command=self._textwidget.yview)
  221. self._textscroll.pack(side='right', fill='y')
  222. self._textwidget.pack(expand=1, fill='both', side='left')
  223. # Initialize the colorization tags. Each nonterminal gets its
  224. # own tag, so they aren't listed here.
  225. self._textwidget.tag_config('terminal', foreground='#006000')
  226. self._textwidget.tag_config('arrow', font='symbol')
  227. self._textwidget.tag_config('error', background='red')
  228. # Keep track of what line they're on. We use that to remember
  229. # to re-analyze a line whenever they leave it.
  230. self._linenum = 0
  231. # Expand "->" to an arrow.
  232. self._top.bind('>', self._replace_arrows)
  233. # Re-colorize lines when appropriate.
  234. self._top.bind('<<Paste>>', self._analyze)
  235. self._top.bind('<KeyPress>', self._check_analyze)
  236. self._top.bind('<ButtonPress>', self._check_analyze)
  237. # Tab cycles focus. (why doesn't this work??)
  238. def cycle(e, textwidget=self._textwidget):
  239. textwidget.tk_focusNext().focus()
  240. self._textwidget.bind('<Tab>', cycle)
  241. prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
  242. for i in range(len(prod_tuples) - 1, 0, -1):
  243. if prod_tuples[i][0] == prod_tuples[i - 1][0]:
  244. if () in prod_tuples[i][1]:
  245. continue
  246. if () in prod_tuples[i - 1][1]:
  247. continue
  248. print(prod_tuples[i - 1][1])
  249. print(prod_tuples[i][1])
  250. prod_tuples[i - 1][1].extend(prod_tuples[i][1])
  251. del prod_tuples[i]
  252. for lhs, rhss in prod_tuples:
  253. print(lhs, rhss)
  254. s = '%s ->' % lhs
  255. for rhs in rhss:
  256. for elt in rhs:
  257. if isinstance(elt, Nonterminal):
  258. s += ' %s' % elt
  259. else:
  260. s += ' %r' % elt
  261. s += ' |'
  262. s = s[:-2] + '\n'
  263. self._textwidget.insert('end', s)
  264. self._analyze()
  265. # # Add the producitons to the text widget, and colorize them.
  266. # prod_by_lhs = {}
  267. # for prod in self._cfg.productions():
  268. # if len(prod.rhs()) > 0:
  269. # prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
  270. # for (lhs, prods) in prod_by_lhs.items():
  271. # self._textwidget.insert('end', '%s ->' % lhs)
  272. # self._textwidget.insert('end', self._rhs(prods[0]))
  273. # for prod in prods[1:]:
  274. # print '\t|'+self._rhs(prod),
  275. # self._textwidget.insert('end', '\t|'+self._rhs(prod))
  276. # print
  277. # self._textwidget.insert('end', '\n')
  278. # for prod in self._cfg.productions():
  279. # if len(prod.rhs()) == 0:
  280. # self._textwidget.insert('end', '%s' % prod)
  281. # self._analyze()
  282. # def _rhs(self, prod):
  283. # s = ''
  284. # for elt in prod.rhs():
  285. # if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
  286. # else: s += ' %r' % elt
  287. # return s
  288. def _clear_tags(self, linenum):
  289. """
  290. Remove all tags (except ``arrow`` and ``sel``) from the given
  291. line of the text widget used for editing the productions.
  292. """
  293. start = '%d.0' % linenum
  294. end = '%d.end' % linenum
  295. for tag in self._textwidget.tag_names():
  296. if tag not in ('arrow', 'sel'):
  297. self._textwidget.tag_remove(tag, start, end)
  298. def _check_analyze(self, *e):
  299. """
  300. Check if we've moved to a new line. If we have, then remove
  301. all colorization from the line we moved to, and re-colorize
  302. the line that we moved from.
  303. """
  304. linenum = int(self._textwidget.index('insert').split('.')[0])
  305. if linenum != self._linenum:
  306. self._clear_tags(linenum)
  307. self._analyze_line(self._linenum)
  308. self._linenum = linenum
  309. def _replace_arrows(self, *e):
  310. """
  311. Replace any ``'->'`` text strings with arrows (char \\256, in
  312. symbol font). This searches the whole buffer, but is fast
  313. enough to be done anytime they press '>'.
  314. """
  315. arrow = '1.0'
  316. while True:
  317. arrow = self._textwidget.search('->', arrow, 'end+1char')
  318. if arrow == '':
  319. break
  320. self._textwidget.delete(arrow, arrow + '+2char')
  321. self._textwidget.insert(arrow, self.ARROW, 'arrow')
  322. self._textwidget.insert(arrow, '\t')
  323. arrow = '1.0'
  324. while True:
  325. arrow = self._textwidget.search(self.ARROW, arrow + '+1char', 'end+1char')
  326. if arrow == '':
  327. break
  328. self._textwidget.tag_add('arrow', arrow, arrow + '+1char')
  329. def _analyze_token(self, match, linenum):
  330. """
  331. Given a line number and a regexp match for a token on that
  332. line, colorize the token. Note that the regexp match gives us
  333. the token's text, start index (on the line), and end index (on
  334. the line).
  335. """
  336. # What type of token is it?
  337. if match.group()[0] in "'\"":
  338. tag = 'terminal'
  339. elif match.group() in ('->', self.ARROW):
  340. tag = 'arrow'
  341. else:
  342. # If it's a nonterminal, then set up new bindings, so we
  343. # can highlight all instances of that nonterminal when we
  344. # put the mouse over it.
  345. tag = 'nonterminal_' + match.group()
  346. if tag not in self._textwidget.tag_names():
  347. self._init_nonterminal_tag(tag)
  348. start = '%d.%d' % (linenum, match.start())
  349. end = '%d.%d' % (linenum, match.end())
  350. self._textwidget.tag_add(tag, start, end)
  351. def _init_nonterminal_tag(self, tag, foreground='blue'):
  352. self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
  353. if not self._highlight_matching_nonterminals:
  354. return
  355. def enter(e, textwidget=self._textwidget, tag=tag):
  356. textwidget.tag_config(tag, background='#80ff80')
  357. def leave(e, textwidget=self._textwidget, tag=tag):
  358. textwidget.tag_config(tag, background='')
  359. self._textwidget.tag_bind(tag, '<Enter>', enter)
  360. self._textwidget.tag_bind(tag, '<Leave>', leave)
  361. def _analyze_line(self, linenum):
  362. """
  363. Colorize a given line.
  364. """
  365. # Get rid of any tags that were previously on the line.
  366. self._clear_tags(linenum)
  367. # Get the line line's text string.
  368. line = self._textwidget.get(repr(linenum) + '.0', repr(linenum) + '.end')
  369. # If it's a valid production, then colorize each token.
  370. if CFGEditor._PRODUCTION_RE.match(line):
  371. # It's valid; Use _TOKEN_RE to tokenize the production,
  372. # and call analyze_token on each token.
  373. def analyze_token(match, self=self, linenum=linenum):
  374. self._analyze_token(match, linenum)
  375. return ''
  376. CFGEditor._TOKEN_RE.sub(analyze_token, line)
  377. elif line.strip() != '':
  378. # It's invalid; show the user where the error is.
  379. self._mark_error(linenum, line)
  380. def _mark_error(self, linenum, line):
  381. """
  382. Mark the location of an error in a line.
  383. """
  384. arrowmatch = CFGEditor._ARROW_RE.search(line)
  385. if not arrowmatch:
  386. # If there's no arrow at all, highlight the whole line.
  387. start = '%d.0' % linenum
  388. end = '%d.end' % linenum
  389. elif not CFGEditor._LHS_RE.match(line):
  390. # Otherwise, if the LHS is bad, highlight it.
  391. start = '%d.0' % linenum
  392. end = '%d.%d' % (linenum, arrowmatch.start())
  393. else:
  394. # Otherwise, highlight the RHS.
  395. start = '%d.%d' % (linenum, arrowmatch.end())
  396. end = '%d.end' % linenum
  397. # If we're highlighting 0 chars, highlight the whole line.
  398. if self._textwidget.compare(start, '==', end):
  399. start = '%d.0' % linenum
  400. end = '%d.end' % linenum
  401. self._textwidget.tag_add('error', start, end)
  402. def _analyze(self, *e):
  403. """
  404. Replace ``->`` with arrows, and colorize the entire buffer.
  405. """
  406. self._replace_arrows()
  407. numlines = int(self._textwidget.index('end').split('.')[0])
  408. for linenum in range(1, numlines + 1): # line numbers start at 1.
  409. self._analyze_line(linenum)
  410. def _parse_productions(self):
  411. """
  412. Parse the current contents of the textwidget buffer, to create
  413. a list of productions.
  414. """
  415. productions = []
  416. # Get the text, normalize it, and split it into lines.
  417. text = self._textwidget.get('1.0', 'end')
  418. text = re.sub(self.ARROW, '->', text)
  419. text = re.sub('\t', ' ', text)
  420. lines = text.split('\n')
  421. # Convert each line to a CFG production
  422. for line in lines:
  423. line = line.strip()
  424. if line == '':
  425. continue
  426. productions += _read_cfg_production(line)
  427. # if line.strip() == '': continue
  428. # if not CFGEditor._PRODUCTION_RE.match(line):
  429. # raise ValueError('Bad production string %r' % line)
  430. #
  431. # (lhs_str, rhs_str) = line.split('->')
  432. # lhs = Nonterminal(lhs_str.strip())
  433. # rhs = []
  434. # def parse_token(match, rhs=rhs):
  435. # token = match.group()
  436. # if token[0] in "'\"": rhs.append(token[1:-1])
  437. # else: rhs.append(Nonterminal(token))
  438. # return ''
  439. # CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
  440. #
  441. # productions.append(Production(lhs, *rhs))
  442. return productions
  443. def _destroy(self, *e):
  444. if self._top is None:
  445. return
  446. self._top.destroy()
  447. self._top = None
  448. def _ok(self, *e):
  449. self._apply()
  450. self._destroy()
  451. def _apply(self, *e):
  452. productions = self._parse_productions()
  453. start = Nonterminal(self._start.get())
  454. cfg = CFG(start, productions)
  455. if self._set_cfg_callback is not None:
  456. self._set_cfg_callback(cfg)
  457. def _reset(self, *e):
  458. self._textwidget.delete('1.0', 'end')
  459. for production in self._cfg.productions():
  460. self._textwidget.insert('end', '%s\n' % production)
  461. self._analyze()
  462. if self._set_cfg_callback is not None:
  463. self._set_cfg_callback(self._cfg)
  464. def _cancel(self, *e):
  465. try:
  466. self._reset()
  467. except:
  468. pass
  469. self._destroy()
  470. def _help(self, *e):
  471. # The default font's not very legible; try using 'fixed' instead.
  472. try:
  473. ShowText(
  474. self._parent,
  475. 'Help: Chart Parser Demo',
  476. (_CFGEditor_HELP).strip(),
  477. width=75,
  478. font='fixed',
  479. )
  480. except:
  481. ShowText(
  482. self._parent,
  483. 'Help: Chart Parser Demo',
  484. (_CFGEditor_HELP).strip(),
  485. width=75,
  486. )
  487. ######################################################################
  488. # New Demo (built tree based on cfg)
  489. ######################################################################
  490. class CFGDemo(object):
  491. def __init__(self, grammar, text):
  492. self._grammar = grammar
  493. self._text = text
  494. # Set up the main window.
  495. self._top = Tk()
  496. self._top.title('Context Free Grammar Demo')
  497. # Base font size
  498. self._size = IntVar(self._top)
  499. self._size.set(12) # = medium
  500. # Set up the key bindings
  501. self._init_bindings(self._top)
  502. # Create the basic frames
  503. frame1 = Frame(self._top)
  504. frame1.pack(side='left', fill='y', expand=0)
  505. self._init_menubar(self._top)
  506. self._init_buttons(self._top)
  507. self._init_grammar(frame1)
  508. self._init_treelet(frame1)
  509. self._init_workspace(self._top)
  510. # //////////////////////////////////////////////////
  511. # Initialization
  512. # //////////////////////////////////////////////////
  513. def _init_bindings(self, top):
  514. top.bind('<Control-q>', self.destroy)
  515. def _init_menubar(self, parent):
  516. pass
  517. def _init_buttons(self, parent):
  518. pass
  519. def _init_grammar(self, parent):
  520. self._prodlist = ProductionList(parent, self._grammar, width=20)
  521. self._prodlist.pack(side='top', fill='both', expand=1)
  522. self._prodlist.focus()
  523. self._prodlist.add_callback('select', self._selectprod_cb)
  524. self._prodlist.add_callback('move', self._selectprod_cb)
  525. def _init_treelet(self, parent):
  526. self._treelet_canvas = Canvas(parent, background='white')
  527. self._treelet_canvas.pack(side='bottom', fill='x')
  528. self._treelet = None
  529. def _init_workspace(self, parent):
  530. self._workspace = CanvasFrame(parent, background='white')
  531. self._workspace.pack(side='right', fill='both', expand=1)
  532. self._tree = None
  533. self.reset_workspace()
  534. # //////////////////////////////////////////////////
  535. # Workspace
  536. # //////////////////////////////////////////////////
  537. def reset_workspace(self):
  538. c = self._workspace.canvas()
  539. fontsize = int(self._size.get())
  540. node_font = ('helvetica', -(fontsize + 4), 'bold')
  541. leaf_font = ('helvetica', -(fontsize + 2))
  542. # Remove the old tree
  543. if self._tree is not None:
  544. self._workspace.remove_widget(self._tree)
  545. # The root of the tree.
  546. start = self._grammar.start().symbol()
  547. rootnode = TextWidget(c, start, font=node_font, draggable=1)
  548. # The leaves of the tree.
  549. leaves = []
  550. for word in self._text:
  551. leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))
  552. # Put it all together into one tree
  553. self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white')
  554. # Add it to the workspace.
  555. self._workspace.add_widget(self._tree)
  556. # Move the leaves to the bottom of the workspace.
  557. for leaf in leaves:
  558. leaf.move(0, 100)
  559. # self._nodes = {start:1}
  560. # self._leaves = dict([(l,1) for l in leaves])
  561. def workspace_markprod(self, production):
  562. pass
  563. def _markproduction(self, prod, tree=None):
  564. if tree is None:
  565. tree = self._tree
  566. for i in range(len(tree.subtrees()) - len(prod.rhs())):
  567. if tree['color', i] == 'white':
  568. self._markproduction # FIXME: Is this necessary at all?
  569. for j, node in enumerate(prod.rhs()):
  570. widget = tree.subtrees()[i + j]
  571. if (
  572. isinstance(node, Nonterminal)
  573. and isinstance(widget, TreeSegmentWidget)
  574. and node.symbol == widget.label().text()
  575. ):
  576. pass # matching nonterminal
  577. elif (
  578. isinstance(node, string_types)
  579. and isinstance(widget, TextWidget)
  580. and node == widget.text()
  581. ):
  582. pass # matching nonterminal
  583. else:
  584. break
  585. else:
  586. # Everything matched!
  587. print('MATCH AT', i)
  588. # //////////////////////////////////////////////////
  589. # Grammar
  590. # //////////////////////////////////////////////////
  591. def _selectprod_cb(self, production):
  592. canvas = self._treelet_canvas
  593. self._prodlist.highlight(production)
  594. if self._treelet is not None:
  595. self._treelet.destroy()
  596. # Convert the production to a tree.
  597. rhs = production.rhs()
  598. for (i, elt) in enumerate(rhs):
  599. if isinstance(elt, Nonterminal):
  600. elt = Tree(elt)
  601. tree = Tree(production.lhs().symbol(), *rhs)
  602. # Draw the tree in the treelet area.
  603. fontsize = int(self._size.get())
  604. node_font = ('helvetica', -(fontsize + 4), 'bold')
  605. leaf_font = ('helvetica', -(fontsize + 2))
  606. self._treelet = tree_to_treesegment(
  607. canvas, tree, node_font=node_font, leaf_font=leaf_font
  608. )
  609. self._treelet['draggable'] = 1
  610. # Center the treelet.
  611. (x1, y1, x2, y2) = self._treelet.bbox()
  612. w, h = int(canvas['width']), int(canvas['height'])
  613. self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)
  614. # Mark the places where we can add it to the workspace.
  615. self._markproduction(production)
  616. def destroy(self, *args):
  617. self._top.destroy()
  618. def mainloop(self, *args, **kwargs):
  619. self._top.mainloop(*args, **kwargs)
  620. def demo2():
  621. from nltk import Nonterminal, Production, CFG
  622. nonterminals = 'S VP NP PP P N Name V Det'
  623. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
  624. productions = (
  625. # Syntactic Productions
  626. Production(S, [NP, VP]),
  627. Production(NP, [Det, N]),
  628. Production(NP, [NP, PP]),
  629. Production(VP, [VP, PP]),
  630. Production(VP, [V, NP, PP]),
  631. Production(VP, [V, NP]),
  632. Production(PP, [P, NP]),
  633. Production(PP, []),
  634. Production(PP, ['up', 'over', NP]),
  635. # Lexical Productions
  636. Production(NP, ['I']),
  637. Production(Det, ['the']),
  638. Production(Det, ['a']),
  639. Production(N, ['man']),
  640. Production(V, ['saw']),
  641. Production(P, ['in']),
  642. Production(P, ['with']),
  643. Production(N, ['park']),
  644. Production(N, ['dog']),
  645. Production(N, ['statue']),
  646. Production(Det, ['my']),
  647. )
  648. grammar = CFG(S, productions)
  649. text = 'I saw a man in the park'.split()
  650. d = CFGDemo(grammar, text)
  651. d.mainloop()
  652. ######################################################################
  653. # Old Demo
  654. ######################################################################
  655. def demo():
  656. from nltk import Nonterminal, CFG
  657. nonterminals = 'S VP NP PP P N Name V Det'
  658. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
  659. grammar = CFG.fromstring(
  660. """
  661. S -> NP VP
  662. PP -> P NP
  663. NP -> Det N
  664. NP -> NP PP
  665. VP -> V NP
  666. VP -> VP PP
  667. Det -> 'a'
  668. Det -> 'the'
  669. Det -> 'my'
  670. NP -> 'I'
  671. N -> 'dog'
  672. N -> 'man'
  673. N -> 'park'
  674. N -> 'statue'
  675. V -> 'saw'
  676. P -> 'in'
  677. P -> 'up'
  678. P -> 'over'
  679. P -> 'with'
  680. """
  681. )
  682. def cb(grammar):
  683. print(grammar)
  684. top = Tk()
  685. editor = CFGEditor(top, grammar, cb)
  686. Label(top, text='\nTesting CFG Editor\n').pack()
  687. Button(top, text='Quit', command=top.destroy).pack()
  688. top.mainloop()
  689. def demo3():
  690. from nltk import Production
  691. (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
  692. 'S, VP, NP, PP, P, N, Name, V, Det'
  693. )
  694. productions = (
  695. # Syntactic Productions
  696. Production(S, [NP, VP]),
  697. Production(NP, [Det, N]),
  698. Production(NP, [NP, PP]),
  699. Production(VP, [VP, PP]),
  700. Production(VP, [V, NP, PP]),
  701. Production(VP, [V, NP]),
  702. Production(PP, [P, NP]),
  703. Production(PP, []),
  704. Production(PP, ['up', 'over', NP]),
  705. # Lexical Productions
  706. Production(NP, ['I']),
  707. Production(Det, ['the']),
  708. Production(Det, ['a']),
  709. Production(N, ['man']),
  710. Production(V, ['saw']),
  711. Production(P, ['in']),
  712. Production(P, ['with']),
  713. Production(N, ['park']),
  714. Production(N, ['dog']),
  715. Production(N, ['statue']),
  716. Production(Det, ['my']),
  717. )
  718. t = Tk()
  719. def destroy(e, t=t):
  720. t.destroy()
  721. t.bind('q', destroy)
  722. p = ProductionList(t, productions)
  723. p.pack(expand=1, fill='both')
  724. p.add_callback('select', p.markonly)
  725. p.add_callback('move', p.markonly)
  726. p.focus()
  727. p.mark(productions[2])
  728. p.mark(productions[8])
# When run as a script, launch the CFG editor demo.
if __name__ == '__main__':
    demo()