dependencygraph.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. # Natural Language Toolkit: Dependency Grammars
  2. #
  3. # Copyright (C) 2001-2019 NLTK Project
  4. # Author: Jason Narad <jason.narad@gmail.com>
  5. # Steven Bird <stevenbird1@gmail.com> (modifications)
  6. #
  7. # URL: <http://nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. #
  10. """
  11. Tools for reading and writing dependency trees.
  12. The input is assumed to be in Malt-TAB format
  13. (http://stp.lingfil.uu.se/~nivre/research/MaltXML.html).
  14. """
  15. from __future__ import print_function, unicode_literals
  16. from collections import defaultdict
  17. from itertools import chain
  18. from pprint import pformat
  19. import subprocess
  20. import warnings
  21. from six import string_types
  22. from nltk.tree import Tree
  23. from nltk.compat import python_2_unicode_compatible
  24. #################################################################
  25. # DependencyGraph Class
  26. #################################################################
  27. @python_2_unicode_compatible
  28. class DependencyGraph(object):
  29. """
  30. A container for the nodes and labelled edges of a dependency structure.
  31. """
  32. def __init__(
  33. self,
  34. tree_str=None,
  35. cell_extractor=None,
  36. zero_based=False,
  37. cell_separator=None,
  38. top_relation_label='ROOT',
  39. ):
  40. """Dependency graph.
  41. We place a dummy `TOP` node with the index 0, since the root node is
  42. often assigned 0 as its head. This also means that the indexing of the
  43. nodes corresponds directly to the Malt-TAB format, which starts at 1.
  44. If zero-based is True, then Malt-TAB-like input with node numbers
  45. starting at 0 and the root node assigned -1 (as produced by, e.g.,
  46. zpar).
  47. :param str cell_separator: the cell separator. If not provided, cells
  48. are split by whitespace.
  49. :param str top_relation_label: the label by which the top relation is
  50. identified, for examlple, `ROOT`, `null` or `TOP`.
  51. """
  52. self.nodes = defaultdict(
  53. lambda: {
  54. 'address': None,
  55. 'word': None,
  56. 'lemma': None,
  57. 'ctag': None,
  58. 'tag': None,
  59. 'feats': None,
  60. 'head': None,
  61. 'deps': defaultdict(list),
  62. 'rel': None,
  63. }
  64. )
  65. self.nodes[0].update({'ctag': 'TOP', 'tag': 'TOP', 'address': 0})
  66. self.root = None
  67. if tree_str:
  68. self._parse(
  69. tree_str,
  70. cell_extractor=cell_extractor,
  71. zero_based=zero_based,
  72. cell_separator=cell_separator,
  73. top_relation_label=top_relation_label,
  74. )
  75. def remove_by_address(self, address):
  76. """
  77. Removes the node with the given address. References
  78. to this node in others will still exist.
  79. """
  80. del self.nodes[address]
  81. def redirect_arcs(self, originals, redirect):
  82. """
  83. Redirects arcs to any of the nodes in the originals list
  84. to the redirect node address.
  85. """
  86. for node in self.nodes.values():
  87. new_deps = []
  88. for dep in node['deps']:
  89. if dep in originals:
  90. new_deps.append(redirect)
  91. else:
  92. new_deps.append(dep)
  93. node['deps'] = new_deps
  94. def add_arc(self, head_address, mod_address):
  95. """
  96. Adds an arc from the node specified by head_address to the
  97. node specified by the mod address.
  98. """
  99. relation = self.nodes[mod_address]['rel']
  100. self.nodes[head_address]['deps'].setdefault(relation, [])
  101. self.nodes[head_address]['deps'][relation].append(mod_address)
  102. # self.nodes[head_address]['deps'].append(mod_address)
  103. def connect_graph(self):
  104. """
  105. Fully connects all non-root nodes. All nodes are set to be dependents
  106. of the root node.
  107. """
  108. for node1 in self.nodes.values():
  109. for node2 in self.nodes.values():
  110. if node1['address'] != node2['address'] and node2['rel'] != 'TOP':
  111. relation = node2['rel']
  112. node1['deps'].setdefault(relation, [])
  113. node1['deps'][relation].append(node2['address'])
  114. # node1['deps'].append(node2['address'])
  115. def get_by_address(self, node_address):
  116. """Return the node with the given address."""
  117. return self.nodes[node_address]
  118. def contains_address(self, node_address):
  119. """
  120. Returns true if the graph contains a node with the given node
  121. address, false otherwise.
  122. """
  123. return node_address in self.nodes
  124. def to_dot(self):
  125. """Return a dot representation suitable for using with Graphviz.
  126. >>> dg = DependencyGraph(
  127. ... 'John N 2\\n'
  128. ... 'loves V 0\\n'
  129. ... 'Mary N 2'
  130. ... )
  131. >>> print(dg.to_dot())
  132. digraph G{
  133. edge [dir=forward]
  134. node [shape=plaintext]
  135. <BLANKLINE>
  136. 0 [label="0 (None)"]
  137. 0 -> 2 [label="ROOT"]
  138. 1 [label="1 (John)"]
  139. 2 [label="2 (loves)"]
  140. 2 -> 1 [label=""]
  141. 2 -> 3 [label=""]
  142. 3 [label="3 (Mary)"]
  143. }
  144. """
  145. # Start the digraph specification
  146. s = 'digraph G{\n'
  147. s += 'edge [dir=forward]\n'
  148. s += 'node [shape=plaintext]\n'
  149. # Draw the remaining nodes
  150. for node in sorted(self.nodes.values(), key=lambda v: v['address']):
  151. s += '\n%s [label="%s (%s)"]' % (
  152. node['address'],
  153. node['address'],
  154. node['word'],
  155. )
  156. for rel, deps in node['deps'].items():
  157. for dep in deps:
  158. if rel is not None:
  159. s += '\n%s -> %s [label="%s"]' % (node['address'], dep, rel)
  160. else:
  161. s += '\n%s -> %s ' % (node['address'], dep)
  162. s += "\n}"
  163. return s
  164. def _repr_svg_(self):
  165. """Show SVG representation of the transducer (IPython magic).
  166. >>> dg = DependencyGraph(
  167. ... 'John N 2\\n'
  168. ... 'loves V 0\\n'
  169. ... 'Mary N 2'
  170. ... )
  171. >>> dg._repr_svg_().split('\\n')[0]
  172. '<?xml version="1.0" encoding="UTF-8" standalone="no"?>'
  173. """
  174. dot_string = self.to_dot()
  175. try:
  176. process = subprocess.Popen(
  177. ['dot', '-Tsvg'],
  178. stdin=subprocess.PIPE,
  179. stdout=subprocess.PIPE,
  180. stderr=subprocess.PIPE,
  181. universal_newlines=True,
  182. )
  183. except OSError:
  184. raise Exception('Cannot find the dot binary from Graphviz package')
  185. out, err = process.communicate(dot_string)
  186. if err:
  187. raise Exception(
  188. 'Cannot create svg representation by running dot from string: {}'
  189. ''.format(dot_string)
  190. )
  191. return out
  192. def __str__(self):
  193. return pformat(self.nodes)
  194. def __repr__(self):
  195. return "<DependencyGraph with {0} nodes>".format(len(self.nodes))
  196. @staticmethod
  197. def load(
  198. filename, zero_based=False, cell_separator=None, top_relation_label='ROOT'
  199. ):
  200. """
  201. :param filename: a name of a file in Malt-TAB format
  202. :param zero_based: nodes in the input file are numbered starting from 0
  203. rather than 1 (as produced by, e.g., zpar)
  204. :param str cell_separator: the cell separator. If not provided, cells
  205. are split by whitespace.
  206. :param str top_relation_label: the label by which the top relation is
  207. identified, for examlple, `ROOT`, `null` or `TOP`.
  208. :return: a list of DependencyGraphs
  209. """
  210. with open(filename) as infile:
  211. return [
  212. DependencyGraph(
  213. tree_str,
  214. zero_based=zero_based,
  215. cell_separator=cell_separator,
  216. top_relation_label=top_relation_label,
  217. )
  218. for tree_str in infile.read().split('\n\n')
  219. ]
  220. def left_children(self, node_index):
  221. """
  222. Returns the number of left children under the node specified
  223. by the given address.
  224. """
  225. children = chain.from_iterable(self.nodes[node_index]['deps'].values())
  226. index = self.nodes[node_index]['address']
  227. return sum(1 for c in children if c < index)
  228. def right_children(self, node_index):
  229. """
  230. Returns the number of right children under the node specified
  231. by the given address.
  232. """
  233. children = chain.from_iterable(self.nodes[node_index]['deps'].values())
  234. index = self.nodes[node_index]['address']
  235. return sum(1 for c in children if c > index)
  236. def add_node(self, node):
  237. if not self.contains_address(node['address']):
  238. self.nodes[node['address']].update(node)
  239. def _parse(
  240. self,
  241. input_,
  242. cell_extractor=None,
  243. zero_based=False,
  244. cell_separator=None,
  245. top_relation_label='ROOT',
  246. ):
  247. """Parse a sentence.
  248. :param extractor: a function that given a tuple of cells returns a
  249. 7-tuple, where the values are ``word, lemma, ctag, tag, feats, head,
  250. rel``.
  251. :param str cell_separator: the cell separator. If not provided, cells
  252. are split by whitespace.
  253. :param str top_relation_label: the label by which the top relation is
  254. identified, for examlple, `ROOT`, `null` or `TOP`.
  255. """
  256. def extract_3_cells(cells, index):
  257. word, tag, head = cells
  258. return index, word, word, tag, tag, '', head, ''
  259. def extract_4_cells(cells, index):
  260. word, tag, head, rel = cells
  261. return index, word, word, tag, tag, '', head, rel
  262. def extract_7_cells(cells, index):
  263. line_index, word, lemma, tag, _, head, rel = cells
  264. try:
  265. index = int(line_index)
  266. except ValueError:
  267. # index can't be parsed as an integer, use default
  268. pass
  269. return index, word, lemma, tag, tag, '', head, rel
  270. def extract_10_cells(cells, index):
  271. line_index, word, lemma, ctag, tag, feats, head, rel, _, _ = cells
  272. try:
  273. index = int(line_index)
  274. except ValueError:
  275. # index can't be parsed as an integer, use default
  276. pass
  277. return index, word, lemma, ctag, tag, feats, head, rel
  278. extractors = {
  279. 3: extract_3_cells,
  280. 4: extract_4_cells,
  281. 7: extract_7_cells,
  282. 10: extract_10_cells,
  283. }
  284. if isinstance(input_, string_types):
  285. input_ = (line for line in input_.split('\n'))
  286. lines = (l.rstrip() for l in input_)
  287. lines = (l for l in lines if l)
  288. cell_number = None
  289. for index, line in enumerate(lines, start=1):
  290. cells = line.split(cell_separator)
  291. if cell_number is None:
  292. cell_number = len(cells)
  293. else:
  294. assert cell_number == len(cells)
  295. if cell_extractor is None:
  296. try:
  297. cell_extractor = extractors[cell_number]
  298. except KeyError:
  299. raise ValueError(
  300. 'Number of tab-delimited fields ({0}) not supported by '
  301. 'CoNLL(10) or Malt-Tab(4) format'.format(cell_number)
  302. )
  303. try:
  304. index, word, lemma, ctag, tag, feats, head, rel = cell_extractor(
  305. cells, index
  306. )
  307. except (TypeError, ValueError):
  308. # cell_extractor doesn't take 2 arguments or doesn't return 8
  309. # values; assume the cell_extractor is an older external
  310. # extractor and doesn't accept or return an index.
  311. word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells)
  312. if head == '_':
  313. continue
  314. head = int(head)
  315. if zero_based:
  316. head += 1
  317. self.nodes[index].update(
  318. {
  319. 'address': index,
  320. 'word': word,
  321. 'lemma': lemma,
  322. 'ctag': ctag,
  323. 'tag': tag,
  324. 'feats': feats,
  325. 'head': head,
  326. 'rel': rel,
  327. }
  328. )
  329. # Make sure that the fake root node has labeled dependencies.
  330. if (cell_number == 3) and (head == 0):
  331. rel = top_relation_label
  332. self.nodes[head]['deps'][rel].append(index)
  333. if self.nodes[0]['deps'][top_relation_label]:
  334. root_address = self.nodes[0]['deps'][top_relation_label][0]
  335. self.root = self.nodes[root_address]
  336. self.top_relation_label = top_relation_label
  337. else:
  338. warnings.warn(
  339. "The graph doesn't contain a node " "that depends on the root element."
  340. )
  341. def _word(self, node, filter=True):
  342. w = node['word']
  343. if filter:
  344. if w != ',':
  345. return w
  346. return w
  347. def _tree(self, i):
  348. """ Turn dependency graphs into NLTK trees.
  349. :param int i: index of a node
  350. :return: either a word (if the indexed node is a leaf) or a ``Tree``.
  351. """
  352. node = self.get_by_address(i)
  353. word = node['word']
  354. deps = sorted(chain.from_iterable(node['deps'].values()))
  355. if deps:
  356. return Tree(word, [self._tree(dep) for dep in deps])
  357. else:
  358. return word
  359. def tree(self):
  360. """
  361. Starting with the ``root`` node, build a dependency tree using the NLTK
  362. ``Tree`` constructor. Dependency labels are omitted.
  363. """
  364. node = self.root
  365. word = node['word']
  366. deps = sorted(chain.from_iterable(node['deps'].values()))
  367. return Tree(word, [self._tree(dep) for dep in deps])
  368. def triples(self, node=None):
  369. """
  370. Extract dependency triples of the form:
  371. ((head word, head tag), rel, (dep word, dep tag))
  372. """
  373. if not node:
  374. node = self.root
  375. head = (node['word'], node['ctag'])
  376. for i in sorted(chain.from_iterable(node['deps'].values())):
  377. dep = self.get_by_address(i)
  378. yield (head, dep['rel'], (dep['word'], dep['ctag']))
  379. for triple in self.triples(node=dep):
  380. yield triple
  381. def _hd(self, i):
  382. try:
  383. return self.nodes[i]['head']
  384. except IndexError:
  385. return None
  386. def _rel(self, i):
  387. try:
  388. return self.nodes[i]['rel']
  389. except IndexError:
  390. return None
  391. # what's the return type? Boolean or list?
  392. def contains_cycle(self):
  393. """Check whether there are cycles.
  394. >>> dg = DependencyGraph(treebank_data)
  395. >>> dg.contains_cycle()
  396. False
  397. >>> cyclic_dg = DependencyGraph()
  398. >>> top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
  399. >>> child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
  400. >>> child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
  401. >>> child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
  402. >>> child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
  403. >>> cyclic_dg.nodes = {
  404. ... 0: top,
  405. ... 1: child1,
  406. ... 2: child2,
  407. ... 3: child3,
  408. ... 4: child4,
  409. ... }
  410. >>> cyclic_dg.root = top
  411. >>> cyclic_dg.contains_cycle()
  412. [3, 1, 2, 4]
  413. """
  414. distances = {}
  415. for node in self.nodes.values():
  416. for dep in node['deps']:
  417. key = tuple([node['address'], dep])
  418. distances[key] = 1
  419. for _ in self.nodes:
  420. new_entries = {}
  421. for pair1 in distances:
  422. for pair2 in distances:
  423. if pair1[1] == pair2[0]:
  424. key = tuple([pair1[0], pair2[1]])
  425. new_entries[key] = distances[pair1] + distances[pair2]
  426. for pair in new_entries:
  427. distances[pair] = new_entries[pair]
  428. if pair[0] == pair[1]:
  429. path = self.get_cycle_path(self.get_by_address(pair[0]), pair[0])
  430. return path
  431. return False # return []?
  432. def get_cycle_path(self, curr_node, goal_node_index):
  433. for dep in curr_node['deps']:
  434. if dep == goal_node_index:
  435. return [curr_node['address']]
  436. for dep in curr_node['deps']:
  437. path = self.get_cycle_path(self.get_by_address(dep), goal_node_index)
  438. if len(path) > 0:
  439. path.insert(0, curr_node['address'])
  440. return path
  441. return []
  442. def to_conll(self, style):
  443. """
  444. The dependency graph in CoNLL format.
  445. :param style: the style to use for the format (3, 4, 10 columns)
  446. :type style: int
  447. :rtype: str
  448. """
  449. if style == 3:
  450. template = '{word}\t{tag}\t{head}\n'
  451. elif style == 4:
  452. template = '{word}\t{tag}\t{head}\t{rel}\n'
  453. elif style == 10:
  454. template = (
  455. '{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n'
  456. )
  457. else:
  458. raise ValueError(
  459. 'Number of tab-delimited fields ({0}) not supported by '
  460. 'CoNLL(10) or Malt-Tab(4) format'.format(style)
  461. )
  462. return ''.join(
  463. template.format(i=i, **node)
  464. for i, node in sorted(self.nodes.items())
  465. if node['tag'] != 'TOP'
  466. )
  467. def nx_graph(self):
  468. """Convert the data in a ``nodelist`` into a networkx labeled directed graph."""
  469. import networkx
  470. nx_nodelist = list(range(1, len(self.nodes)))
  471. nx_edgelist = [
  472. (n, self._hd(n), self._rel(n)) for n in nx_nodelist if self._hd(n)
  473. ]
  474. self.nx_labels = {}
  475. for n in nx_nodelist:
  476. self.nx_labels[n] = self.nodes[n]['word']
  477. g = networkx.MultiDiGraph()
  478. g.add_nodes_from(nx_nodelist)
  479. g.add_edges_from(nx_edgelist)
  480. return g
  481. class DependencyGraphError(Exception):
  482. """Dependency graph exception."""
  483. def demo():
  484. malt_demo()
  485. conll_demo()
  486. conll_file_demo()
  487. cycle_finding_demo()
  488. def malt_demo(nx=False):
  489. """
  490. A demonstration of the result of reading a dependency
  491. version of the first sentence of the Penn Treebank.
  492. """
  493. dg = DependencyGraph(
  494. """Pierre NNP 2 NMOD
  495. Vinken NNP 8 SUB
  496. , , 2 P
  497. 61 CD 5 NMOD
  498. years NNS 6 AMOD
  499. old JJ 2 NMOD
  500. , , 2 P
  501. will MD 0 ROOT
  502. join VB 8 VC
  503. the DT 11 NMOD
  504. board NN 9 OBJ
  505. as IN 9 VMOD
  506. a DT 15 NMOD
  507. nonexecutive JJ 15 NMOD
  508. director NN 12 PMOD
  509. Nov. NNP 9 VMOD
  510. 29 CD 16 NMOD
  511. . . 9 VMOD
  512. """
  513. )
  514. tree = dg.tree()
  515. tree.pprint()
  516. if nx:
  517. # currently doesn't work
  518. import networkx
  519. from matplotlib import pylab
  520. g = dg.nx_graph()
  521. g.info()
  522. pos = networkx.spring_layout(g, dim=1)
  523. networkx.draw_networkx_nodes(g, pos, node_size=50)
  524. # networkx.draw_networkx_edges(g, pos, edge_color='k', width=8)
  525. networkx.draw_networkx_labels(g, pos, dg.nx_labels)
  526. pylab.xticks([])
  527. pylab.yticks([])
  528. pylab.savefig('tree.png')
  529. pylab.show()
  530. def conll_demo():
  531. """
  532. A demonstration of how to read a string representation of
  533. a CoNLL format dependency tree.
  534. """
  535. dg = DependencyGraph(conll_data1)
  536. tree = dg.tree()
  537. tree.pprint()
  538. print(dg)
  539. print(dg.to_conll(4))
  540. def conll_file_demo():
  541. print('Mass conll_read demo...')
  542. graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry]
  543. for graph in graphs:
  544. tree = graph.tree()
  545. print('\n')
  546. tree.pprint()
  547. def cycle_finding_demo():
  548. dg = DependencyGraph(treebank_data)
  549. print(dg.contains_cycle())
  550. cyclic_dg = DependencyGraph()
  551. cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0})
  552. cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1})
  553. cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2})
  554. cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3})
  555. cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4})
  556. print(cyclic_dg.contains_cycle())
  557. treebank_data = """Pierre NNP 2 NMOD
  558. Vinken NNP 8 SUB
  559. , , 2 P
  560. 61 CD 5 NMOD
  561. years NNS 6 AMOD
  562. old JJ 2 NMOD
  563. , , 2 P
  564. will MD 0 ROOT
  565. join VB 8 VC
  566. the DT 11 NMOD
  567. board NN 9 OBJ
  568. as IN 9 VMOD
  569. a DT 15 NMOD
  570. nonexecutive JJ 15 NMOD
  571. director NN 12 PMOD
  572. Nov. NNP 9 VMOD
  573. 29 CD 16 NMOD
  574. . . 9 VMOD
  575. """
  576. conll_data1 = """
  577. 1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
  578. 2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _
  579. 3 met met Prep Prep voor 8 mod _ _
  580. 4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _
  581. 5 moeder moeder N N soort|ev|neut 3 obj1 _ _
  582. 6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _
  583. 7 gaan ga V V hulp|inf 6 vc _ _
  584. 8 winkelen winkel V V intrans|inf 11 cnj _ _
  585. 9 , , Punc Punc komma 8 punct _ _
  586. 10 zwemmen zwem V V intrans|inf 11 cnj _ _
  587. 11 of of Conj Conj neven 7 vc _ _
  588. 12 terrassen terras N N soort|mv|neut 11 cnj _ _
  589. 13 . . Punc Punc punt 12 punct _ _
  590. """
  591. conll_data2 = """1 Cathy Cathy N N eigen|ev|neut 2 su _ _
  592. 2 zag zie V V trans|ovt|1of2of3|ev 0 ROOT _ _
  593. 3 hen hen Pron Pron per|3|mv|datofacc 2 obj1 _ _
  594. 4 wild wild Adj Adj attr|stell|onverv 5 mod _ _
  595. 5 zwaaien zwaai N N soort|mv|neut 2 vc _ _
  596. 6 . . Punc Punc punt 5 punct _ _
  597. 1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
  598. 2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _
  599. 3 met met Prep Prep voor 8 mod _ _
  600. 4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _
  601. 5 moeder moeder N N soort|ev|neut 3 obj1 _ _
  602. 6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _
  603. 7 gaan ga V V hulp|inf 6 vc _ _
  604. 8 winkelen winkel V V intrans|inf 11 cnj _ _
  605. 9 , , Punc Punc komma 8 punct _ _
  606. 10 zwemmen zwem V V intrans|inf 11 cnj _ _
  607. 11 of of Conj Conj neven 7 vc _ _
  608. 12 terrassen terras N N soort|mv|neut 11 cnj _ _
  609. 13 . . Punc Punc punt 12 punct _ _
  610. 1 Dat dat Pron Pron aanw|neut|attr 2 det _ _
  611. 2 werkwoord werkwoord N N soort|ev|neut 6 obj1 _ _
  612. 3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _
  613. 4 ze ze Pron Pron per|3|evofmv|nom 6 su _ _
  614. 5 zelf zelf Pron Pron aanw|neut|attr|wzelf 3 predm _ _
  615. 6 uitgevonden vind V V trans|verldw|onverv 3 vc _ _
  616. 7 . . Punc Punc punt 6 punct _ _
  617. 1 Het het Pron Pron onbep|neut|zelfst 2 su _ _
  618. 2 hoorde hoor V V trans|ovt|1of2of3|ev 0 ROOT _ _
  619. 3 bij bij Prep Prep voor 2 ld _ _
  620. 4 de de Art Art bep|zijdofmv|neut 6 det _ _
  621. 5 warme warm Adj Adj attr|stell|vervneut 6 mod _ _
  622. 6 zomerdag zomerdag N N soort|ev|neut 3 obj1 _ _
  623. 7 die die Pron Pron betr|neut|zelfst 6 mod _ _
  624. 8 ze ze Pron Pron per|3|evofmv|nom 12 su _ _
  625. 9 ginds ginds Adv Adv gew|aanw 12 mod _ _
  626. 10 achter achter Adv Adv gew|geenfunc|stell|onverv 12 svp _ _
  627. 11 had heb V V hulp|ovt|1of2of3|ev 7 body _ _
  628. 12 gelaten laat V V trans|verldw|onverv 11 vc _ _
  629. 13 . . Punc Punc punt 12 punct _ _
  630. 1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
  631. 2 hadden heb V V trans|ovt|1of2of3|mv 0 ROOT _ _
  632. 3 languit languit Adv Adv gew|geenfunc|stell|onverv 11 mod _ _
  633. 4 naast naast Prep Prep voor 11 mod _ _
  634. 5 elkaar elkaar Pron Pron rec|neut 4 obj1 _ _
  635. 6 op op Prep Prep voor 11 ld _ _
  636. 7 de de Art Art bep|zijdofmv|neut 8 det _ _
  637. 8 strandstoelen strandstoel N N soort|mv|neut 6 obj1 _ _
  638. 9 kunnen kan V V hulp|inf 2 vc _ _
  639. 10 gaan ga V V hulp|inf 9 vc _ _
  640. 11 liggen lig V V intrans|inf 10 vc _ _
  641. 12 . . Punc Punc punt 11 punct _ _
  642. 1 Zij zij Pron Pron per|3|evofmv|nom 2 su _ _
  643. 2 zou zal V V hulp|ovt|1of2of3|ev 7 cnj _ _
  644. 3 mams mams N N soort|ev|neut 4 det _ _
  645. 4 rug rug N N soort|ev|neut 5 obj1 _ _
  646. 5 ingewreven wrijf V V trans|verldw|onverv 6 vc _ _
  647. 6 hebben heb V V hulp|inf 2 vc _ _
  648. 7 en en Conj Conj neven 0 ROOT _ _
  649. 8 mam mam V V trans|ovt|1of2of3|ev 7 cnj _ _
  650. 9 de de Art Art bep|zijdofmv|neut 10 det _ _
  651. 10 hare hare Pron Pron bez|3|ev|neut|attr 8 obj1 _ _
  652. 11 . . Punc Punc punt 10 punct _ _
  653. 1 Of of Conj Conj onder|metfin 0 ROOT _ _
  654. 2 ze ze Pron Pron per|3|evofmv|nom 3 su _ _
  655. 3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _
  656. 4 gewoon gewoon Adj Adj adv|stell|onverv 10 mod _ _
  657. 5 met met Prep Prep voor 10 mod _ _
  658. 6 haar haar Pron Pron bez|3|ev|neut|attr 7 det _ _
  659. 7 vriendinnen vriendin N N soort|mv|neut 5 obj1 _ _
  660. 8 rond rond Adv Adv deelv 10 svp _ _
  661. 9 kunnen kan V V hulp|inf 3 vc _ _
  662. 10 slenteren slenter V V intrans|inf 9 vc _ _
  663. 11 in in Prep Prep voor 10 mod _ _
  664. 12 de de Art Art bep|zijdofmv|neut 13 det _ _
  665. 13 buurt buurt N N soort|ev|neut 11 obj1 _ _
  666. 14 van van Prep Prep voor 13 mod _ _
  667. 15 Trafalgar_Square Trafalgar_Square MWU N_N eigen|ev|neut_eigen|ev|neut 14 obj1 _ _
  668. 16 . . Punc Punc punt 15 punct _ _
  669. """
  670. if __name__ == '__main__':
  671. demo()