# Natural Language Toolkit: Glue Semantics
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2019 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
  8. from __future__ import print_function, division, unicode_literals
  9. import os
  10. from itertools import chain
  11. from six import string_types
  12. import nltk
  13. from nltk.internals import Counter
  14. from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger, RegexpTagger
  15. from nltk.sem.logic import (
  16. Expression,
  17. Variable,
  18. VariableExpression,
  19. LambdaExpression,
  20. AbstractVariableExpression,
  21. )
  22. from nltk.compat import python_2_unicode_compatible
  23. from nltk.sem import drt
  24. from nltk.sem import linearlogic
  25. SPEC_SEMTYPES = {
  26. 'a': 'ex_quant',
  27. 'an': 'ex_quant',
  28. 'every': 'univ_quant',
  29. 'the': 'def_art',
  30. 'no': 'no_quant',
  31. 'default': 'ex_quant',
  32. }
  33. OPTIONAL_RELATIONSHIPS = ['nmod', 'vmod', 'punct']
  34. @python_2_unicode_compatible
  35. class GlueFormula(object):
  36. def __init__(self, meaning, glue, indices=None):
  37. if not indices:
  38. indices = set()
  39. if isinstance(meaning, string_types):
  40. self.meaning = Expression.fromstring(meaning)
  41. elif isinstance(meaning, Expression):
  42. self.meaning = meaning
  43. else:
  44. raise RuntimeError(
  45. 'Meaning term neither string or expression: %s, %s'
  46. % (meaning, meaning.__class__)
  47. )
  48. if isinstance(glue, string_types):
  49. self.glue = linearlogic.LinearLogicParser().parse(glue)
  50. elif isinstance(glue, linearlogic.Expression):
  51. self.glue = glue
  52. else:
  53. raise RuntimeError(
  54. 'Glue term neither string or expression: %s, %s'
  55. % (glue, glue.__class__)
  56. )
  57. self.indices = indices
  58. def applyto(self, arg):
  59. """ self = (\\x.(walk x), (subj -o f))
  60. arg = (john , subj)
  61. returns ((walk john), f)
  62. """
  63. if self.indices & arg.indices: # if the sets are NOT disjoint
  64. raise linearlogic.LinearLogicApplicationException(
  65. "'%s' applied to '%s'. Indices are not disjoint." % (self, arg)
  66. )
  67. else: # if the sets ARE disjoint
  68. return_indices = self.indices | arg.indices
  69. try:
  70. return_glue = linearlogic.ApplicationExpression(
  71. self.glue, arg.glue, arg.indices
  72. )
  73. except linearlogic.LinearLogicApplicationException:
  74. raise linearlogic.LinearLogicApplicationException(
  75. "'%s' applied to '%s'" % (self.simplify(), arg.simplify())
  76. )
  77. arg_meaning_abstracted = arg.meaning
  78. if return_indices:
  79. for dep in self.glue.simplify().antecedent.dependencies[
  80. ::-1
  81. ]: # if self.glue is (A -o B), dep is in A.dependencies
  82. arg_meaning_abstracted = self.make_LambdaExpression(
  83. Variable('v%s' % dep), arg_meaning_abstracted
  84. )
  85. return_meaning = self.meaning.applyto(arg_meaning_abstracted)
  86. return self.__class__(return_meaning, return_glue, return_indices)
  87. def make_VariableExpression(self, name):
  88. return VariableExpression(name)
  89. def make_LambdaExpression(self, variable, term):
  90. return LambdaExpression(variable, term)
  91. def lambda_abstract(self, other):
  92. assert isinstance(other, GlueFormula)
  93. assert isinstance(other.meaning, AbstractVariableExpression)
  94. return self.__class__(
  95. self.make_LambdaExpression(other.meaning.variable, self.meaning),
  96. linearlogic.ImpExpression(other.glue, self.glue),
  97. )
  98. def compile(self, counter=None):
  99. """From Iddo Lev's PhD Dissertation p108-109"""
  100. if not counter:
  101. counter = Counter()
  102. (compiled_glue, new_forms) = self.glue.simplify().compile_pos(
  103. counter, self.__class__
  104. )
  105. return new_forms + [
  106. self.__class__(self.meaning, compiled_glue, set([counter.get()]))
  107. ]
  108. def simplify(self):
  109. return self.__class__(
  110. self.meaning.simplify(), self.glue.simplify(), self.indices
  111. )
  112. def __eq__(self, other):
  113. return (
  114. self.__class__ == other.__class__
  115. and self.meaning == other.meaning
  116. and self.glue == other.glue
  117. )
  118. def __ne__(self, other):
  119. return not self == other
  120. # sorting for use in doctests which must be deterministic
  121. def __lt__(self, other):
  122. return str(self) < str(other)
  123. def __str__(self):
  124. assert isinstance(self.indices, set)
  125. accum = '%s : %s' % (self.meaning, self.glue)
  126. if self.indices:
  127. accum += ' : {' + ', '.join(str(index) for index in self.indices) + '}'
  128. return accum
  129. def __repr__(self):
  130. return "%s" % self
  131. @python_2_unicode_compatible
  132. class GlueDict(dict):
  133. def __init__(self, filename, encoding=None):
  134. self.filename = filename
  135. self.file_encoding = encoding
  136. self.read_file()
  137. def read_file(self, empty_first=True):
  138. if empty_first:
  139. self.clear()
  140. try:
  141. contents = nltk.data.load(
  142. self.filename, format='text', encoding=self.file_encoding
  143. )
  144. # TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load()
  145. except LookupError as e:
  146. try:
  147. contents = nltk.data.load(
  148. 'file:' + self.filename, format='text', encoding=self.file_encoding
  149. )
  150. except LookupError:
  151. raise e
  152. lines = contents.splitlines()
  153. for line in lines: # example: 'n : (\\x.(<word> x), (v-or))'
  154. # lambdacalc -^ linear logic -^
  155. line = line.strip() # remove trailing newline
  156. if not len(line):
  157. continue # skip empty lines
  158. if line[0] == '#':
  159. continue # skip commented out lines
  160. parts = line.split(
  161. ' : ', 2
  162. ) # ['verb', '(\\x.(<word> x), ( subj -o f ))', '[subj]']
  163. glue_formulas = []
  164. paren_count = 0
  165. tuple_start = 0
  166. tuple_comma = 0
  167. relationships = None
  168. if len(parts) > 1:
  169. for (i, c) in enumerate(parts[1]):
  170. if c == '(':
  171. if paren_count == 0: # if it's the first '(' of a tuple
  172. tuple_start = i + 1 # then save the index
  173. paren_count += 1
  174. elif c == ')':
  175. paren_count -= 1
  176. if paren_count == 0: # if it's the last ')' of a tuple
  177. meaning_term = parts[1][
  178. tuple_start:tuple_comma
  179. ] # '\\x.(<word> x)'
  180. glue_term = parts[1][tuple_comma + 1 : i] # '(v-r)'
  181. glue_formulas.append(
  182. [meaning_term, glue_term]
  183. ) # add the GlueFormula to the list
  184. elif c == ',':
  185. if (
  186. paren_count == 1
  187. ): # if it's a comma separating the parts of the tuple
  188. tuple_comma = i # then save the index
  189. elif c == '#': # skip comments at the ends of lines
  190. if (
  191. paren_count != 0
  192. ): # if the line hasn't parsed correctly so far
  193. raise RuntimeError(
  194. 'Formula syntax is incorrect for entry ' + line
  195. )
  196. break # break to the next line
  197. if len(parts) > 2: # if there is a relationship entry at the end
  198. rel_start = parts[2].index('[') + 1
  199. rel_end = parts[2].index(']')
  200. if rel_start == rel_end:
  201. relationships = frozenset()
  202. else:
  203. relationships = frozenset(
  204. r.strip() for r in parts[2][rel_start:rel_end].split(',')
  205. )
  206. try:
  207. start_inheritance = parts[0].index('(')
  208. end_inheritance = parts[0].index(')')
  209. sem = parts[0][:start_inheritance].strip()
  210. supertype = parts[0][start_inheritance + 1 : end_inheritance]
  211. except:
  212. sem = parts[0].strip()
  213. supertype = None
  214. if sem not in self:
  215. self[sem] = {}
  216. if (
  217. relationships is None
  218. ): # if not specified for a specific relationship set
  219. # add all relationship entries for parents
  220. if supertype:
  221. for rels in self[supertype]:
  222. if rels not in self[sem]:
  223. self[sem][rels] = []
  224. glue = self[supertype][rels]
  225. self[sem][rels].extend(glue)
  226. self[sem][rels].extend(
  227. glue_formulas
  228. ) # add the glue formulas to every rel entry
  229. else:
  230. if None not in self[sem]:
  231. self[sem][None] = []
  232. self[sem][None].extend(
  233. glue_formulas
  234. ) # add the glue formulas to every rel entry
  235. else:
  236. if relationships not in self[sem]:
  237. self[sem][relationships] = []
  238. if supertype:
  239. self[sem][relationships].extend(self[supertype][relationships])
  240. self[sem][relationships].extend(
  241. glue_formulas
  242. ) # add the glue entry to the dictionary
  243. def __str__(self):
  244. accum = ''
  245. for pos in self:
  246. str_pos = "%s" % pos
  247. for relset in self[pos]:
  248. i = 1
  249. for gf in self[pos][relset]:
  250. if i == 1:
  251. accum += str_pos + ': '
  252. else:
  253. accum += ' ' * (len(str_pos) + 2)
  254. accum += "%s" % gf
  255. if relset and i == len(self[pos][relset]):
  256. accum += ' : %s' % relset
  257. accum += '\n'
  258. i += 1
  259. return accum
  260. def to_glueformula_list(self, depgraph, node=None, counter=None, verbose=False):
  261. if node is None:
  262. # TODO: should it be depgraph.root? Is this code tested?
  263. top = depgraph.nodes[0]
  264. depList = list(chain(*top['deps'].values()))
  265. root = depgraph.nodes[depList[0]]
  266. return self.to_glueformula_list(depgraph, root, Counter(), verbose)
  267. glueformulas = self.lookup(node, depgraph, counter)
  268. for dep_idx in chain(*node['deps'].values()):
  269. dep = depgraph.nodes[dep_idx]
  270. glueformulas.extend(
  271. self.to_glueformula_list(depgraph, dep, counter, verbose)
  272. )
  273. return glueformulas
  274. def lookup(self, node, depgraph, counter):
  275. semtype_names = self.get_semtypes(node)
  276. semtype = None
  277. for name in semtype_names:
  278. if name in self:
  279. semtype = self[name]
  280. break
  281. if semtype is None:
  282. # raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
  283. return []
  284. self.add_missing_dependencies(node, depgraph)
  285. lookup = self._lookup_semtype_option(semtype, node, depgraph)
  286. if not len(lookup):
  287. raise KeyError(
  288. "There is no GlueDict entry for sem type of '%s' "
  289. "with tag '%s', and rel '%s'" % (node['word'], node['tag'], node['rel'])
  290. )
  291. return self.get_glueformulas_from_semtype_entry(
  292. lookup, node['word'], node, depgraph, counter
  293. )
  294. def add_missing_dependencies(self, node, depgraph):
  295. rel = node['rel'].lower()
  296. if rel == 'main':
  297. headnode = depgraph.nodes[node['head']]
  298. subj = self.lookup_unique('subj', headnode, depgraph)
  299. relation = subj['rel']
  300. node['deps'].setdefault(relation, [])
  301. node['deps'][relation].append(subj['address'])
  302. # node['deps'].append(subj['address'])
  303. def _lookup_semtype_option(self, semtype, node, depgraph):
  304. relationships = frozenset(
  305. depgraph.nodes[dep]['rel'].lower()
  306. for dep in chain(*node['deps'].values())
  307. if depgraph.nodes[dep]['rel'].lower() not in OPTIONAL_RELATIONSHIPS
  308. )
  309. try:
  310. lookup = semtype[relationships]
  311. except KeyError:
  312. # An exact match is not found, so find the best match where
  313. # 'best' is defined as the glue entry whose relationship set has the
  314. # most relations of any possible relationship set that is a subset
  315. # of the actual depgraph
  316. best_match = frozenset()
  317. for relset_option in set(semtype) - set([None]):
  318. if (
  319. len(relset_option) > len(best_match)
  320. and relset_option < relationships
  321. ):
  322. best_match = relset_option
  323. if not best_match:
  324. if None in semtype:
  325. best_match = None
  326. else:
  327. return None
  328. lookup = semtype[best_match]
  329. return lookup
  330. def get_semtypes(self, node):
  331. """
  332. Based on the node, return a list of plausible semtypes in order of
  333. plausibility.
  334. """
  335. rel = node['rel'].lower()
  336. word = node['word'].lower()
  337. if rel == 'spec':
  338. if word in SPEC_SEMTYPES:
  339. return [SPEC_SEMTYPES[word]]
  340. else:
  341. return [SPEC_SEMTYPES['default']]
  342. elif rel in ['nmod', 'vmod']:
  343. return [node['tag'], rel]
  344. else:
  345. return [node['tag']]
  346. def get_glueformulas_from_semtype_entry(
  347. self, lookup, word, node, depgraph, counter
  348. ):
  349. glueformulas = []
  350. glueFormulaFactory = self.get_GlueFormula_factory()
  351. for meaning, glue in lookup:
  352. gf = glueFormulaFactory(self.get_meaning_formula(meaning, word), glue)
  353. if not len(glueformulas):
  354. gf.word = word
  355. else:
  356. gf.word = '%s%s' % (word, len(glueformulas) + 1)
  357. gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get())
  358. glueformulas.append(gf)
  359. return glueformulas
  360. def get_meaning_formula(self, generic, word):
  361. """
  362. :param generic: A meaning formula string containing the
  363. parameter "<word>"
  364. :param word: The actual word to be replace "<word>"
  365. """
  366. word = word.replace('.', '')
  367. return generic.replace('<word>', word)
  368. def initialize_labels(self, expr, node, depgraph, unique_index):
  369. if isinstance(expr, linearlogic.AtomicExpression):
  370. name = self.find_label_name(expr.name.lower(), node, depgraph, unique_index)
  371. if name[0].isupper():
  372. return linearlogic.VariableExpression(name)
  373. else:
  374. return linearlogic.ConstantExpression(name)
  375. else:
  376. return linearlogic.ImpExpression(
  377. self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
  378. self.initialize_labels(expr.consequent, node, depgraph, unique_index),
  379. )
  380. def find_label_name(self, name, node, depgraph, unique_index):
  381. try:
  382. dot = name.index('.')
  383. before_dot = name[:dot]
  384. after_dot = name[dot + 1 :]
  385. if before_dot == 'super':
  386. return self.find_label_name(
  387. after_dot, depgraph.nodes[node['head']], depgraph, unique_index
  388. )
  389. else:
  390. return self.find_label_name(
  391. after_dot,
  392. self.lookup_unique(before_dot, node, depgraph),
  393. depgraph,
  394. unique_index,
  395. )
  396. except ValueError:
  397. lbl = self.get_label(node)
  398. if name == 'f':
  399. return lbl
  400. elif name == 'v':
  401. return '%sv' % lbl
  402. elif name == 'r':
  403. return '%sr' % lbl
  404. elif name == 'super':
  405. return self.get_label(depgraph.nodes[node['head']])
  406. elif name == 'var':
  407. return '%s%s' % (lbl.upper(), unique_index)
  408. elif name == 'a':
  409. return self.get_label(self.lookup_unique('conja', node, depgraph))
  410. elif name == 'b':
  411. return self.get_label(self.lookup_unique('conjb', node, depgraph))
  412. else:
  413. return self.get_label(self.lookup_unique(name, node, depgraph))
  414. def get_label(self, node):
  415. """
  416. Pick an alphabetic character as identifier for an entity in the model.
  417. :param value: where to index into the list of characters
  418. :type value: int
  419. """
  420. value = node['address']
  421. letter = [
  422. 'f',
  423. 'g',
  424. 'h',
  425. 'i',
  426. 'j',
  427. 'k',
  428. 'l',
  429. 'm',
  430. 'n',
  431. 'o',
  432. 'p',
  433. 'q',
  434. 'r',
  435. 's',
  436. 't',
  437. 'u',
  438. 'v',
  439. 'w',
  440. 'x',
  441. 'y',
  442. 'z',
  443. 'a',
  444. 'b',
  445. 'c',
  446. 'd',
  447. 'e',
  448. ][value - 1]
  449. num = int(value) // 26
  450. if num > 0:
  451. return letter + str(num)
  452. else:
  453. return letter
  454. def lookup_unique(self, rel, node, depgraph):
  455. """
  456. Lookup 'key'. There should be exactly one item in the associated relation.
  457. """
  458. deps = [
  459. depgraph.nodes[dep]
  460. for dep in chain(*node['deps'].values())
  461. if depgraph.nodes[dep]['rel'].lower() == rel.lower()
  462. ]
  463. if len(deps) == 0:
  464. raise KeyError("'%s' doesn't contain a feature '%s'" % (node['word'], rel))
  465. elif len(deps) > 1:
  466. raise KeyError(
  467. "'%s' should only have one feature '%s'" % (node['word'], rel)
  468. )
  469. else:
  470. return deps[0]
  471. def get_GlueFormula_factory(self):
  472. return GlueFormula
  473. class Glue(object):
  474. def __init__(
  475. self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
  476. ):
  477. self.verbose = verbose
  478. self.remove_duplicates = remove_duplicates
  479. self.depparser = depparser
  480. from nltk import Prover9
  481. self.prover = Prover9()
  482. if semtype_file:
  483. self.semtype_file = semtype_file
  484. else:
  485. self.semtype_file = os.path.join(
  486. 'grammars', 'sample_grammars', 'glue.semtype'
  487. )
  488. def train_depparser(self, depgraphs=None):
  489. if depgraphs:
  490. self.depparser.train(depgraphs)
  491. else:
  492. self.depparser.train_from_file(
  493. nltk.data.find(
  494. os.path.join('grammars', 'sample_grammars', 'glue_train.conll')
  495. )
  496. )
  497. def parse_to_meaning(self, sentence):
  498. readings = []
  499. for agenda in self.parse_to_compiled(sentence):
  500. readings.extend(self.get_readings(agenda))
  501. return readings
  502. def get_readings(self, agenda):
  503. readings = []
  504. agenda_length = len(agenda)
  505. atomics = dict()
  506. nonatomics = dict()
  507. while agenda: # is not empty
  508. cur = agenda.pop()
  509. glue_simp = cur.glue.simplify()
  510. if isinstance(
  511. glue_simp, linearlogic.ImpExpression
  512. ): # if cur.glue is non-atomic
  513. for key in atomics:
  514. try:
  515. if isinstance(cur.glue, linearlogic.ApplicationExpression):
  516. bindings = cur.glue.bindings
  517. else:
  518. bindings = linearlogic.BindingDict()
  519. glue_simp.antecedent.unify(key, bindings)
  520. for atomic in atomics[key]:
  521. if not (
  522. cur.indices & atomic.indices
  523. ): # if the sets of indices are disjoint
  524. try:
  525. agenda.append(cur.applyto(atomic))
  526. except linearlogic.LinearLogicApplicationException:
  527. pass
  528. except linearlogic.UnificationException:
  529. pass
  530. try:
  531. nonatomics[glue_simp.antecedent].append(cur)
  532. except KeyError:
  533. nonatomics[glue_simp.antecedent] = [cur]
  534. else: # else cur.glue is atomic
  535. for key in nonatomics:
  536. for nonatomic in nonatomics[key]:
  537. try:
  538. if isinstance(
  539. nonatomic.glue, linearlogic.ApplicationExpression
  540. ):
  541. bindings = nonatomic.glue.bindings
  542. else:
  543. bindings = linearlogic.BindingDict()
  544. glue_simp.unify(key, bindings)
  545. if not (
  546. cur.indices & nonatomic.indices
  547. ): # if the sets of indices are disjoint
  548. try:
  549. agenda.append(nonatomic.applyto(cur))
  550. except linearlogic.LinearLogicApplicationException:
  551. pass
  552. except linearlogic.UnificationException:
  553. pass
  554. try:
  555. atomics[glue_simp].append(cur)
  556. except KeyError:
  557. atomics[glue_simp] = [cur]
  558. for entry in atomics:
  559. for gf in atomics[entry]:
  560. if len(gf.indices) == agenda_length:
  561. self._add_to_reading_list(gf, readings)
  562. for entry in nonatomics:
  563. for gf in nonatomics[entry]:
  564. if len(gf.indices) == agenda_length:
  565. self._add_to_reading_list(gf, readings)
  566. return readings
  567. def _add_to_reading_list(self, glueformula, reading_list):
  568. add_reading = True
  569. if self.remove_duplicates:
  570. for reading in reading_list:
  571. try:
  572. if reading.equiv(glueformula.meaning, self.prover):
  573. add_reading = False
  574. break
  575. except Exception as e:
  576. # if there is an exception, the syntax of the formula
  577. # may not be understandable by the prover, so don't
  578. # throw out the reading.
  579. print('Error when checking logical equality of statements', e)
  580. if add_reading:
  581. reading_list.append(glueformula.meaning)
  582. def parse_to_compiled(self, sentence):
  583. gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
  584. return [self.gfl_to_compiled(gfl) for gfl in gfls]
  585. def dep_parse(self, sentence):
  586. """
  587. Return a dependency graph for the sentence.
  588. :param sentence: the sentence to be parsed
  589. :type sentence: list(str)
  590. :rtype: DependencyGraph
  591. """
  592. # Lazy-initialize the depparser
  593. if self.depparser is None:
  594. from nltk.parse import MaltParser
  595. self.depparser = MaltParser(tagger=self.get_pos_tagger())
  596. if not self.depparser._trained:
  597. self.train_depparser()
  598. return self.depparser.parse(sentence, verbose=self.verbose)
  599. def depgraph_to_glue(self, depgraph):
  600. return self.get_glue_dict().to_glueformula_list(depgraph)
  601. def get_glue_dict(self):
  602. return GlueDict(self.semtype_file)
  603. def gfl_to_compiled(self, gfl):
  604. index_counter = Counter()
  605. return_list = []
  606. for gf in gfl:
  607. return_list.extend(gf.compile(index_counter))
  608. if self.verbose:
  609. print('Compiled Glue Premises:')
  610. for cgf in return_list:
  611. print(cgf)
  612. return return_list
  613. def get_pos_tagger(self):
  614. from nltk.corpus import brown
  615. regexp_tagger = RegexpTagger(
  616. [
  617. (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers
  618. (r'(The|the|A|a|An|an)$', 'AT'), # articles
  619. (r'.*able$', 'JJ'), # adjectives
  620. (r'.*ness$', 'NN'), # nouns formed from adjectives
  621. (r'.*ly$', 'RB'), # adverbs
  622. (r'.*s$', 'NNS'), # plural nouns
  623. (r'.*ing$', 'VBG'), # gerunds
  624. (r'.*ed$', 'VBD'), # past tense verbs
  625. (r'.*', 'NN'), # nouns (default)
  626. ]
  627. )
  628. brown_train = brown.tagged_sents(categories='news')
  629. unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
  630. bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
  631. trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)
  632. # Override particular words
  633. main_tagger = RegexpTagger(
  634. [(r'(A|a|An|an)$', 'ex_quant'), (r'(Every|every|All|all)$', 'univ_quant')],
  635. backoff=trigram_tagger,
  636. )
  637. return main_tagger
  638. class DrtGlueFormula(GlueFormula):
  639. def __init__(self, meaning, glue, indices=None):
  640. if not indices:
  641. indices = set()
  642. if isinstance(meaning, string_types):
  643. self.meaning = drt.DrtExpression.fromstring(meaning)
  644. elif isinstance(meaning, drt.DrtExpression):
  645. self.meaning = meaning
  646. else:
  647. raise RuntimeError(
  648. 'Meaning term neither string or expression: %s, %s'
  649. % (meaning, meaning.__class__)
  650. )
  651. if isinstance(glue, string_types):
  652. self.glue = linearlogic.LinearLogicParser().parse(glue)
  653. elif isinstance(glue, linearlogic.Expression):
  654. self.glue = glue
  655. else:
  656. raise RuntimeError(
  657. 'Glue term neither string or expression: %s, %s'
  658. % (glue, glue.__class__)
  659. )
  660. self.indices = indices
  661. def make_VariableExpression(self, name):
  662. return drt.DrtVariableExpression(name)
  663. def make_LambdaExpression(self, variable, term):
  664. return drt.DrtLambdaExpression(variable, term)
  665. class DrtGlueDict(GlueDict):
  666. def get_GlueFormula_factory(self):
  667. return DrtGlueFormula
  668. class DrtGlue(Glue):
  669. def __init__(
  670. self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
  671. ):
  672. if not semtype_file:
  673. semtype_file = os.path.join(
  674. 'grammars', 'sample_grammars', 'drt_glue.semtype'
  675. )
  676. Glue.__init__(self, semtype_file, remove_duplicates, depparser, verbose)
  677. def get_glue_dict(self):
  678. return DrtGlueDict(self.semtype_file)
  679. def demo(show_example=-1):
  680. from nltk.parse import MaltParser
  681. examples = [
  682. 'David sees Mary',
  683. 'David eats a sandwich',
  684. 'every man chases a dog',
  685. 'every man believes a dog sleeps',
  686. 'John gives David a sandwich',
  687. 'John chases himself',
  688. ]
  689. # 'John persuades David to order a pizza',
  690. # 'John tries to go',
  691. # 'John tries to find a unicorn',
  692. # 'John seems to vanish',
  693. # 'a unicorn seems to approach',
  694. # 'every big cat leaves',
  695. # 'every gray cat leaves',
  696. # 'every big gray cat leaves',
  697. # 'a former senator leaves',
  698. print('============== DEMO ==============')
  699. tagger = RegexpTagger(
  700. [
  701. ('^(David|Mary|John)$', 'NNP'),
  702. (
  703. '^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$',
  704. 'VB',
  705. ),
  706. ('^(go|order|vanish|find|approach)$', 'VB'),
  707. ('^(a)$', 'ex_quant'),
  708. ('^(every)$', 'univ_quant'),
  709. ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
  710. ('^(big|gray|former)$', 'JJ'),
  711. ('^(him|himself)$', 'PRP'),
  712. ]
  713. )
  714. depparser = MaltParser(tagger=tagger)
  715. glue = Glue(depparser=depparser, verbose=False)
  716. for (i, sentence) in enumerate(examples):
  717. if i == show_example or show_example == -1:
  718. print('[[[Example %s]]] %s' % (i, sentence))
  719. for reading in glue.parse_to_meaning(sentence.split()):
  720. print(reading.simplify())
  721. print('')
  722. if __name__ == '__main__':
  723. demo()