# Natural Language Toolkit: Semantic Interpretation
#
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
#
# Copyright (C) 2001-2019 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
Utility functions for batch-processing sentences: parsing and
extraction of the semantic representation of the root node of the
syntax tree, followed by evaluation of the semantic representation in
a first-order model.
"""

from __future__ import print_function, unicode_literals

import codecs

from nltk.sem import evaluate


##############################################################
## Utility functions for connecting parse output to semantics
##############################################################

def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(list(nltk.tree.Tree))
    :return: a list, parallel to ``inputs``, of the ``Tree`` parses for each sentence
    """
    # Put imports here to avoid circular dependencies.
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses
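
# Illustrative usage (a sketch, not part of the module API; assumes the
# sample grammar 'grammars/sample_grammars/sem2.fcfg' from the NLTK data
# package is installed):
#
#     trees = parse_sents(['John sees Mary'], 'grammars/sample_grammars/sem2.fcfg')
#     for tree in trees[0]:  # all parses of the first sentence
#         print(tree)
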
def root_semrep(syntree, semkey='SEM'):
    """
    Find the semantic representation at the root of a tree.

    :param syntree: a parse ``Tree``
    :param semkey: the feature label to use for the root semantics in the tree
    :return: the semantic representation at the root of a ``Tree``
    :rtype: sem.Expression
    """
    from nltk.grammar import FeatStructNonterminal

    node = syntree.label()
    assert isinstance(node, FeatStructNonterminal)
    try:
        return node[semkey]
    except KeyError:
        print(node, end=' ')
        print("has no specification for the feature %s" % semkey)
        raise
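
# Illustrative usage (sketch; the grammar path and sentence are assumptions):
# pull the semantics off a single parse tree.
#
#     trees = parse_sents(['John sees Mary'], 'grammars/sample_grammars/sem2.fcfg')
#     sem = root_semrep(trees[0][0])  # the root SEM feature of the first parse
#     print(sem)
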
def interpret_sents(inputs, grammar, semkey='SEM', trace=0):
    """
    Add the semantic representation to each syntactic parse tree
    of each input sentence.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :return: a list, parallel to ``inputs``, of lists of (parse-tree, semantic-representation) pairs
    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
    """
    return [
        [(syn, root_semrep(syn, semkey)) for syn in syntrees]
        for syntrees in parse_sents(inputs, grammar, trace=trace)
    ]
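
# Illustrative usage (sketch; grammar path and sentence are assumptions):
#
#     for readings in interpret_sents(['John sees Mary'], 'grammars/sample_grammars/sem2.fcfg'):
#         for (syn, sem) in readings:
#             print(sem)
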
def evaluate_sents(inputs, grammar, model, assignment, trace=0):
    """
    Add the truth-in-a-model value to each semantic representation
    for each syntactic parse of each input sentence.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :return: a list, parallel to ``inputs``, of lists of (parse-tree, semantic-representation, evaluation-in-model) triples
    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
    """
    return [
        [
            (syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
            for (syn, sem) in interpretations
        ]
        for interpretations in interpret_sents(inputs, grammar)
    ]
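
# Illustrative usage (sketch; relies on demo_model0() below to bind the
# globals m0 and g0, and assumes the sem2.fcfg sample grammar is installed):
#
#     demo_model0()
#     results = evaluate_sents(['John sees Mary'], 'grammars/sample_grammars/sem2.fcfg', m0, g0)
#     for (syn, sem, value) in results[0]:
#         print(sem, '=>', value)
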
def demo_model0():
    global m0, g0
    # Initialize a valuation of non-logical constants.
    v = [
        ('john', 'b1'),
        ('mary', 'g1'),
        ('suzie', 'g2'),
        ('fido', 'd1'),
        ('tess', 'd2'),
        ('noosa', 'n'),
        ('girl', set(['g1', 'g2'])),
        ('boy', set(['b1', 'b2'])),
        ('dog', set(['d1', 'd2'])),
        ('bark', set(['d1', 'd2'])),
        ('walk', set(['b1', 'g2', 'd1'])),
        ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])),
        (
            'see',
            set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'), ('d2', 'b1'), ('g2', 'n')]),
        ),
        ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])),
        ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')])),
    ]
    # Read in the data from ``v``
    val = evaluate.Valuation(v)
    # Bind ``dom`` to the ``domain`` property of ``val``
    dom = val.domain
    # Initialize a model with parameters ``dom`` and ``val``.
    m0 = evaluate.Model(dom, val)
    # Initialize a variable assignment with parameter ``dom``
    g0 = evaluate.Assignment(dom)
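
# Illustrative usage (sketch): query the demo model directly.
#
#     demo_model0()
#     print(m0.evaluate('walk(john)', g0))  # True: 'john' denotes 'b1', which is in the 'walk' set
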
def read_sents(filename, encoding='utf8'):
    with codecs.open(filename, 'r', encoding) as fp:
        sents = [l.rstrip() for l in fp]
    # get rid of blank lines and comment lines
    sents = [l for l in sents if len(l) > 0]
    sents = [l for l in sents if not l[0] == '#']
    return sents
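
# Illustrative usage (sketch; 'sentences.txt' is a hypothetical file holding
# one sentence per line, with blank lines and '#'-prefixed lines skipped):
#
#     sents = read_sents('sentences.txt')
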
def demo_legacy_grammar():
    """
    Check that interpret_sents() is compatible with legacy grammars that use
    a lowercase 'sem' feature, such as the grammar defined inline below.
    """
    from nltk.grammar import FeatureGrammar

    g = FeatureGrammar.fromstring(
        """
        % start S
        S[sem=<hello>] -> 'hello'
        """
    )
    print("Reading grammar: %s" % g)
    print("*" * 20)
    for reading in interpret_sents(['hello'], g, semkey='sem'):
        syn, sem = reading[0]
        print()
        print("output: ", sem)

def demo():
    from optparse import OptionParser

    description = """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(
        evaluate=True,
        beta=True,
        syntrace=0,
        semtrace=0,
        demo='default',
        grammar='',
        sentences='',
    )

    opts.add_option(
        "-d",
        "--demo",
        dest="demo",
        help="choose demo D; omit this for the default demo, or specify 'chat80'",
        metavar="D",
    )
    opts.add_option(
        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
    )
    opts.add_option(
        "-m",
        "--model",
        dest="model",
        help="import model M (omit '.py' suffix)",
        metavar="M",
    )
    opts.add_option(
        "-s",
        "--sentences",
        dest="sentences",
        help="read in a file of test sentences S",
        metavar="S",
    )
    opts.add_option(
        "-e",
        "--no-eval",
        action="store_false",
        dest="evaluate",
        help="just do a syntactic analysis",
    )
    opts.add_option(
        "-b",
        "--no-beta-reduction",
        action="store_false",
        dest="beta",
        help="don't carry out beta-reduction",
    )
    opts.add_option(
        "-t",
        "--syntrace",
        action="count",
        dest="syntrace",
        help="set syntactic tracing on; requires '-e' option",
    )
    opts.add_option(
        "-T",
        "--semtrace",
        action="count",
        dest="semtrace",
        help="set semantic tracing on",
    )

    (options, args) = opts.parse_args()

    SPACER = '-' * 30

    demo_model0()

    sents = [
        'Fido sees a boy with Mary',
        'John sees Mary',
        'every girl chases a dog',
        'every boy chases a girl',
        'John walks with a girl in Noosa',
        'who walks',
    ]
    gramfile = 'grammars/sample_grammars/sem2.fcfg'

    if options.sentences:
        sents = read_sents(options.sentences)
    if options.grammar:
        gramfile = options.grammar

    # Set the model and assignment: default to the demo model, but let the
    # '-m' option replace it with an imported module, which is expected to
    # provide a compatible evaluate() method.
    model = m0
    g = g0
    if options.model:
        import importlib

        model = importlib.import_module(options.model)

    if options.evaluate:
        evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
    else:
        semreps = interpret_sents(sents, gramfile, trace=options.syntrace)

    for i, sent in enumerate(sents):
        n = 1
        print('\nSentence: %s' % sent)
        print(SPACER)
        if options.evaluate:
            for (syntree, semrep, value) in evaluations[i]:
                if isinstance(value, dict):
                    value = set(value.keys())
                print('%d: %s' % (n, semrep))
                print(value)
                n += 1
        else:
            for (syntree, semrep) in semreps[i]:
                print('%d: %s' % (n, semrep))
                n += 1

if __name__ == "__main__":
    demo()
    demo_legacy_grammar()