123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403 |
- # Natural Language Toolkit: Logic
- #
- # Author: Peter Wang
- # Updated by: Dan Garrette <dhgarrette@gmail.com>
- #
- # Copyright (C) 2001-2019 NLTK Project
- # URL: <http://nltk.org>
- # For license information, see LICENSE.TXT
- """
- An implementation of the Hole Semantics model, following Blackburn and Bos,
- Representation and Inference for Natural Language (CSLI, 2005).
- The semantic representations are built by the grammar hole.fcfg.
- This module contains driver code to read in sentences and parse them
- according to a hole semantics grammar.
- After parsing, the semantic representation is in the form of an underspecified
- representation that is not easy to read. We use a "plugging" algorithm to
- convert that representation into first-order logic formulas.
- """
- from __future__ import print_function, unicode_literals
- from functools import reduce
- from six import itervalues
- from nltk import compat
- from nltk.parse import load_parser
- from nltk.sem.skolemize import skolemize
- from nltk.sem.logic import (
- AllExpression,
- AndExpression,
- ApplicationExpression,
- ExistsExpression,
- IffExpression,
- ImpExpression,
- LambdaExpression,
- NegatedExpression,
- OrExpression,
- )
- # Note that in this code there may be multiple types of trees being referred to:
- #
- # 1. parse trees
- # 2. the underspecified representation
- # 3. first-order logic formula trees
- # 4. the search space when plugging (search tree)
- #
- class Constants(object):
- ALL = 'ALL'
- EXISTS = 'EXISTS'
- NOT = 'NOT'
- AND = 'AND'
- OR = 'OR'
- IMP = 'IMP'
- IFF = 'IFF'
- PRED = 'PRED'
- LEQ = 'LEQ'
- HOLE = 'HOLE'
- LABEL = 'LABEL'
- MAP = {
- ALL: lambda v, e: AllExpression(v.variable, e),
- EXISTS: lambda v, e: ExistsExpression(v.variable, e),
- NOT: NegatedExpression,
- AND: AndExpression,
- OR: OrExpression,
- IMP: ImpExpression,
- IFF: IffExpression,
- PRED: ApplicationExpression,
- }
- class HoleSemantics(object):
- """
- This class holds the broken-down components of a hole semantics, i.e. it
- extracts the holes, labels, logic formula fragments and constraints out of
- a big conjunction of such as produced by the hole semantics grammar. It
- then provides some operations on the semantics dealing with holes, labels
- and finding legal ways to plug holes with labels.
- """
- def __init__(self, usr):
- """
- Constructor. `usr' is a ``sem.Expression`` representing an
- Underspecified Representation Structure (USR). A USR has the following
- special predicates:
- ALL(l,v,n),
- EXISTS(l,v,n),
- AND(l,n,n),
- OR(l,n,n),
- IMP(l,n,n),
- IFF(l,n,n),
- PRED(l,v,n,v[,v]*) where the brackets and star indicate zero or more repetitions,
- LEQ(n,n),
- HOLE(n),
- LABEL(n)
- where l is the label of the node described by the predicate, n is either
- a label or a hole, and v is a variable.
- """
- self.holes = set()
- self.labels = set()
- self.fragments = {} # mapping of label -> formula fragment
- self.constraints = set() # set of Constraints
- self._break_down(usr)
- self.top_most_labels = self._find_top_most_labels()
- self.top_hole = self._find_top_hole()
- def is_node(self, x):
- """
- Return true if x is a node (label or hole) in this semantic
- representation.
- """
- return x in (self.labels | self.holes)
- def _break_down(self, usr):
- """
- Extract holes, labels, formula fragments and constraints from the hole
- semantics underspecified representation (USR).
- """
- if isinstance(usr, AndExpression):
- self._break_down(usr.first)
- self._break_down(usr.second)
- elif isinstance(usr, ApplicationExpression):
- func, args = usr.uncurry()
- if func.variable.name == Constants.LEQ:
- self.constraints.add(Constraint(args[0], args[1]))
- elif func.variable.name == Constants.HOLE:
- self.holes.add(args[0])
- elif func.variable.name == Constants.LABEL:
- self.labels.add(args[0])
- else:
- label = args[0]
- assert label not in self.fragments
- self.fragments[label] = (func, args[1:])
- else:
- raise ValueError(usr.label())
- def _find_top_nodes(self, node_list):
- top_nodes = node_list.copy()
- for f in itervalues(self.fragments):
- # the label is the first argument of the predicate
- args = f[1]
- for arg in args:
- if arg in node_list:
- top_nodes.discard(arg)
- return top_nodes
- def _find_top_most_labels(self):
- """
- Return the set of labels which are not referenced directly as part of
- another formula fragment. These will be the top-most labels for the
- subtree that they are part of.
- """
- return self._find_top_nodes(self.labels)
- def _find_top_hole(self):
- """
- Return the hole that will be the top of the formula tree.
- """
- top_holes = self._find_top_nodes(self.holes)
- assert len(top_holes) == 1 # it must be unique
- return top_holes.pop()
- def pluggings(self):
- """
- Calculate and return all the legal pluggings (mappings of labels to
- holes) of this semantics given the constraints.
- """
- record = []
- self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {}, record)
- return record
- def _plug_nodes(self, queue, potential_labels, plug_acc, record):
- """
- Plug the nodes in `queue' with the labels in `potential_labels'.
- Each element of `queue' is a tuple of the node to plug and the list of
- ancestor holes from the root of the graph to that node.
- `potential_labels' is a set of the labels which are still available for
- plugging.
- `plug_acc' is the incomplete mapping of holes to labels made on the
- current branch of the search tree so far.
- `record' is a list of all the complete pluggings that we have found in
- total so far. It is the only parameter that is destructively updated.
- """
- if queue != []:
- (node, ancestors) = queue[0]
- if node in self.holes:
- # The node is a hole, try to plug it.
- self._plug_hole(
- node, ancestors, queue[1:], potential_labels, plug_acc, record
- )
- else:
- assert node in self.labels
- # The node is a label. Replace it in the queue by the holes and
- # labels in the formula fragment named by that label.
- args = self.fragments[node][1]
- head = [(a, ancestors) for a in args if self.is_node(a)]
- self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record)
- else:
- raise Exception('queue empty')
- def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record):
- """
- Try all possible ways of plugging a single hole.
- See _plug_nodes for the meanings of the parameters.
- """
- # Add the current hole we're trying to plug into the list of ancestors.
- assert hole not in ancestors0
- ancestors = [hole] + ancestors0
- # Try each potential label in this hole in turn.
- for l in potential_labels0:
- # Is the label valid in this hole?
- if self._violates_constraints(l, ancestors):
- continue
- plug_acc = plug_acc0.copy()
- plug_acc[hole] = l
- potential_labels = potential_labels0.copy()
- potential_labels.remove(l)
- if len(potential_labels) == 0:
- # No more potential labels. That must mean all the holes have
- # been filled so we have found a legal plugging so remember it.
- #
- # Note that the queue might not be empty because there might
- # be labels on there that point to formula fragments with
- # no holes in them. _sanity_check_plugging will make sure
- # all holes are filled.
- self._sanity_check_plugging(plug_acc, self.top_hole, [])
- record.append(plug_acc)
- else:
- # Recursively try to fill in the rest of the holes in the
- # queue. The label we just plugged into the hole could have
- # holes of its own so at the end of the queue. Putting it on
- # the end of the queue gives us a breadth-first search, so that
- # all the holes at level i of the formula tree are filled
- # before filling level i+1.
- # A depth-first search would work as well since the trees must
- # be finite but the bookkeeping would be harder.
- self._plug_nodes(
- queue + [(l, ancestors)], potential_labels, plug_acc, record
- )
- def _violates_constraints(self, label, ancestors):
- """
- Return True if the `label' cannot be placed underneath the holes given
- by the set `ancestors' because it would violate the constraints imposed
- on it.
- """
- for c in self.constraints:
- if c.lhs == label:
- if c.rhs not in ancestors:
- return True
- return False
- def _sanity_check_plugging(self, plugging, node, ancestors):
- """
- Make sure that a given plugging is legal. We recursively go through
- each node and make sure that no constraints are violated.
- We also check that all holes have been filled.
- """
- if node in self.holes:
- ancestors = [node] + ancestors
- label = plugging[node]
- else:
- label = node
- assert label in self.labels
- for c in self.constraints:
- if c.lhs == label:
- assert c.rhs in ancestors
- args = self.fragments[label][1]
- for arg in args:
- if self.is_node(arg):
- self._sanity_check_plugging(plugging, arg, [label] + ancestors)
- def formula_tree(self, plugging):
- """
- Return the first-order logic formula tree for this underspecified
- representation using the plugging given.
- """
- return self._formula_tree(plugging, self.top_hole)
- def _formula_tree(self, plugging, node):
- if node in plugging:
- return self._formula_tree(plugging, plugging[node])
- elif node in self.fragments:
- pred, args = self.fragments[node]
- children = [self._formula_tree(plugging, arg) for arg in args]
- return reduce(Constants.MAP[pred.variable.name], children)
- else:
- return node
- @compat.python_2_unicode_compatible
- class Constraint(object):
- """
- This class represents a constraint of the form (L =< N),
- where L is a label and N is a node (a label or a hole).
- """
- def __init__(self, lhs, rhs):
- self.lhs = lhs
- self.rhs = rhs
- def __eq__(self, other):
- if self.__class__ == other.__class__:
- return self.lhs == other.lhs and self.rhs == other.rhs
- else:
- return False
- def __ne__(self, other):
- return not (self == other)
- def __hash__(self):
- return hash(repr(self))
- def __repr__(self):
- return '(%s < %s)' % (self.lhs, self.rhs)
- def hole_readings(sentence, grammar_filename=None, verbose=False):
- if not grammar_filename:
- grammar_filename = 'grammars/sample_grammars/hole.fcfg'
- if verbose:
- print('Reading grammar file', grammar_filename)
- parser = load_parser(grammar_filename)
- # Parse the sentence.
- tokens = sentence.split()
- trees = list(parser.parse(tokens))
- if verbose:
- print('Got %d different parses' % len(trees))
- all_readings = []
- for tree in trees:
- # Get the semantic feature from the top of the parse tree.
- sem = tree.label()['SEM'].simplify()
- # Print the raw semantic representation.
- if verbose:
- print('Raw: ', sem)
- # Skolemize away all quantifiers. All variables become unique.
- while isinstance(sem, LambdaExpression):
- sem = sem.term
- skolemized = skolemize(sem)
- if verbose:
- print('Skolemized:', skolemized)
- # Break the hole semantics representation down into its components
- # i.e. holes, labels, formula fragments and constraints.
- hole_sem = HoleSemantics(skolemized)
- # Maybe show the details of the semantic representation.
- if verbose:
- print('Holes: ', hole_sem.holes)
- print('Labels: ', hole_sem.labels)
- print('Constraints: ', hole_sem.constraints)
- print('Top hole: ', hole_sem.top_hole)
- print('Top labels: ', hole_sem.top_most_labels)
- print('Fragments:')
- for l, f in hole_sem.fragments.items():
- print('\t%s: %s' % (l, f))
- # Find all the possible ways to plug the formulas together.
- pluggings = hole_sem.pluggings()
- # Build FOL formula trees using the pluggings.
- readings = list(map(hole_sem.formula_tree, pluggings))
- # Print out the formulas in a textual format.
- if verbose:
- for i, r in enumerate(readings):
- print()
- print('%d. %s' % (i, r))
- print()
- all_readings.extend(readings)
- return all_readings
- if __name__ == '__main__':
- for r in hole_readings('a dog barks'):
- print(r)
- print()
- for r in hole_readings('every girl chases a dog'):
- print(r)
|