123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- # Natural Language Toolkit: Lexical Functional Grammar
- #
- # Author: Dan Garrette <dhgarrette@gmail.com>
- #
- # Copyright (C) 2001-2019 NLTK Project
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- from __future__ import print_function, division, unicode_literals
- from itertools import chain
- from nltk.internals import Counter
- from nltk.compat import python_2_unicode_compatible
@python_2_unicode_compatible
class FStructure(dict):
    """
    An LFG f-structure, stored as a dictionary from feature names to lists
    of values.

    ``__setitem__``, ``__getitem__`` and ``__contains__`` lower-case the key,
    so item access through those operations is case-insensitive.  Other
    inherited ``dict`` methods (``get``, ``setdefault``, ``update``, ...) are
    not overridden and therefore do not lower-case their keys.

    The values stored under a feature are lists whose items are an
    ``FStructure``, a ``(word, tag)`` tuple, or a list.

    Structures built by ``read_depgraph`` also carry three attributes:
    ``pred`` (a ``(word, tag)`` tuple), ``label`` (a short identifier
    produced by ``_make_label``) and ``parent`` (the enclosing ``FStructure``,
    or None for the outermost one).
    """

    # Letters used by ``_make_label``; the sequence starts at 'f' and wraps
    # around to 'a'..'e'.
    _LABEL_LETTERS = 'fghijklmnopqrstuvwxyzabcde'

    def safeappend(self, key, item):
        """
        Append 'item' to the list at 'key'.  If no list exists for 'key', then
        construct one.

        :param key: the feature name (lower-cased on access)
        :type key: str
        :param item: the value to append
        """
        if key not in self:
            self[key] = []
        self[key].append(item)

    def __setitem__(self, key, value):
        """Store 'value' under the lower-cased 'key'."""
        dict.__setitem__(self, key.lower(), value)

    def __getitem__(self, key):
        """Return the value stored under the lower-cased 'key'."""
        return dict.__getitem__(self, key.lower())

    def __contains__(self, key):
        """Return True if the lower-cased 'key' is present."""
        return dict.__contains__(self, key.lower())

    def to_glueformula_list(self, glue_dict):
        """
        Convert this structure to a dependency graph and hand it to
        ``glue_dict.to_glueformula_list``.

        :param glue_dict: object providing ``to_glueformula_list(depgraph)``
        :return: whatever ``glue_dict.to_glueformula_list`` returns
        """
        depgraph = self.to_depgraph()
        return glue_dict.to_glueformula_list(depgraph)

    def to_depgraph(self, rel=None):
        """
        Build a ``DependencyGraph`` whose nodes mirror this f-structure.

        :param rel: unused; kept so existing callers keep working
        :return: the populated dependency graph, with ``root`` set to the
            node at address 1
        :rtype: DependencyGraph
        """
        from nltk.parse.dependencygraph import DependencyGraph

        depgraph = DependencyGraph()
        nodes = depgraph.nodes

        self._to_depgraph(nodes, 0, 'ROOT')

        # Add all the dependencies for all the nodes.  A single pass is
        # enough: each node is registered with its head, in node order.
        # The membership test avoids creating new entries while iterating,
        # in case ``nodes`` auto-creates entries on lookup.
        for node in nodes.values():
            if node['rel'] == 'TOP':
                continue
            head = node['head']
            if head in nodes:
                head_deps = nodes[head]['deps']
                head_deps.setdefault(node['rel'], []).append(node['address'])

        # NOTE(review): presumably a fresh DependencyGraph already contains
        # its artificial node 0, so this structure lands at address 1 --
        # confirm against DependencyGraph.
        depgraph.root = nodes[1]

        return depgraph

    @staticmethod
    def _add_depgraph_node(nodes, word, tag, head, rel):
        """
        Fill in the next free node of 'nodes' and return its address.

        The address is ``len(nodes)`` at the time of the call.
        """
        address = len(nodes)
        nodes[address].update(
            {'address': address, 'word': word, 'tag': tag, 'head': head, 'rel': rel}
        )
        return address

    @staticmethod
    def _ordered(items):
        """
        Return 'items' sorted when they are mutually comparable, otherwise in
        their existing order.

        On Python 3, ``sorted`` raises TypeError for a list holding several
        FStructures (dicts are unorderable) or a mix of item types.
        """
        try:
            return sorted(items)
        except TypeError:
            return list(items)

    def _to_depgraph(self, nodes, head, rel):
        """
        Add a node for this structure and, recursively, for all its features.

        :param nodes: the node mapping of the dependency graph being built
        :param head: address of the node this structure depends on
        :type head: int
        :param rel: relation between this structure and its head
        :type rel: str
        :raise Exception: if a feature holds an item that is not an
            FStructure, a tuple, or a list
        """
        index = self._add_depgraph_node(nodes, self.pred[0], self.pred[1], head, rel)

        for feature in sorted(self):
            for item in self._ordered(self[feature]):
                if isinstance(item, FStructure):
                    item._to_depgraph(nodes, index, feature)
                elif isinstance(item, tuple):
                    # A bare (word, tag) pair becomes a leaf node.
                    self._add_depgraph_node(nodes, item[0], item[1], index, feature)
                elif isinstance(item, list):
                    for member in item:
                        member._to_depgraph(nodes, index, feature)
                else:
                    raise Exception(
                        'feature %s is not an FStruct, a list, or a tuple' % feature
                    )

    @staticmethod
    def read_depgraph(depgraph):
        """
        Build an f-structure from 'depgraph', starting at ``depgraph.root``.

        :param depgraph: the dependency graph to read
        :type depgraph: DependencyGraph
        :return: an ``FStructure``, or a ``(word, tag)`` tuple when the root's
            relation is 'spec' or 'punct'
        """
        return FStructure._read_depgraph(depgraph.root, depgraph)

    @staticmethod
    def _read_depgraph(node, depgraph, label_counter=None, parent=None):
        """
        Recursively convert 'node' and its dependents into an f-structure.

        :param node: the dependency graph node to convert
        :type node: dict
        :param depgraph: the graph that 'node' belongs to
        :type depgraph: DependencyGraph
        :param label_counter: counter supplying label numbers; a new one is
            created when None
        :type label_counter: Counter
        :param parent: the structure that will contain the result
        :type parent: FStructure
        :return: an ``FStructure``, or a ``(word, tag)`` tuple for nodes whose
            relation is 'spec' or 'punct'
        """
        if label_counter is None:
            label_counter = Counter()

        if node['rel'].lower() in ['spec', 'punct']:
            # the value of a 'spec' entry is a word, not an FStructure
            return (node['word'], node['tag'])

        fstruct = FStructure()
        fstruct.label = FStructure._make_label(label_counter.get())
        fstruct.parent = parent

        word, tag = node['word'], node['tag']
        if tag[:2] == 'VB':
            # Verb tags are reduced to 'VB'; a 'VBD' tag is recorded as a
            # separate past-tense feature.
            if tag[2:3] == 'D':
                fstruct.safeappend('tense', ('PAST', 'tense'))
            fstruct.pred = (word, tag[:2])
        else:
            fstruct.pred = (word, tag)

        children = [depgraph.nodes[idx] for idx in chain(*node['deps'].values())]
        for child in children:
            fstruct.safeappend(
                child['rel'],
                FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
            )

        return fstruct

    @staticmethod
    def _make_label(value):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        The letters cycle through 'f'..'z', 'a'..'e'; the quotient
        ``value // 26`` is appended as a suffix when it is positive, e.g.
        1 -> 'f', 26 -> 'e1', 27 -> 'f1'.

        :param value: where to index into the list of characters
        :type value: int
        :return: the label
        :rtype: str
        """
        # Wrap around so that values above 26 no longer raise IndexError.
        letter = FStructure._LABEL_LETTERS[(value - 1) % len(FStructure._LABEL_LETTERS)]
        num = int(value) // 26
        if num > 0:
            return letter + str(num)
        return letter

    def __repr__(self):
        """Return the pretty-printed form with all newlines removed."""
        # Call pretty_format() directly instead of relying on the
        # ``__unicode__`` attribute injected by the class decorator.
        return self.pretty_format().replace('\n', '')

    def __str__(self):
        """Return the pretty-printed form."""
        return self.pretty_format()

    def pretty_format(self, indent=3):
        """
        Render this structure as a bracketed, multi-line string.

        Structures without a ``label`` or ``pred`` attribute are rendered
        without the corresponding part.

        :param indent: number of spaces placed before each feature line
        :type indent: int
        :return: the formatted structure
        :rtype: str
        :raise Exception: if a feature holds an item that is not an
            FStructure, a tuple, or a list
        """
        # A missing attribute raises AttributeError (not NameError), so test
        # for presence explicitly.
        labeled = hasattr(self, 'label')
        header = '%s:[' % self.label if labeled else '['

        pred = getattr(self, 'pred', None)
        if pred is not None:
            header += 'pred \'%s\'' % (pred[0])

        padding = ' ' * indent
        lines = [header]
        for feature in sorted(self):
            for item in self[feature]:
                if isinstance(item, FStructure):
                    label_width = len(self.label) if labeled else 0
                    next_indent = indent + len(feature) + 3 + label_width
                    lines.append(
                        '%s%s %s' % (padding, feature, item.pretty_format(next_indent))
                    )
                elif isinstance(item, tuple):
                    lines.append('%s%s \'%s\'' % (padding, feature, item[0]))
                elif isinstance(item, list):
                    separator = '\n%s' % (' ' * (indent + len(feature) + 2))
                    lines.append(
                        '%s%s {%s}' % (padding, feature, separator.join(item))
                    )
                else:  # ERROR
                    raise Exception(
                        'feature %s is not an FStruct, a list, or a tuple' % feature
                    )
        return '\n'.join(lines) + ']'
def demo_read_depgraph():
    """
    Parse four small sample dependency graphs and print the f-structure
    that ``FStructure.read_depgraph`` builds for each of them.
    """
    from nltk.parse.dependencygraph import DependencyGraph

    # One sample sentence per entry, one "word tag head relation" row per line.
    sample_texts = (
        'Esso NNP 2 SUB\n'
        'said VBD 0 ROOT\n'
        'the DT 5 NMOD\n'
        'Whiting NNP 5 NMOD\n'
        'field NN 6 SUB\n'
        'started VBD 2 VMOD\n'
        'production NN 6 OBJ\n'
        'Tuesday NNP 6 VMOD\n',
        'John NNP 2 SUB\n'
        'sees VBP 0 ROOT\n'
        'Mary NNP 2 OBJ\n',
        'a DT 2 SPEC\n'
        'man NN 3 SUBJ\n'
        'walks VB 0 ROOT\n',
        'every DT 2 SPEC\n'
        'girl NN 3 SUBJ\n'
        'chases VB 0 ROOT\n'
        'a DT 5 SPEC\n'
        'dog NN 3 OBJ\n',
    )

    # Build every graph before printing anything, then print in order.
    graphs = [DependencyGraph(text) for text in sample_texts]
    for graph in graphs:
        print(FStructure.read_depgraph(graph))
# Run the demonstration when this module is executed as a script.
if __name__ == '__main__':
    demo_read_depgraph()
|