1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- # Natural Language Toolkit: Minimal Sets
- #
- # Copyright (C) 2001-2019 NLTK Project
- # Author: Steven Bird <stevenbird1@gmail.com>
- # URL: <http://nltk.org>
- # For license information, see LICENSE.TXT
- from collections import defaultdict
- class MinimalSet(object):
- """
- Find contexts where more than one possible target value can
- appear. E.g. if targets are word-initial letters, and contexts
- are the remainders of words, then we would like to find cases like
- "fat" vs "cat", and "training" vs "draining". If targets are
- parts-of-speech and contexts are words, then we would like to find
- cases like wind (noun) 'air in rapid motion', vs wind (verb)
- 'coil, wrap'.
- """
- def __init__(self, parameters=None):
- """
- Create a new minimal set.
- :param parameters: The (context, target, display) tuples for the item
- :type parameters: list(tuple(str, str, str))
- """
- self._targets = set() # the contrastive information
- self._contexts = set() # what we are controlling for
- self._seen = defaultdict(set) # to record what we have seen
- self._displays = {} # what we will display
- if parameters:
- for context, target, display in parameters:
- self.add(context, target, display)
- def add(self, context, target, display):
- """
- Add a new item to the minimal set, having the specified
- context, target, and display form.
- :param context: The context in which the item of interest appears
- :type context: str
- :param target: The item of interest
- :type target: str
- :param display: The information to be reported for each item
- :type display: str
- """
- # Store the set of targets that occurred in this context
- self._seen[context].add(target)
- # Keep track of which contexts and targets we have seen
- self._contexts.add(context)
- self._targets.add(target)
- # For a given context and target, store the display form
- self._displays[(context, target)] = display
- def contexts(self, minimum=2):
- """
- Determine which contexts occurred with enough distinct targets.
- :param minimum: the minimum number of distinct target forms
- :type minimum: int
- :rtype list
- """
- return [c for c in self._contexts if len(self._seen[c]) >= minimum]
- def display(self, context, target, default=""):
- if (context, target) in self._displays:
- return self._displays[(context, target)]
- else:
- return default
- def display_all(self, context):
- result = []
- for target in self._targets:
- x = self.display(context, target)
- if x:
- result.append(x)
- return result
- def targets(self):
- return self._targets
|