1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- # Natural Language Toolkit: Clusterer Interfaces
- #
- # Copyright (C) 2001-2019 NLTK Project
- # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
- # Porting: Steven Bird <stevenbird1@gmail.com>
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- from abc import ABCMeta, abstractmethod
- from six import add_metaclass
- from nltk.probability import DictionaryProbDist
- @add_metaclass(ABCMeta)
- class ClusterI(object):
- """
- Interface covering basic clustering functionality.
- """
- @abstractmethod
- def cluster(self, vectors, assign_clusters=False):
- """
- Assigns the vectors to clusters, learning the clustering parameters
- from the data. Returns a cluster identifier for each vector.
- """
- @abstractmethod
- def classify(self, token):
- """
- Classifies the token into a cluster, setting the token's CLUSTER
- parameter to that cluster identifier.
- """
- def likelihood(self, vector, label):
- """
- Returns the likelihood (a float) of the token having the
- corresponding cluster.
- """
- if self.classify(vector) == label:
- return 1.0
- else:
- return 0.0
- def classification_probdist(self, vector):
- """
- Classifies the token into a cluster, returning
- a probability distribution over the cluster identifiers.
- """
- likelihoods = {}
- sum = 0.0
- for cluster in self.cluster_names():
- likelihoods[cluster] = self.likelihood(vector, cluster)
- sum += likelihoods[cluster]
- for cluster in self.cluster_names():
- likelihoods[cluster] /= sum
- return DictionaryProbDist(likelihoods)
- @abstractmethod
- def num_clusters(self):
- """
- Returns the number of clusters.
- """
- def cluster_names(self):
- """
- Returns the names of the clusters.
- :rtype: list
- """
- return list(range(self.num_clusters()))
- def cluster_name(self, index):
- """
- Returns the names of the cluster at index.
- """
- return index
|