.. Copyright (C) 2001-2019 NLTK Project
.. For license information, see LICENSE.TXT

=======
Metrics
=======

The `nltk.metrics` package provides a variety of *evaluation measures*
which can be used for a wide variety of NLP tasks.

    >>> from __future__ import print_function
    >>> from nltk.metrics import *

------------------
Standard IR Scores
------------------

We can use standard scores from information retrieval to test the
performance of taggers, chunkers, etc.

    >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
    >>> print(accuracy(reference, test))
    0.8
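
Accuracy is simply the proportion of positions where the two sequences
agree; here 8 of the 10 tags match, which we can verify directly:

    >>> sum(1 for r, t in zip(reference, test) if r == t) / float(len(reference))
    0.8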

The following measures apply to sets:

    >>> reference_set = set(reference)
    >>> test_set = set(test)
    >>> precision(reference_set, test_set)
    1.0
    >>> print(recall(reference_set, test_set))
    0.8
    >>> print(f_measure(reference_set, test_set))
    0.88888888888...
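
By default, f_measure is the harmonic mean of precision and recall,
which we can reproduce by hand from the two values above:

    >>> p, r = 1.0, 0.8
    >>> 2 * p * r / (p + r)
    0.8888888888...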

Measuring the likelihood of the data, given probability distributions:

    >>> from nltk import FreqDist, MLEProbDist
    >>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf"))
    >>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss"))
    >>> print(log_likelihood(['a', 'd'], [pdist1, pdist2]))
    -2.7075187496...
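
The score is the average base-2 log probability of each sample under the
corresponding distribution: 'a' occurs 3 times in the 16 characters
underlying pdist1, and 'd' twice in the 16 underlying pdist2, so:

    >>> import math
    >>> (math.log(3.0 / 16, 2) + math.log(2.0 / 16, 2)) / 2
    -2.7075187496...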

----------------
Distance Metrics
----------------

String edit distance (Levenshtein):

    >>> edit_distance("rain", "shine")
    3
    >>> edit_distance_align("shine", "shine")
    [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
    >>> edit_distance_align("rain", "brainy")
    [(0, 0), (1, 1), (1, 2), (2, 3), (3, 4), (4, 5), (4, 6)]
    >>> edit_distance_align("", "brainy")
    [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6)]
    >>> edit_distance_align("", "")
    [(0, 0)]
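
In recent NLTK versions, edit_distance also accepts a substitution_cost
keyword and a transpositions flag for Damerau-style adjacent swaps; a
quick sketch, assuming those parameters are available:

    >>> edit_distance("rain", "shine", substitution_cost=2)
    5
    >>> edit_distance("ab", "ba", transpositions=True)
    1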

Other distance measures:

    >>> s1 = set([1,2,3,4])
    >>> s2 = set([3,4,5])
    >>> binary_distance(s1, s2)
    1.0
    >>> print(jaccard_distance(s1, s2))
    0.6
    >>> print(masi_distance(s1, s2))
    0.868
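
Jaccard distance is one minus the ratio of the intersection size to the
union size; here the two sets share {3, 4} out of five distinct
elements:

    >>> 1 - len(s1 & s2) / float(len(s1 | s2))
    0.6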

----------------------
Miscellaneous Measures
----------------------

Rank Correlation works with two dictionaries mapping keys to ranks.
The dictionaries should have the same set of keys.

    >>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3})
    0.5
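
This follows the standard Spearman formula, 1 - 6*sum(d**2)/(n*(n**2 - 1)),
where d is the rank difference per key: here 'e' differs by 0 while 't'
and 'a' each differ by 1:

    >>> 1 - (6.0 * (0 + 1 + 1)) / (3 * (3 ** 2 - 1))
    0.5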

Windowdiff uses a sliding window in comparing two segmentations of the
same input (e.g. tokenizations, chunkings).  Segmentations are
represented using strings of zeros and ones.

    >>> s1 = "000100000010"
    >>> s2 = "000010000100"
    >>> s3 = "100000010000"
    >>> s4 = "000000000000"
    >>> s5 = "111111111111"
    >>> windowdiff(s1, s1, 3)
    0.0
    >>> abs(windowdiff(s1, s2, 3) - 0.3) < 1e-6  # windowdiff(s1, s2, 3) == 0.3
    True
    >>> abs(windowdiff(s2, s3, 3) - 0.8) < 1e-6  # windowdiff(s2, s3, 3) == 0.8
    True
    >>> windowdiff(s1, s4, 3)
    0.5
    >>> windowdiff(s1, s5, 3)
    1.0
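
Conceptually, windowdiff slides a window of size k over both strings and
counts the window positions where the two segmentations contain a
different number of boundaries, normalized by the number of positions.
A minimal sketch of that computation, reproducing the unweighted
behaviour above:

    >>> def windowdiff_sketch(seg1, seg2, k, boundary="1"):
    ...     n = len(seg1) - k + 1
    ...     # count windows whose boundary counts disagree
    ...     disagreements = sum(
    ...         seg1[i:i + k].count(boundary) != seg2[i:i + k].count(boundary)
    ...         for i in range(n))
    ...     return disagreements / float(n)
    >>> windowdiff_sketch(s1, s4, 3)
    0.5
    >>> windowdiff_sketch(s1, s5, 3)
    1.0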

----------------
Confusion Matrix
----------------

Note the double spaces after the periods in the strings below: the two
strings must have the same length, since the matrix is built by aligning
them character by character.

    >>> reference = 'This is the reference data.  Testing 123.  aoaeoeoe'
    >>> test = 'Thos iz_the rifirenci data.  Testeng 123.  aoaeoeoe'
    >>> print(ConfusionMatrix(reference, test))
      |   . 1 2 3 T _ a c d e f g h i n o r s t z |
    --+-------------------------------------------+
      |<8>. . . . . 1 . . . . . . . . . . . . . . |
    . | .<2>. . . . . . . . . . . . . . . . . . . |
    1 | . .<1>. . . . . . . . . . . . . . . . . . |
    2 | . . .<1>. . . . . . . . . . . . . . . . . |
    3 | . . . .<1>. . . . . . . . . . . . . . . . |
    T | . . . . .<2>. . . . . . . . . . . . . . . |
    _ | . . . . . .<.>. . . . . . . . . . . . . . |
    a | . . . . . . .<4>. . . . . . . . . . . . . |
    c | . . . . . . . .<1>. . . . . . . . . . . . |
    d | . . . . . . . . .<1>. . . . . . . . . . . |
    e | . . . . . . . . . .<6>. . . 3 . . . . . . |
    f | . . . . . . . . . . .<1>. . . . . . . . . |
    g | . . . . . . . . . . . .<1>. . . . . . . . |
    h | . . . . . . . . . . . . .<2>. . . . . . . |
    i | . . . . . . . . . . 1 . . .<1>. 1 . . . . |
    n | . . . . . . . . . . . . . . .<2>. . . . . |
    o | . . . . . . . . . . . . . . . .<3>. . . . |
    r | . . . . . . . . . . . . . . . . .<2>. . . |
    s | . . . . . . . . . . . . . . . . . .<2>. 1 |
    t | . . . . . . . . . . . . . . . . . . .<3>. |
    z | . . . . . . . . . . . . . . . . . . . .<.>|
    --+-------------------------------------------+
    (row = reference; col = test)
    <BLANKLINE>

    >>> cm = ConfusionMatrix(reference, test)
    >>> print(cm.pretty_format(sort_by_count=True))
      |   e a i o s t . T h n r 1 2 3 c d f g _ z |
    --+-------------------------------------------+
      |<8>. . . . . . . . . . . . . . . . . . 1 . |
    e | .<6>. 3 . . . . . . . . . . . . . . . . . |
    a | . .<4>. . . . . . . . . . . . . . . . . . |
    i | . 1 .<1>1 . . . . . . . . . . . . . . . . |
    o | . . . .<3>. . . . . . . . . . . . . . . . |
    s | . . . . .<2>. . . . . . . . . . . . . . 1 |
    t | . . . . . .<3>. . . . . . . . . . . . . . |
    . | . . . . . . .<2>. . . . . . . . . . . . . |
    T | . . . . . . . .<2>. . . . . . . . . . . . |
    h | . . . . . . . . .<2>. . . . . . . . . . . |
    n | . . . . . . . . . .<2>. . . . . . . . . . |
    r | . . . . . . . . . . .<2>. . . . . . . . . |
    1 | . . . . . . . . . . . .<1>. . . . . . . . |
    2 | . . . . . . . . . . . . .<1>. . . . . . . |
    3 | . . . . . . . . . . . . . .<1>. . . . . . |
    c | . . . . . . . . . . . . . . .<1>. . . . . |
    d | . . . . . . . . . . . . . . . .<1>. . . . |
    f | . . . . . . . . . . . . . . . . .<1>. . . |
    g | . . . . . . . . . . . . . . . . . .<1>. . |
    _ | . . . . . . . . . . . . . . . . . . .<.>. |
    z | . . . . . . . . . . . . . . . . . . . .<.>|
    --+-------------------------------------------+
    (row = reference; col = test)
    <BLANKLINE>

    >>> print(cm.pretty_format(sort_by_count=True, truncate=10))
      |   e a i o s t . T h |
    --+---------------------+
      |<8>. . . . . . . . . |
    e | .<6>. 3 . . . . . . |
    a | . .<4>. . . . . . . |
    i | . 1 .<1>1 . . . . . |
    o | . . . .<3>. . . . . |
    s | . . . . .<2>. . . . |
    t | . . . . . .<3>. . . |
    . | . . . . . . .<2>. . |
    T | . . . . . . . .<2>. |
    h | . . . . . . . . .<2>|
    --+---------------------+
    (row = reference; col = test)
    <BLANKLINE>

    >>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False))
       |                   1 |
       | 1 2 3 4 5 6 7 8 9 0 |
    ---+---------------------+
     1 |<8>. . . . . . . . . |
     2 | .<6>. 3 . . . . . . |
     3 | . .<4>. . . . . . . |
     4 | . 1 .<1>1 . . . . . |
     5 | . . . .<3>. . . . . |
     6 | . . . . .<2>. . . . |
     7 | . . . . . .<3>. . . |
     8 | . . . . . . .<2>. . |
     9 | . . . . . . . .<2>. |
    10 | . . . . . . . . .<2>|
    ---+---------------------+
    (row = reference; col = test)
    Value key:
       1:
       2: e
       3: a
       4: i
       5: o
       6: s
       7: t
       8: .
       9: T
      10: h
    <BLANKLINE>
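
Individual cells can be read back by indexing the matrix with a
(reference, test) pair of labels; for instance, 'e' in the reference
appears as 'i' in the test three times.  This indexing is provided by
ConfusionMatrix.__getitem__:

    >>> cm['e', 'i']
    3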

--------------------
Association measures
--------------------

These measures are useful to determine whether the co-occurrence of two
random events is meaningful.  They are used, for instance, to distinguish
collocations from other pairs of adjacent words.

Here are some examples of bigram association calculations, drawn from
Manning and Schütze's SNLP, 2nd Ed., chapter 5.  Each bigram measure
takes the count of the bigram itself, a pair with the counts of the two
individual words, and the total number of bigrams in the corpus:

    >>> n_new_companies, n_new, n_companies, N = 8, 15828, 4675, 14307668
    >>> bam = BigramAssocMeasures
    >>> bam.raw_freq(20, (42, 20), N) == 20. / N
    True
    >>> bam.student_t(n_new_companies, (n_new, n_companies), N)
    0.999...
    >>> bam.chi_sq(n_new_companies, (n_new, n_companies), N)
    1.54...
    >>> bam.likelihood_ratio(150, (12593, 932), N)
    1291...

For the other measures, rather than checking exact values, we verify
that two bigram configurations are ranked in the expected order:

    >>> bam.mi_like(20, (42, 20), N) > bam.mi_like(20, (41, 27), N)
    True
    >>> bam.pmi(20, (42, 20), N) > bam.pmi(20, (41, 27), N)
    True
    >>> bam.phi_sq(20, (42, 20), N) > bam.phi_sq(20, (41, 27), N)
    True
    >>> bam.poisson_stirling(20, (42, 20), N) > bam.poisson_stirling(20, (41, 27), N)
    True
    >>> bam.jaccard(20, (42, 20), N) > bam.jaccard(20, (41, 27), N)
    True
    >>> bam.dice(20, (42, 20), N) > bam.dice(20, (41, 27), N)
    True
    >>> bam.fisher(20, (42, 20), N) > bam.fisher(20, (41, 27), N) # doctest: +SKIP
    False
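
As a rough sanity check on one of these, pmi is the base-2 log of the
ratio between the observed joint count and the count that independence
would predict, so for the first configuration above the score works out
to:

    >>> import math
    >>> math.log(20.0 * N / (42 * 20), 2)
    18.37...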

For trigrams, we have to provide more count information: the count of
each word pair and the count of each individual word, in addition to the
trigram count and the corpus size.

    >>> n_w1_w2_w3 = 20
    >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
    >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
    >>> n_w1, n_w2, n_w3 = 100, 200, 300
    >>> uni_counts = (n_w1, n_w2, n_w3)
    >>> N = 14307668
    >>> tam = TrigramAssocMeasures
    >>> tam.raw_freq(n_w1_w2_w3, pair_counts, uni_counts, N) == 1. * n_w1_w2_w3 / N
    True

Lowering one of the unigram counts makes the same trigram count look
more surprising, so each measure should assign it a higher score:

    >>> uni_counts2 = (n_w1, n_w2, 100)
    >>> tam.student_t(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.student_t(n_w1_w2_w3, pair_counts, uni_counts, N)
    True
    >>> tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts, N)
    True
    >>> tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts, N)
    True
    >>> tam.pmi(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N)
    True
    >>> tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts, N)
    True
    >>> tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts, N)
    True
    >>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N)
    True

For fourgrams, we have to provide still more count information: the
relevant pair and triplet counts, the unigram counts, the fourgram
count, and the corpus size.

    >>> n_w1_w2_w3_w4 = 5
    >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
    >>> n_w1_w2_w3, n_w2_w3_w4 = 20, 10
    >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
    >>> triplet_counts = (n_w1_w2_w3, n_w2_w3_w4)
    >>> n_w1, n_w2, n_w3, n_w4 = 100, 200, 300, 400
    >>> uni_counts = (n_w1, n_w2, n_w3, n_w4)
    >>> N = 14307668
    >>> qam = QuadgramAssocMeasures
    >>> qam.raw_freq(n_w1_w2_w3_w4, pair_counts, triplet_counts, uni_counts, N) == 1. * n_w1_w2_w3_w4 / N
    True