metrics.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # Natural Language Toolkit: Translation metrics
  2. #
  3. # Copyright (C) 2001-2019 NLTK Project
  4. # Author: Will Zhang <wilzzha@gmail.com>
  5. # Guan Gui <ggui@student.unimelb.edu.au>
  6. # Steven Bird <stevenbird1@gmail.com>
  7. # URL: <http://nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. from __future__ import division
  10. def alignment_error_rate(reference, hypothesis, possible=None):
  11. """
  12. Return the Alignment Error Rate (AER) of an alignment
  13. with respect to a "gold standard" reference alignment.
  14. Return an error rate between 0.0 (perfect alignment) and 1.0 (no
  15. alignment).
  16. >>> from nltk.translate import Alignment
  17. >>> ref = Alignment([(0, 0), (1, 1), (2, 2)])
  18. >>> test = Alignment([(0, 0), (1, 2), (2, 1)])
  19. >>> alignment_error_rate(ref, test) # doctest: +ELLIPSIS
  20. 0.6666666666666667
  21. :type reference: Alignment
  22. :param reference: A gold standard alignment (sure alignments)
  23. :type hypothesis: Alignment
  24. :param hypothesis: A hypothesis alignment (aka. candidate alignments)
  25. :type possible: Alignment or None
  26. :param possible: A gold standard reference of possible alignments
  27. (defaults to *reference* if None)
  28. :rtype: float or None
  29. """
  30. if possible is None:
  31. possible = reference
  32. else:
  33. assert reference.issubset(possible) # sanity check
  34. return 1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) / float(
  35. len(hypothesis) + len(reference)
  36. )