simple.doctest 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. .. Copyright (C) 2001-2019 NLTK Project
  2. .. For license information, see LICENSE.TXT
  3. =================
  4. EasyInstall Tests
  5. =================
  6. This file contains some simple tests that will be run by EasyInstall in
  7. order to test the installation when NLTK-Data is absent.
  8. >>> from __future__ import print_function
  9. ------------
  10. Tokenization
  11. ------------
  12. >>> from nltk.tokenize import wordpunct_tokenize
  13. >>> s = ("Good muffins cost $3.88\nin New York. Please buy me\n"
  14. ... "two of them.\n\nThanks.")
  15. >>> wordpunct_tokenize(s) # doctest: +NORMALIZE_WHITESPACE
  16. ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
  17. 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
  18. -------
  19. Metrics
  20. -------
  21. >>> from nltk.metrics import precision, recall, f_measure
  22. >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
  23. >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
  24. >>> reference_set = set(reference)
  25. >>> test_set = set(test)
  26. >>> precision(reference_set, test_set)
  27. 1.0
  28. >>> print(recall(reference_set, test_set))
  29. 0.8
  30. >>> print(f_measure(reference_set, test_set))
  31. 0.88888888888...
  32. ------------------
  33. Feature Structures
  34. ------------------
  35. >>> from nltk import FeatStruct
  36. >>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
  37. >>> fs2 = FeatStruct(POS='N', AGR=fs1)
  38. >>> print(fs2)
  39. [       [ GND = 'fem' ] ]
  40. [ AGR = [ NUM = 'pl'  ] ]
  41. [       [ PER = 3     ] ]
  42. [                       ]
  43. [ POS = 'N'             ]
  44. >>> print(fs2['AGR'])
  45. [ GND = 'fem' ]
  46. [ NUM = 'pl'  ]
  47. [ PER = 3     ]
  48. >>> print(fs2['AGR']['PER'])
  49. 3
  50. -------
  51. Parsing
  52. -------
  53. >>> from nltk.parse.recursivedescent import RecursiveDescentParser
  54. >>> from nltk.grammar import CFG
  55. >>> grammar = CFG.fromstring("""
  56. ... S -> NP VP
  57. ... PP -> P NP
  58. ... NP -> 'the' N | N PP | 'the' N PP
  59. ... VP -> V NP | V PP | V NP PP
  60. ... N -> 'cat' | 'dog' | 'rug'
  61. ... V -> 'chased'
  62. ... P -> 'on'
  63. ... """)
  64. >>> rd = RecursiveDescentParser(grammar)
  65. >>> sent = 'the cat chased the dog on the rug'.split()
  66. >>> for t in rd.parse(sent):
  67. ...     print(t)
  68. (S
  69.   (NP the (N cat))
  70.   (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
  71. (S
  72.   (NP the (N cat))
  73.   (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))