generate.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. # -*- coding: utf-8 -*-
  2. # Natural Language Toolkit: Generating from a CFG
  3. #
  4. # Copyright (C) 2001-2019 NLTK Project
  5. # Author: Steven Bird <stevenbird1@gmail.com>
  6. # Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
  7. # URL: <http://nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. #
  10. from __future__ import print_function
  11. import itertools
  12. import sys
  13. from nltk.grammar import Nonterminal
  14. def generate(grammar, start=None, depth=None, n=None):
  15. """
  16. Generates an iterator of all sentences from a CFG.
  17. :param grammar: The Grammar used to generate sentences.
  18. :param start: The Nonterminal from which to start generate sentences.
  19. :param depth: The maximal depth of the generated tree.
  20. :param n: The maximum number of sentences to return.
  21. :return: An iterator of lists of terminal tokens.
  22. """
  23. if not start:
  24. start = grammar.start()
  25. if depth is None:
  26. depth = sys.maxsize
  27. iter = _generate_all(grammar, [start], depth)
  28. if n:
  29. iter = itertools.islice(iter, n)
  30. return iter
  31. def _generate_all(grammar, items, depth):
  32. if items:
  33. try:
  34. for frag1 in _generate_one(grammar, items[0], depth):
  35. for frag2 in _generate_all(grammar, items[1:], depth):
  36. yield frag1 + frag2
  37. except RuntimeError as _error:
  38. if _error.message == "maximum recursion depth exceeded":
  39. # Helpful error message while still showing the recursion stack.
  40. raise RuntimeError(
  41. "The grammar has rule(s) that yield infinite recursion!!"
  42. )
  43. else:
  44. raise
  45. else:
  46. yield []
  47. def _generate_one(grammar, item, depth):
  48. if depth > 0:
  49. if isinstance(item, Nonterminal):
  50. for prod in grammar.productions(lhs=item):
  51. for frag in _generate_all(grammar, prod.rhs(), depth - 1):
  52. yield frag
  53. else:
  54. yield [item]
  55. demo_grammar = """
  56. S -> NP VP
  57. NP -> Det N
  58. PP -> P NP
  59. VP -> 'slept' | 'saw' NP | 'walked' PP
  60. Det -> 'the' | 'a'
  61. N -> 'man' | 'park' | 'dog'
  62. P -> 'in' | 'with'
  63. """
  64. def demo(N=23):
  65. from nltk.grammar import CFG
  66. print('Generating the first %d sentences for demo grammar:' % (N,))
  67. print(demo_grammar)
  68. grammar = CFG.fromstring(demo_grammar)
  69. for n, sent in enumerate(generate(grammar, n=N), 1):
  70. print('%3d. %s' % (n, ' '.join(sent)))
  71. if __name__ == '__main__':
  72. demo()