pyparsing.py 240 KB


  1. #-*- coding: utf-8 -*-
  2. # module pyparsing.py
  3. #
  4. # Copyright (c) 2003-2019 Paul T. McGuire
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining
  7. # a copy of this software and associated documentation files (the
  8. # "Software"), to deal in the Software without restriction, including
  9. # without limitation the rights to use, copy, modify, merge, publish,
  10. # distribute, sublicense, and/or sell copies of the Software, and to
  11. # permit persons to whom the Software is furnished to do so, subject to
  12. # the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be
  15. # included in all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  21. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24. #
  25. __doc__ = \
  26. """
  27. pyparsing module - Classes and methods to define and execute parsing grammars
  28. =============================================================================
  29. The pyparsing module is an alternative approach to creating and
  30. executing simple grammars, vs. the traditional lex/yacc approach, or the
  31. use of regular expressions. With pyparsing, you don't need to learn
  32. a new syntax for defining grammars or matching expressions - the parsing
  33. module provides a library of classes that you use to construct the
  34. grammar directly in Python.
  35. Here is a program to parse "Hello, World!" (or any greeting of the form
  36. ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
  37. :class:`Literal`, and :class:`And` elements
  38. (the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
  39. and the strings are auto-converted to :class:`Literal` expressions)::
  40. from pyparsing import Word, alphas
  41. # define grammar of a greeting
  42. greet = Word(alphas) + "," + Word(alphas) + "!"
  43. hello = "Hello, World!"
  44. print (hello, "->", greet.parseString(hello))
  45. The program outputs the following::
  46. Hello, World! -> ['Hello', ',', 'World', '!']
  47. The Python representation of the grammar is quite readable, owing to the
  48. self-explanatory class names, and the use of '+', '|' and '^' operators.
  49. The :class:`ParseResults` object returned from
  50. :class:`ParserElement.parseString` can be
  51. accessed as a nested list, a dictionary, or an object with named
  52. attributes.
  53. The pyparsing module handles some of the problems that are typically
  54. vexing when writing text parsers:
  55. - extra or missing whitespace (the above program will also handle
  56. "Hello,World!", "Hello , World !", etc.)
  57. - quoted strings
  58. - embedded comments
  59. Getting Started -
  60. -----------------
  61. Visit the classes :class:`ParserElement` and :class:`ParseResults` to
  62. see the base classes that most other pyparsing
  63. classes inherit from. Use the docstrings for examples of how to:
  64. - construct literal match expressions from :class:`Literal` and
  65. :class:`CaselessLiteral` classes
  66. - construct character word-group expressions using the :class:`Word`
  67. class
  68. - see how to create repetitive expressions using :class:`ZeroOrMore`
  69. and :class:`OneOrMore` classes
  70. - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
  71. and :class:`'&'<Each>` operators to combine simple expressions into
  72. more complex ones
  73. - associate names with your parsed results using
  74. :class:`ParserElement.setResultsName`
  75. - find some helpful expression short-cuts like :class:`delimitedList`
  76. and :class:`oneOf`
  77. - find more useful common expressions in the :class:`pyparsing_common`
  78. namespace class
  79. """
# Release metadata for this copy of the module; all three names are also
# listed in ``__all__`` further down, so they are part of the public API.
__version__ = "2.4.0"
# Timestamp string recorded alongside the version number.
__versionTime__ = "07 Apr 2019 18:28 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  83. import string
  84. from weakref import ref as wkref
  85. import copy
  86. import sys
  87. import warnings
  88. import re
  89. import sre_constants
  90. import collections
  91. import pprint
  92. import traceback
  93. import types
  94. from datetime import datetime
  95. try:
  96. # Python 3
  97. from itertools import filterfalse
  98. except ImportError:
  99. from itertools import ifilterfalse as filterfalse
  100. try:
  101. from _thread import RLock
  102. except ImportError:
  103. from threading import RLock
  104. try:
  105. # Python 3
  106. from collections.abc import Iterable
  107. from collections.abc import MutableMapping
  108. except ImportError:
  109. # Python 2.7
  110. from collections import Iterable
  111. from collections import MutableMapping
  112. try:
  113. from collections import OrderedDict as _OrderedDict
  114. except ImportError:
  115. try:
  116. from ordereddict import OrderedDict as _OrderedDict
  117. except ImportError:
  118. _OrderedDict = None
  119. try:
  120. from types import SimpleNamespace
  121. except ImportError:
  122. class SimpleNamespace: pass
# version compatibility configuration
# ``__compat__`` is a plain attribute container (SimpleNamespace, or the
# empty fallback class defined by the import guard above); its attributes are
# feature flags.
__compat__ = SimpleNamespace()
__compat__.__doc__ = """
A cross-version compatibility configuration for pyparsing features that will be
released in a future version. By setting values in this configuration to True,
those features can be enabled in prior versions for compatibility development
and testing.
- collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
of results names when an And expression is nested within an Or or MatchFirst; set to
True to enable bugfix to be released in pyparsing 2.4
"""
# The flag is switched on by default in this version of the module.
__compat__.collect_all_And_tokens = True

#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

# Names exported by ``from pyparsing import *``.
__all__ = [ '__version__', '__versionTime__', '__author__', '__compat__',
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
]
  156. system_version = tuple(sys.version_info)[:3]
  157. PY_3 = system_version[0] == 3
  158. if PY_3:
  159. _MAX_INT = sys.maxsize
  160. basestring = str
  161. unichr = chr
  162. unicode = str
  163. _ustr = str
  164. # build list of single arg builtins, that can be used as parse actions
  165. singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
  166. else:
  167. _MAX_INT = sys.maxint
  168. range = xrange
  169. def _ustr(obj):
  170. """Drop-in replacement for str(obj) that tries to be Unicode
  171. friendly. It first tries str(obj). If that fails with
  172. a UnicodeEncodeError, then it tries unicode(obj). It then
  173. < returns the unicode object | encodes it with the default
  174. encoding | ... >.
  175. """
  176. if isinstance(obj,unicode):
  177. return obj
  178. try:
  179. # If this works, then _ustr(obj) has the same behaviour as str(obj), so
  180. # it won't break any existing code.
  181. return str(obj)
  182. except UnicodeEncodeError:
  183. # Else encode it
  184. ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
  185. xmlcharref = Regex(r'&#\d+;')
  186. xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
  187. return xmlcharref.transformString(ret)
  188. # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
  189. singleArgBuiltins = []
  190. import __builtin__
  191. for fname in "sum len sorted reversed list tuple set any all min max".split():
  192. try:
  193. singleArgBuiltins.append(getattr(__builtin__,fname))
  194. except AttributeError:
  195. continue
  196. _generatorType = type((y for y in range(1)))
  197. def _xml_escape(data):
  198. """Escape &, <, >, ", ', etc. in a string of data."""
  199. # ampersand must be replaced first
  200. from_symbols = '&><"\''
  201. to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
  202. for from_,to_ in zip(from_symbols, to_symbols):
  203. data = data.replace(from_, to_)
  204. return data
  205. alphas = string.ascii_uppercase + string.ascii_lowercase
  206. nums = "0123456789"
  207. hexnums = nums + "ABCDEFabcdef"
  208. alphanums = alphas + nums
  209. _bslash = chr(92)
  210. printables = "".join(c for c in string.printable if c not in string.whitespace)
  211. class ParseBaseException(Exception):
  212. """base exception class for all parsing runtime exceptions"""
  213. # Performance tuning: we construct a *lot* of these, so keep this
  214. # constructor as small and fast as possible
  215. def __init__( self, pstr, loc=0, msg=None, elem=None ):
  216. self.loc = loc
  217. if msg is None:
  218. self.msg = pstr
  219. self.pstr = ""
  220. else:
  221. self.msg = msg
  222. self.pstr = pstr
  223. self.parserElement = elem
  224. self.args = (pstr, loc, msg)
  225. @classmethod
  226. def _from_exception(cls, pe):
  227. """
  228. internal factory method to simplify creating one type of ParseException
  229. from another - avoids having __init__ signature conflicts among subclasses
  230. """
  231. return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
  232. def __getattr__( self, aname ):
  233. """supported attributes by name are:
  234. - lineno - returns the line number of the exception text
  235. - col - returns the column number of the exception text
  236. - line - returns the line containing the exception text
  237. """
  238. if( aname == "lineno" ):
  239. return lineno( self.loc, self.pstr )
  240. elif( aname in ("col", "column") ):
  241. return col( self.loc, self.pstr )
  242. elif( aname == "line" ):
  243. return line( self.loc, self.pstr )
  244. else:
  245. raise AttributeError(aname)
  246. def __str__( self ):
  247. return "%s (at char %d), (line:%d, col:%d)" % \
  248. ( self.msg, self.loc, self.lineno, self.column )
  249. def __repr__( self ):
  250. return _ustr(self)
  251. def markInputline( self, markerString = ">!<" ):
  252. """Extracts the exception line from the input string, and marks
  253. the location of the exception with a special symbol.
  254. """
  255. line_str = self.line
  256. line_column = self.column - 1
  257. if markerString:
  258. line_str = "".join((line_str[:line_column],
  259. markerString, line_str[line_column:]))
  260. return line_str.strip()
  261. def __dir__(self):
  262. return "lineno col line".split() + dir(type(self))
  263. class ParseException(ParseBaseException):
  264. """
  265. Exception thrown when parse expressions don't match class;
  266. supported attributes by name are:
  267. - lineno - returns the line number of the exception text
  268. - col - returns the column number of the exception text
  269. - line - returns the line containing the exception text
  270. Example::
  271. try:
  272. Word(nums).setName("integer").parseString("ABC")
  273. except ParseException as pe:
  274. print(pe)
  275. print("column: {}".format(pe.col))
  276. prints::
  277. Expected integer (at char 0), (line:1, col:1)
  278. column: 1
  279. """
  280. @staticmethod
  281. def explain(exc, depth=16):
  282. """
  283. Method to take an exception and translate the Python internal traceback into a list
  284. of the pyparsing expressions that caused the exception to be raised.
  285. Parameters:
  286. - exc - exception raised during parsing (need not be a ParseException, in support
  287. of Python exceptions that might be raised in a parse action)
  288. - depth (default=16) - number of levels back in the stack trace to list expression
  289. and function names; if None, the full stack trace names will be listed; if 0, only
  290. the failing input line, marker, and exception string will be shown
  291. Returns a multi-line string listing the ParserElements and/or function names in the
  292. exception's stack trace.
  293. Note: the diagnostic output will include string representations of the expressions
  294. that failed to parse. These representations will be more helpful if you use `setName` to
  295. give identifiable names to your expressions. Otherwise they will use the default string
  296. forms, which may be cryptic to read.
  297. explain() is only supported under Python 3.
  298. """
  299. import inspect
  300. if depth is None:
  301. depth = sys.getrecursionlimit()
  302. ret = []
  303. if isinstance(exc, ParseBaseException):
  304. ret.append(exc.line)
  305. ret.append(' ' * (exc.col - 1) + '^')
  306. ret.append("{0}: {1}".format(type(exc).__name__, exc))
  307. if depth > 0:
  308. callers = inspect.getinnerframes(exc.__traceback__, context=depth)
  309. seen = set()
  310. for i, ff in enumerate(callers[-depth:]):
  311. frm = ff[0]
  312. f_self = frm.f_locals.get('self', None)
  313. if isinstance(f_self, ParserElement):
  314. if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
  315. continue
  316. if f_self in seen:
  317. continue
  318. seen.add(f_self)
  319. self_type = type(f_self)
  320. ret.append("{0}.{1} - {2}".format(self_type.__module__,
  321. self_type.__name__,
  322. f_self))
  323. elif f_self is not None:
  324. self_type = type(f_self)
  325. ret.append("{0}.{1}".format(self_type.__module__,
  326. self_type.__name__))
  327. else:
  328. code = frm.f_code
  329. if code.co_name in ('wrapper', '<module>'):
  330. continue
  331. ret.append("{0}".format(code.co_name))
  332. depth -= 1
  333. if not depth:
  334. break
  335. return '\n'.join(ret)
  336. class ParseFatalException(ParseBaseException):
  337. """user-throwable exception thrown when inconsistent parse content
  338. is found; stops all parsing immediately"""
  339. pass
  340. class ParseSyntaxException(ParseFatalException):
  341. """just like :class:`ParseFatalException`, but thrown internally
  342. when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
  343. that parsing is to stop immediately because an unbacktrackable
  344. syntax error has been found.
  345. """
  346. pass
  347. #~ class ReparseException(ParseBaseException):
  348. #~ """Experimental class - parse actions can raise this exception to cause
  349. #~ pyparsing to reparse the input string:
  350. #~ - with a modified input string, and/or
  351. #~ - with a modified start location
  352. #~ Set the values of the ReparseException in the constructor, and raise the
  353. #~ exception in a parse action to cause pyparsing to use the new string/location.
  354. #~ Setting the values as None causes no change to be made.
  355. #~ """
  356. #~ def __init_( self, newstring, restartLoc ):
  357. #~ self.newParseText = newstring
  358. #~ self.reparseLoc = restartLoc
  359. class RecursiveGrammarException(Exception):
  360. """exception thrown by :class:`ParserElement.validate` if the
  361. grammar could be improperly recursive
  362. """
  363. def __init__( self, parseElementList ):
  364. self.parseElementTrace = parseElementList
  365. def __str__( self ):
  366. return "RecursiveGrammarException: %s" % self.parseElementTrace
  367. class _ParseResultsWithOffset(object):
  368. def __init__(self,p1,p2):
  369. self.tup = (p1,p2)
  370. def __getitem__(self,i):
  371. return self.tup[i]
  372. def __repr__(self):
  373. return repr(self.tup[0])
  374. def setOffset(self,i):
  375. self.tup = (self.tup[0],i)
  376. class ParseResults(object):
  377. """Structured parse results, to provide multiple means of access to
  378. the parsed data:
  379. - as a list (``len(results)``)
  380. - by list index (``results[0], results[1]``, etc.)
  381. - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
  382. Example::
  383. integer = Word(nums)
  384. date_str = (integer.setResultsName("year") + '/'
  385. + integer.setResultsName("month") + '/'
  386. + integer.setResultsName("day"))
  387. # equivalent form:
  388. # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  389. # parseString returns a ParseResults object
  390. result = date_str.parseString("1999/12/31")
  391. def test(s, fn=repr):
  392. print("%s -> %s" % (s, fn(eval(s))))
  393. test("list(result)")
  394. test("result[0]")
  395. test("result['month']")
  396. test("result.day")
  397. test("'month' in result")
  398. test("'minutes' in result")
  399. test("result.dump()", str)
  400. prints::
  401. list(result) -> ['1999', '/', '12', '/', '31']
  402. result[0] -> '1999'
  403. result['month'] -> '12'
  404. result.day -> '31'
  405. 'month' in result -> True
  406. 'minutes' in result -> False
  407. result.dump() -> ['1999', '/', '12', '/', '31']
  408. - day: 31
  409. - month: 12
  410. - year: 1999
  411. """
  412. def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
  413. if isinstance(toklist, cls):
  414. return toklist
  415. retobj = object.__new__(cls)
  416. retobj.__doinit = True
  417. return retobj
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Set up token storage and, when ``name`` is given, register ``toklist`` under it.

        - toklist - a list (shallow-copied), a generator (materialized into a
          list), or any other object (stored as a single token)
        - name - results name to register; skipped when ``None`` or falsy
        - asList - when true, the named value is stored wrapped in a nested
          :class:`ParseResults`
        - modal - when false, ``name`` is recorded in the accumulating-names
          table, which ``__getitem__`` consults to return every occurrence
        - isinstance - the builtin bound as a default argument (presumably a
          lookup-speed measure, per the performance note above)
        """
        # NOTE(review): the source listing lost its indentation; the layout
        # below keeps the name-registration section outside the __doinit guard
        # so it also runs for an instance passed through by __new__ - confirm
        # against the canonical file.
        # __new__ sets __doinit only on objects it creates, so this state
        # setup runs once per object.
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # nothing is registered for an "empty" toklist (None, '' or [])
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    # prefer the first element; fall back to the object itself
                    # when it cannot be indexed that way
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist
  457. def __getitem__( self, i ):
  458. if isinstance( i, (int,slice) ):
  459. return self.__toklist[i]
  460. else:
  461. if i not in self.__accumNames:
  462. return self.__tokdict[i][-1][0]
  463. else:
  464. return ParseResults([ v[0] for v in self.__tokdict[i] ])
  465. def __setitem__( self, k, v, isinstance=isinstance ):
  466. if isinstance(v,_ParseResultsWithOffset):
  467. self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
  468. sub = v[0]
  469. elif isinstance(k,(int,slice)):
  470. self.__toklist[k] = v
  471. sub = v
  472. else:
  473. self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
  474. sub = v
  475. if isinstance(sub,ParseResults):
  476. sub.__parent = wkref(self)
  477. def __delitem__( self, i ):
  478. if isinstance(i,(int,slice)):
  479. mylen = len( self.__toklist )
  480. del self.__toklist[i]
  481. # convert int to slice
  482. if isinstance(i, int):
  483. if i < 0:
  484. i += mylen
  485. i = slice(i, i+1)
  486. # get removed indices
  487. removed = list(range(*i.indices(mylen)))
  488. removed.reverse()
  489. # fixup indices in token dictionary
  490. for name,occurrences in self.__tokdict.items():
  491. for j in removed:
  492. for k, (value, position) in enumerate(occurrences):
  493. occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
  494. else:
  495. del self.__tokdict[i]
  496. def __contains__( self, k ):
  497. return k in self.__tokdict
  498. def __len__( self ): return len( self.__toklist )
  499. def __bool__(self): return ( not not self.__toklist )
  500. __nonzero__ = __bool__
  501. def __iter__( self ): return iter( self.__toklist )
  502. def __reversed__( self ): return iter( self.__toklist[::-1] )
  503. def _iterkeys( self ):
  504. if hasattr(self.__tokdict, "iterkeys"):
  505. return self.__tokdict.iterkeys()
  506. else:
  507. return iter(self.__tokdict)
  508. def _itervalues( self ):
  509. return (self[k] for k in self._iterkeys())
  510. def _iteritems( self ):
  511. return ((k, self[k]) for k in self._iterkeys())
    # Publish the private iterator helpers under the dict-style names that
    # match the running interpreter: plain keys/values/items aliases when PY_3
    # is true, otherwise iter* aliases plus list-returning keys/values/items.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys."""

        values = _itervalues
        """Returns an iterator of all named result values."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        # list-returning wrappers around the iterator forms above
        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
  535. def haskeys( self ):
  536. """Since keys() returns an iterator, this method is helpful in bypassing
  537. code that looks for the existence of any defined results names."""
  538. return bool(self.__tokdict)
  539. def pop( self, *args, **kwargs):
  540. """
  541. Removes and returns item at specified index (default= ``last``).
  542. Supports both ``list`` and ``dict`` semantics for ``pop()``. If
  543. passed no argument or an integer argument, it will use ``list``
  544. semantics and pop tokens from the list of parsed tokens. If passed
  545. a non-integer argument (most likely a string), it will use ``dict``
  546. semantics and pop the corresponding value from any defined results
  547. names. A second default return value argument is supported, just as in
  548. ``dict.pop()``.
  549. Example::
  550. def remove_first(tokens):
  551. tokens.pop(0)
  552. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  553. print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
  554. label = Word(alphas)
  555. patt = label("LABEL") + OneOrMore(Word(nums))
  556. print(patt.parseString("AAB 123 321").dump())
  557. # Use pop() in a parse action to remove named result (note that corresponding value is not
  558. # removed from list form of results)
  559. def remove_LABEL(tokens):
  560. tokens.pop("LABEL")
  561. return tokens
  562. patt.addParseAction(remove_LABEL)
  563. print(patt.parseString("AAB 123 321").dump())
  564. prints::
  565. ['AAB', '123', '321']
  566. - LABEL: AAB
  567. ['AAB', '123', '321']
  568. """
  569. if not args:
  570. args = [-1]
  571. for k,v in kwargs.items():
  572. if k == 'default':
  573. args = (args[0], v)
  574. else:
  575. raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
  576. if (isinstance(args[0], int) or
  577. len(args) == 1 or
  578. args[0] in self):
  579. index = args[0]
  580. ret = self[index]
  581. del self[index]
  582. return ret
  583. else:
  584. defaultvalue = args[1]
  585. return defaultvalue
  586. def get(self, key, defaultValue=None):
  587. """
  588. Returns named result matching the given key, or if there is no
  589. such name, then returns the given ``defaultValue`` or ``None`` if no
  590. ``defaultValue`` is specified.
  591. Similar to ``dict.get()``.
  592. Example::
  593. integer = Word(nums)
  594. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  595. result = date_str.parseString("1999/12/31")
  596. print(result.get("year")) # -> '1999'
  597. print(result.get("hour", "not specified")) # -> 'not specified'
  598. print(result.get("hour")) # -> None
  599. """
  600. if key in self:
  601. return self[key]
  602. else:
  603. return defaultValue
  604. def insert( self, index, insStr ):
  605. """
  606. Inserts new element at location index in the list of parsed tokens.
  607. Similar to ``list.insert()``.
  608. Example::
  609. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  610. # use a parse action to insert the parse location in the front of the parsed results
  611. def insert_locn(locn, tokens):
  612. tokens.insert(0, locn)
  613. print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
  614. """
  615. self.__toklist.insert(index, insStr)
  616. # fixup indices in token dictionary
  617. for name,occurrences in self.__tokdict.items():
  618. for k, (value, position) in enumerate(occurrences):
  619. occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
  620. def append( self, item ):
  621. """
  622. Add single element to end of ParseResults list of elements.
  623. Example::
  624. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  625. # use a parse action to compute the sum of the parsed integers, and add it to the end
  626. def append_sum(tokens):
  627. tokens.append(sum(map(int, tokens)))
  628. print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
  629. """
  630. self.__toklist.append(item)
  631. def extend( self, itemseq ):
  632. """
  633. Add sequence of elements to end of ParseResults list of elements.
  634. Example::
  635. patt = OneOrMore(Word(alphas))
  636. # use a parse action to append the reverse of the matched strings, to make a palindrome
  637. def make_palindrome(tokens):
  638. tokens.extend(reversed([t[::-1] for t in tokens]))
  639. return ''.join(tokens)
  640. print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
  641. """
  642. if isinstance(itemseq, ParseResults):
  643. self.__iadd__(itemseq)
  644. else:
  645. self.__toklist.extend(itemseq)
  646. def clear( self ):
  647. """
  648. Clear all elements and results names.
  649. """
  650. del self.__toklist[:]
  651. self.__tokdict.clear()
  652. def __getattr__( self, name ):
  653. try:
  654. return self[name]
  655. except KeyError:
  656. return ""
  657. if name in self.__tokdict:
  658. if name not in self.__accumNames:
  659. return self.__tokdict[name][-1][0]
  660. else:
  661. return ParseResults([ v[0] for v in self.__tokdict[name] ])
  662. else:
  663. return ""
  664. def __add__( self, other ):
  665. ret = self.copy()
  666. ret += other
  667. return ret
  668. def __iadd__( self, other ):
  669. if other.__tokdict:
  670. offset = len(self.__toklist)
  671. addoffset = lambda a: offset if a<0 else a+offset
  672. otheritems = other.__tokdict.items()
  673. otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
  674. for (k,vlist) in otheritems for v in vlist]
  675. for k,v in otherdictitems:
  676. self[k] = v
  677. if isinstance(v[0],ParseResults):
  678. v[0].__parent = wkref(self)
  679. self.__toklist += other.__toklist
  680. self.__accumNames.update( other.__accumNames )
  681. return self
  682. def __radd__(self, other):
  683. if isinstance(other,int) and other == 0:
  684. # useful for merging many ParseResults using sum() builtin
  685. return self.copy()
  686. else:
  687. # this may raise a TypeError - so be it
  688. return other + self
  689. def __repr__( self ):
  690. return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
  691. def __str__( self ):
  692. return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
  693. def _asStringList( self, sep='' ):
  694. out = []
  695. for item in self.__toklist:
  696. if out and sep:
  697. out.append(sep)
  698. if isinstance( item, ParseResults ):
  699. out += item._asStringList()
  700. else:
  701. out.append( _ustr(item) )
  702. return out
  703. def asList( self ):
  704. """
  705. Returns the parse results as a nested list of matching tokens, all converted to strings.
  706. Example::
  707. patt = OneOrMore(Word(alphas))
  708. result = patt.parseString("sldkj lsdkj sldkj")
  709. # even though the result prints in string-like form, it is actually a pyparsing ParseResults
  710. print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
  711. # Use asList() to create an actual list
  712. result_list = result.asList()
  713. print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
  714. """
  715. return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
  716. def asDict( self ):
  717. """
  718. Returns the named parse results as a nested dictionary.
  719. Example::
  720. integer = Word(nums)
  721. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  722. result = date_str.parseString('12/31/1999')
  723. print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
  724. result_dict = result.asDict()
  725. print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
  726. # even though a ParseResults supports dict-like access, sometime you just need to have a dict
  727. import json
  728. print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
  729. print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
  730. """
  731. if PY_3:
  732. item_fn = self.items
  733. else:
  734. item_fn = self.iteritems
  735. def toItem(obj):
  736. if isinstance(obj, ParseResults):
  737. if obj.haskeys():
  738. return obj.asDict()
  739. else:
  740. return [toItem(v) for v in obj]
  741. else:
  742. return obj
  743. return dict((k,toItem(v)) for k,v in item_fn())
  744. def copy( self ):
  745. """
  746. Returns a new copy of a :class:`ParseResults` object.
  747. """
  748. ret = ParseResults( self.__toklist )
  749. ret.__tokdict = dict(self.__tokdict.items())
  750. ret.__parent = self.__parent
  751. ret.__accumNames.update( self.__accumNames )
  752. ret.__name = self.__name
  753. return ret
  754. def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
  755. """
  756. (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
  757. """
  758. nl = "\n"
  759. out = []
  760. namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
  761. for v in vlist)
  762. nextLevelIndent = indent + " "
  763. # collapse out indents if formatting is not desired
  764. if not formatted:
  765. indent = ""
  766. nextLevelIndent = ""
  767. nl = ""
  768. selfTag = None
  769. if doctag is not None:
  770. selfTag = doctag
  771. else:
  772. if self.__name:
  773. selfTag = self.__name
  774. if not selfTag:
  775. if namedItemsOnly:
  776. return ""
  777. else:
  778. selfTag = "ITEM"
  779. out += [ nl, indent, "<", selfTag, ">" ]
  780. for i,res in enumerate(self.__toklist):
  781. if isinstance(res,ParseResults):
  782. if i in namedItems:
  783. out += [ res.asXML(namedItems[i],
  784. namedItemsOnly and doctag is None,
  785. nextLevelIndent,
  786. formatted)]
  787. else:
  788. out += [ res.asXML(None,
  789. namedItemsOnly and doctag is None,
  790. nextLevelIndent,
  791. formatted)]
  792. else:
  793. # individual token, see if there is a name for it
  794. resTag = None
  795. if i in namedItems:
  796. resTag = namedItems[i]
  797. if not resTag:
  798. if namedItemsOnly:
  799. continue
  800. else:
  801. resTag = "ITEM"
  802. xmlBodyText = _xml_escape(_ustr(res))
  803. out += [ nl, nextLevelIndent, "<", resTag, ">",
  804. xmlBodyText,
  805. "</", resTag, ">" ]
  806. out += [ nl, indent, "</", selfTag, ">" ]
  807. return "".join(out)
  808. def __lookup(self,sub):
  809. for k,vlist in self.__tokdict.items():
  810. for v,loc in vlist:
  811. if sub is v:
  812. return k
  813. return None
  814. def getName(self):
  815. r"""
  816. Returns the results name for this token expression. Useful when several
  817. different expressions might match at a particular location.
  818. Example::
  819. integer = Word(nums)
  820. ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
  821. house_number_expr = Suppress('#') + Word(nums, alphanums)
  822. user_data = (Group(house_number_expr)("house_number")
  823. | Group(ssn_expr)("ssn")
  824. | Group(integer)("age"))
  825. user_info = OneOrMore(user_data)
  826. result = user_info.parseString("22 111-22-3333 #221B")
  827. for item in result:
  828. print(item.getName(), ':', item[0])
  829. prints::
  830. age : 22
  831. ssn : 111-22-3333
  832. house_number : 221B
  833. """
  834. if self.__name:
  835. return self.__name
  836. elif self.__parent:
  837. par = self.__parent()
  838. if par:
  839. return par.__lookup(self)
  840. else:
  841. return None
  842. elif (len(self) == 1 and
  843. len(self.__tokdict) == 1 and
  844. next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
  845. return next(iter(self.__tokdict.keys()))
  846. else:
  847. return None
  848. def dump(self, indent='', depth=0, full=True):
  849. """
  850. Diagnostic method for listing out the contents of
  851. a :class:`ParseResults`. Accepts an optional ``indent`` argument so
  852. that this string can be embedded in a nested display of other data.
  853. Example::
  854. integer = Word(nums)
  855. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  856. result = date_str.parseString('12/31/1999')
  857. print(result.dump())
  858. prints::
  859. ['12', '/', '31', '/', '1999']
  860. - day: 1999
  861. - month: 31
  862. - year: 12
  863. """
  864. out = []
  865. NL = '\n'
  866. out.append( indent+_ustr(self.asList()) )
  867. if full:
  868. if self.haskeys():
  869. items = sorted((str(k), v) for k,v in self.items())
  870. for k,v in items:
  871. if out:
  872. out.append(NL)
  873. out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
  874. if isinstance(v,ParseResults):
  875. if v:
  876. out.append( v.dump(indent,depth+1) )
  877. else:
  878. out.append(_ustr(v))
  879. else:
  880. out.append(repr(v))
  881. elif any(isinstance(vv,ParseResults) for vv in self):
  882. v = self
  883. for i,vv in enumerate(v):
  884. if isinstance(vv,ParseResults):
  885. out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
  886. else:
  887. out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
  888. return "".join(out)
  889. def pprint(self, *args, **kwargs):
  890. """
  891. Pretty-printer for parsed results as a list, using the
  892. `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
  893. Accepts additional positional or keyword args as defined for
  894. `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
  895. Example::
  896. ident = Word(alphas, alphanums)
  897. num = Word(nums)
  898. func = Forward()
  899. term = ident | num | Group('(' + func + ')')
  900. func <<= ident + Group(Optional(delimitedList(term)))
  901. result = func.parseString("fna a,b,(fnb c,d,200),100")
  902. result.pprint(width=40)
  903. prints::
  904. ['fna',
  905. ['a',
  906. 'b',
  907. ['(', 'fnb', ['c', 'd', '200'], ')'],
  908. '100']]
  909. """
  910. pprint.pprint(self.asList(), *args, **kwargs)
  911. # add support for pickle protocol
  912. def __getstate__(self):
  913. return ( self.__toklist,
  914. ( self.__tokdict.copy(),
  915. self.__parent is not None and self.__parent() or None,
  916. self.__accumNames,
  917. self.__name ) )
  918. def __setstate__(self,state):
  919. self.__toklist = state[0]
  920. (self.__tokdict,
  921. par,
  922. inAccumNames,
  923. self.__name) = state[1]
  924. self.__accumNames = {}
  925. self.__accumNames.update(inAccumNames)
  926. if par is not None:
  927. self.__parent = wkref(par)
  928. else:
  929. self.__parent = None
  930. def __getnewargs__(self):
  931. return self.__toklist, self.__name, self.__asList, self.__modal
  932. def __dir__(self):
  933. return (dir(type(self)) + list(self.keys()))
# Register ParseResults with MutableMapping (presumably the collections ABC -
# confirm against the imports at the top of the file) so that it is recognised
# as a mutable mapping without inheriting from it.
MutableMapping.register(ParseResults)
  935. def col (loc,strg):
  936. """Returns current column within a string, counting newlines as line separators.
  937. The first column is number 1.
  938. Note: the default parsing behavior is to expand tabs in the input string
  939. before starting the parsing process. See
  940. :class:`ParserElement.parseString` for more
  941. information on parsing strings containing ``<TAB>`` s, and suggested
  942. methods to maintain a consistent view of the parsed string, the parse
  943. location, and line and column positions within the parsed string.
  944. """
  945. s = strg
  946. return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
  947. def lineno(loc,strg):
  948. """Returns current line number within a string, counting newlines as line separators.
  949. The first line is number 1.
  950. Note - the default parsing behavior is to expand tabs in the input string
  951. before starting the parsing process. See :class:`ParserElement.parseString`
  952. for more information on parsing strings containing ``<TAB>`` s, and
  953. suggested methods to maintain a consistent view of the parsed string, the
  954. parse location, and line and column positions within the parsed string.
  955. """
  956. return strg.count("\n",0,loc) + 1
  957. def line( loc, strg ):
  958. """Returns the line of text containing loc within a string, counting newlines as line separators.
  959. """
  960. lastCR = strg.rfind("\n", 0, loc)
  961. nextCR = strg.find("\n", loc)
  962. if nextCR >= 0:
  963. return strg[lastCR+1:nextCR]
  964. else:
  965. return strg[lastCR+1:]
  966. def _defaultStartDebugAction( instring, loc, expr ):
  967. print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
  968. def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
  969. print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
  970. def _defaultExceptionDebugAction( instring, loc, expr, exc ):
  971. print ("Exception raised:" + _ustr(exc))
  972. def nullDebugAction(*args):
  973. """'Do-nothing' debug action, to suppress debugging output during parsing."""
  974. pass
  975. # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
  976. #~ 'decorator to trim function calls to match the arity of the target'
  977. #~ def _trim_arity(func, maxargs=3):
  978. #~ if func in singleArgBuiltins:
  979. #~ return lambda s,l,t: func(t)
  980. #~ limit = 0
  981. #~ foundArity = False
  982. #~ def wrapper(*args):
  983. #~ nonlocal limit,foundArity
  984. #~ while 1:
  985. #~ try:
  986. #~ ret = func(*args[limit:])
  987. #~ foundArity = True
  988. #~ return ret
  989. #~ except TypeError:
  990. #~ if limit == maxargs or foundArity:
  991. #~ raise
  992. #~ limit += 1
  993. #~ continue
  994. #~ return wrapper
  995. # this version is Python 2.x-3.x cross-compatible
  996. 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap ``func`` so that it can always be called with the full parse
    action argument list, whatever number of arguments it actually accepts.

    The returned wrapper first calls ``func`` with all arguments; each time
    that call itself raises ``TypeError`` it drops one more leading
    argument and retries, until a call succeeds or more than ``maxargs``
    leading arguments would have to be dropped. The number of dropped
    arguments is remembered between calls.

    Callables listed in ``singleArgBuiltins`` are special-cased and are
    always called with the tokens argument only.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells shared with the nested wrapper
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # NOTE(review): LINE_DIFF is the number of physical lines from the 'this_line'
    # assignment below to the 'ret = func(...)' call inside wrapper. The blank line
    # before 'def wrapper' counts towards it and must be kept.
    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # a TypeError whose innermost examined frame is not the call line
                        # above came from inside func itself, so let it propagate
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                # drop one more leading argument and retry, up to maxargs
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
  1053. class ParserElement(object):
  1054. """Abstract base level parser element class."""
  1055. DEFAULT_WHITE_CHARS = " \n\t\r"
  1056. verbose_stacktrace = False
  1057. @staticmethod
  1058. def setDefaultWhitespaceChars( chars ):
  1059. r"""
  1060. Overrides the default whitespace chars
  1061. Example::
  1062. # default whitespace chars are space, <TAB> and newline
  1063. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
  1064. # change to just treat newline as significant
  1065. ParserElement.setDefaultWhitespaceChars(" \t")
  1066. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
  1067. """
  1068. ParserElement.DEFAULT_WHITE_CHARS = chars
  1069. @staticmethod
  1070. def inlineLiteralsUsing(cls):
  1071. """
  1072. Set class to be used for inclusion of string literals into a parser.
  1073. Example::
  1074. # default literal class used is Literal
  1075. integer = Word(nums)
  1076. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1077. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  1078. # change to Suppress
  1079. ParserElement.inlineLiteralsUsing(Suppress)
  1080. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1081. date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
  1082. """
  1083. ParserElement._literalStringClass = cls
  1084. def __init__( self, savelist=False ):
  1085. self.parseAction = list()
  1086. self.failAction = None
  1087. #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
  1088. self.strRepr = None
  1089. self.resultsName = None
  1090. self.saveAsList = savelist
  1091. self.skipWhitespace = True
  1092. self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
  1093. self.copyDefaultWhiteChars = True
  1094. self.mayReturnEmpty = False # used when checking for left-recursion
  1095. self.keepTabs = False
  1096. self.ignoreExprs = list()
  1097. self.debug = False
  1098. self.streamlined = False
  1099. self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
  1100. self.errmsg = ""
  1101. self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
  1102. self.debugActions = ( None, None, None ) #custom debug actions
  1103. self.re = None
  1104. self.callPreparse = True # used to avoid redundant calls to preParse
  1105. self.callDuringTry = False
  1106. def copy( self ):
  1107. """
  1108. Make a copy of this :class:`ParserElement`. Useful for defining
  1109. different parse actions for the same parsing pattern, using copies of
  1110. the original parse element.
  1111. Example::
  1112. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1113. integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
  1114. integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
  1115. print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
  1116. prints::
  1117. [5120, 100, 655360, 268435456]
  1118. Equivalent form of ``expr.copy()`` is just ``expr()``::
  1119. integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
  1120. """
  1121. cpy = copy.copy( self )
  1122. cpy.parseAction = self.parseAction[:]
  1123. cpy.ignoreExprs = self.ignoreExprs[:]
  1124. if self.copyDefaultWhiteChars:
  1125. cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  1126. return cpy
  1127. def setName( self, name ):
  1128. """
  1129. Define name for this expression, makes debugging and exception messages clearer.
  1130. Example::
  1131. Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
  1132. Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
  1133. """
  1134. self.name = name
  1135. self.errmsg = "Expected " + self.name
  1136. if hasattr(self,"exception"):
  1137. self.exception.msg = self.errmsg
  1138. return self
  1139. def setResultsName( self, name, listAllMatches=False ):
  1140. """
  1141. Define name for referencing matching tokens as a nested attribute
  1142. of the returned parse results.
  1143. NOTE: this returns a *copy* of the original :class:`ParserElement` object;
  1144. this is so that the client can define a basic element, such as an
  1145. integer, and reference it in multiple places with different names.
  1146. You can also set results names using the abbreviated syntax,
  1147. ``expr("name")`` in place of ``expr.setResultsName("name")``
  1148. - see :class:`__call__`.
  1149. Example::
  1150. date_str = (integer.setResultsName("year") + '/'
  1151. + integer.setResultsName("month") + '/'
  1152. + integer.setResultsName("day"))
  1153. # equivalent form:
  1154. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1155. """
  1156. newself = self.copy()
  1157. if name.endswith("*"):
  1158. name = name[:-1]
  1159. listAllMatches=True
  1160. newself.resultsName = name
  1161. newself.modalResults = not listAllMatches
  1162. return newself
  1163. def setBreak(self,breakFlag = True):
  1164. """Method to invoke the Python pdb debugger when this element is
  1165. about to be parsed. Set ``breakFlag`` to True to enable, False to
  1166. disable.
  1167. """
  1168. if breakFlag:
  1169. _parseMethod = self._parse
  1170. def breaker(instring, loc, doActions=True, callPreParse=True):
  1171. import pdb
  1172. pdb.set_trace()
  1173. return _parseMethod( instring, loc, doActions, callPreParse )
  1174. breaker._originalParseMethod = _parseMethod
  1175. self._parse = breaker
  1176. else:
  1177. if hasattr(self._parse,"_originalParseMethod"):
  1178. self._parse = self._parse._originalParseMethod
  1179. return self
  1180. def setParseAction( self, *fns, **kwargs ):
  1181. """
  1182. Define one or more actions to perform when successfully matching parse element definition.
  1183. Parse action fn is a callable method with 0-3 arguments, called as ``fn(s,loc,toks)`` ,
  1184. ``fn(loc,toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
  1185. - s = the original string being parsed (see note below)
  1186. - loc = the location of the matching substring
  1187. - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
  1188. If the functions in fns modify the tokens, they can return them as the return
  1189. value from fn, and the modified list of tokens will replace the original.
  1190. Otherwise, fn does not need to return any value.
  1191. Optional keyword arguments:
  1192. - callDuringTry = (default= ``False`` ) indicate if parse action should be run during lookaheads and alternate testing
  1193. Note: the default parsing behavior is to expand tabs in the input string
  1194. before starting the parsing process. See :class:`parseString for more
  1195. information on parsing strings containing ``<TAB>`` s, and suggested
  1196. methods to maintain a consistent view of the parsed string, the parse
  1197. location, and line and column positions within the parsed string.
  1198. Example::
  1199. integer = Word(nums)
  1200. date_str = integer + '/' + integer + '/' + integer
  1201. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  1202. # use parse action to convert to ints at parse time
  1203. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1204. date_str = integer + '/' + integer + '/' + integer
  1205. # note that integer fields are now ints, not strings
  1206. date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
  1207. """
  1208. self.parseAction = list(map(_trim_arity, list(fns)))
  1209. self.callDuringTry = kwargs.get("callDuringTry", False)
  1210. return self
  1211. def addParseAction( self, *fns, **kwargs ):
  1212. """
  1213. Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.
  1214. See examples in :class:`copy`.
  1215. """
  1216. self.parseAction += list(map(_trim_arity, list(fns)))
  1217. self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
  1218. return self
  1219. def addCondition(self, *fns, **kwargs):
  1220. """Add a boolean predicate function to expression's list of parse actions. See
  1221. :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
  1222. functions passed to ``addCondition`` need to return boolean success/fail of the condition.
  1223. Optional keyword arguments:
  1224. - message = define a custom message to be used in the raised exception
  1225. - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
  1226. Example::
  1227. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1228. year_int = integer.copy()
  1229. year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
  1230. date_str = year_int + '/' + integer + '/' + integer
  1231. result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
  1232. """
  1233. msg = kwargs.get("message", "failed user-defined condition")
  1234. exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
  1235. for fn in fns:
  1236. fn = _trim_arity(fn)
  1237. def pa(s,l,t):
  1238. if not bool(fn(s,l,t)):
  1239. raise exc_type(s,l,msg)
  1240. self.parseAction.append(pa)
  1241. self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
  1242. return self
  1243. def setFailAction( self, fn ):
  1244. """Define action to perform if parsing fails at this expression.
  1245. Fail acton fn is a callable function that takes the arguments
  1246. ``fn(s,loc,expr,err)`` where:
  1247. - s = string being parsed
  1248. - loc = location where expression match was attempted and failed
  1249. - expr = the parse expression that failed
  1250. - err = the exception thrown
  1251. The function returns no value. It may throw :class:`ParseFatalException`
  1252. if it is desired to stop parsing immediately."""
  1253. self.failAction = fn
  1254. return self
  1255. def _skipIgnorables( self, instring, loc ):
  1256. exprsFound = True
  1257. while exprsFound:
  1258. exprsFound = False
  1259. for e in self.ignoreExprs:
  1260. try:
  1261. while 1:
  1262. loc,dummy = e._parse( instring, loc )
  1263. exprsFound = True
  1264. except ParseException:
  1265. pass
  1266. return loc
  1267. def preParse( self, instring, loc ):
  1268. if self.ignoreExprs:
  1269. loc = self._skipIgnorables( instring, loc )
  1270. if self.skipWhitespace:
  1271. wt = self.whiteChars
  1272. instrlen = len(instring)
  1273. while loc < instrlen and instring[loc] in wt:
  1274. loc += 1
  1275. return loc
  1276. def parseImpl( self, instring, loc, doActions=True ):
  1277. return loc, []
  1278. def postParse( self, instring, loc, tokenlist ):
  1279. return tokenlist
  1280. #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Match this element against ``instring`` at ``loc`` without consulting
    the packrat cache.

    Steps: optionally pre-parse (skip ignorables/whitespace), call
    ``parseImpl``, pass the tokens through ``postParse``, wrap them in a
    :class:`ParseResults`, then run the parse actions (only if
    ``doActions`` or ``callDuringTry`` is set).

    Returns a ``(loc, ParseResults)`` tuple, where ``loc`` is the location
    returned by ``parseImpl``.

    An ``IndexError`` raised by ``parseImpl`` (when guarded) or by a parse
    action is converted into a ``ParseException``. Debug actions and the
    fail action, when set, are invoked along the way.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: debug hooks and/or a fail action need to observe the attempt
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # report running off the end of the input as a parse failure at end of string
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug hooks and no fail action
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError where the element may raise one, or at end of input
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn( instring, tokensStart, retTokens )
                    except IndexError as parse_action_exc:
                        # surface an IndexError from a parse action as a ParseException,
                        # keeping the original as its cause
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action that returns new tokens replaces the current results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception hook
            for fn in self.parseAction:
                try:
                    tokens = fn( instring, tokensStart, retTokens )
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
  1357. def tryParse( self, instring, loc ):
  1358. try:
  1359. return self._parse( instring, loc, doActions=False )[0]
  1360. except ParseFatalException:
  1361. raise ParseException( instring, loc, self.errmsg, self)
  1362. def canParseNext(self, instring, loc):
  1363. try:
  1364. self.tryParse(instring, loc)
  1365. except (ParseException, IndexError):
  1366. return False
  1367. else:
  1368. return True
  1369. class _UnboundedCache(object):
  1370. def __init__(self):
  1371. cache = {}
  1372. self.not_in_cache = not_in_cache = object()
  1373. def get(self, key):
  1374. return cache.get(key, not_in_cache)
  1375. def set(self, key, value):
  1376. cache[key] = value
  1377. def clear(self):
  1378. cache.clear()
  1379. def cache_len(self):
  1380. return len(cache)
  1381. self.get = types.MethodType(get, self)
  1382. self.set = types.MethodType(set, self)
  1383. self.clear = types.MethodType(clear, self)
  1384. self.__len__ = types.MethodType(cache_len, self)
  1385. if _OrderedDict is not None:
  1386. class _FifoCache(object):
  1387. def __init__(self, size):
  1388. self.not_in_cache = not_in_cache = object()
  1389. cache = _OrderedDict()
  1390. def get(self, key):
  1391. return cache.get(key, not_in_cache)
  1392. def set(self, key, value):
  1393. cache[key] = value
  1394. while len(cache) > size:
  1395. try:
  1396. cache.popitem(False)
  1397. except KeyError:
  1398. pass
  1399. def clear(self):
  1400. cache.clear()
  1401. def cache_len(self):
  1402. return len(cache)
  1403. self.get = types.MethodType(get, self)
  1404. self.set = types.MethodType(set, self)
  1405. self.clear = types.MethodType(clear, self)
  1406. self.__len__ = types.MethodType(cache_len, self)
  1407. else:
  1408. class _FifoCache(object):
  1409. def __init__(self, size):
  1410. self.not_in_cache = not_in_cache = object()
  1411. cache = {}
  1412. key_fifo = collections.deque([], size)
  1413. def get(self, key):
  1414. return cache.get(key, not_in_cache)
  1415. def set(self, key, value):
  1416. cache[key] = value
  1417. while len(key_fifo) > size:
  1418. cache.pop(key_fifo.popleft(), None)
  1419. key_fifo.append(key)
  1420. def clear(self):
  1421. cache.clear()
  1422. key_fifo.clear()
  1423. def cache_len(self):
  1424. return len(cache)
  1425. self.get = types.MethodType(get, self)
  1426. self.set = types.MethodType(set, self)
  1427. self.clear = types.MethodType(clear, self)
  1428. self.__len__ = types.MethodType(cache_len, self)
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
# lock held by _parseCache around every cache lookup/update
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed by the HIT/MISS constants in _parseCache
packrat_cache_stats = [0, 0]
  1433. # this method gets repeatedly called during backtracking with the same arguments -
  1434. # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
  1435. def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
  1436. HIT, MISS = 0, 1
  1437. lookup = (self, instring, loc, callPreParse, doActions)
  1438. with ParserElement.packrat_cache_lock:
  1439. cache = ParserElement.packrat_cache
  1440. value = cache.get(lookup)
  1441. if value is cache.not_in_cache:
  1442. ParserElement.packrat_cache_stats[MISS] += 1
  1443. try:
  1444. value = self._parseNoCache(instring, loc, doActions, callPreParse)
  1445. except ParseBaseException as pe:
  1446. # cache a copy of the exception, without the traceback
  1447. cache.set(lookup, pe.__class__(*pe.args))
  1448. raise
  1449. else:
  1450. cache.set(lookup, (value[0], value[1].copy()))
  1451. return value
  1452. else:
  1453. ParserElement.packrat_cache_stats[HIT] += 1
  1454. if isinstance(value, Exception):
  1455. raise value
  1456. return (value[0], value[1].copy())
# default parse entry point is the uncached implementation; enablePackrat()
# rebinds ParserElement._parse to _parseCache
_parse = _parseNoCache
  1458. @staticmethod
  1459. def resetCache():
  1460. ParserElement.packrat_cache.clear()
  1461. ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
# set to True by the first call to enablePackrat(); later calls are then no-ops
_packratEnabled = False
  @staticmethod
  def enablePackrat(cache_size_limit=128):
      """Turn on "packrat" parsing, i.e. memoization of parse attempts.

      Complex grammars frequently retry the same expression at the same
      string location.  With packrat parsing enabled, such repeated attempts
      return a cached value instead of re-executing the parsing/validating
      code.  Both valid results and parsing exceptions are memoized.

      Parameters:

      - cache_size_limit - (default= ``128``) - if an integer value is provided
        will limit the size of the packrat cache; if None is passed, then
        the cache size will be unbounded; if 0 is passed, the cache will
        be effectively disabled.

      This speedup may break existing programs that use parse actions that
      have side-effects.  For this reason, packrat parsing is disabled when
      you first import pyparsing.  To activate the packrat feature, your
      program must call :class:`ParserElement.enablePackrat`.  For best
      results, call ``enablePackrat()`` immediately after importing
      pyparsing.

      Only the first call has any effect; once packrat parsing is enabled,
      later calls leave the existing cache untouched.

      Example::

          import pyparsing
          pyparsing.ParserElement.enablePackrat()
      """
      if ParserElement._packratEnabled:
          return
      ParserElement._packratEnabled = True
      if cache_size_limit is None:
          cache = ParserElement._UnboundedCache()
      else:
          cache = ParserElement._FifoCache(cache_size_limit)
      ParserElement.packrat_cache = cache
      # route all parsing through the memoizing implementation from now on
      ParserElement._parse = ParserElement._parseCache
  def parseString(self, instring, parseAll=False):
      """
      Run this parse expression against ``instring`` and return the matched
      tokens.

      This is the main entry point for client code once the complete
      expression has been built.

      If the grammar must consume the entire input string, set ``parseAll``
      to True (equivalent to ending the grammar with ``StringEnd()``).

      Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
      in order to report proper column numbers in parse actions.
      If the input string contains tabs and the grammar uses parse actions
      that use the ``loc`` argument to index into the string being parsed,
      you can ensure you have a consistent view of the input string by:

      - calling ``parseWithTabs`` on your grammar before calling ``parseString``
        (see :class:`parseWithTabs`)
      - defining your parse action using the full ``(s,loc,toks)`` signature, and
        referencing the input string using the parse action's ``s`` argument
      - explicitly expanding the tabs in your input string before calling
        ``parseString``

      Example::

          Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
          Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
      """
      ParserElement.resetCache()
      if not self.streamlined:
          self.streamline()
      for ignorable in self.ignoreExprs:
          ignorable.streamline()
      if not self.keepTabs:
          instring = instring.expandtabs()
      try:
          end_loc, parsed = self._parse(instring, 0)
          if parseAll:
              # skip trailing ignorables, then require end of input
              end_loc = self.preParse(instring, end_loc)
              (Empty() + StringEnd())._parse(instring, end_loc)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
      return parsed
  def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
      """
      Scan the input string for expression matches.  This is a generator:
      each match yields a ``(tokens, start, end)`` tuple holding the matching
      tokens, start location, and end location.  May be called with optional
      ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
      ``overlap`` is specified, then overlapping matches will be reported.

      Note that the start and end locations are reported relative to the string
      being parsed.  See :class:`parseString` for more information on parsing
      strings with embedded tabs.

      Example::

          source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
          print(source)
          for tokens,start,end in Word(alphas).scanString(source):
              print(' '*start + '^'*(end-start))
              print(' '*start + tokens[0])

      prints::

          sldjf123lsdjjkf345sldkjf879lkjsfd987
          ^^^^^
          sldjf
                  ^^^^^^^
                  lsdjjkf
                            ^^^^^^
                            sldkjf
                                     ^^^^^^
                                     lkjsfd
      """
      if not self.streamlined:
          self.streamline()
      for e in self.ignoreExprs:
          e.streamline()
      if not self.keepTabs:
          instring = _ustr(instring).expandtabs()
      instrlen = len(instring)
      loc = 0
      # bind the bound methods to locals once; they are called on every iteration
      preparseFn = self.preParse
      parseFn = self._parse
      ParserElement.resetCache()
      matches = 0
      try:
          while loc <= instrlen and matches < maxMatches:
              try:
                  # preParse is run here explicitly, so the parse call is told to skip it
                  preloc = preparseFn( instring, loc )
                  nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
              except ParseException:
                  # no match at this position: retry one character further on
                  loc = preloc+1
              else:
                  if nextLoc > loc:
                      # the match ended past the current location: report it
                      matches += 1
                      yield tokens, preloc, nextLoc
                      if overlap:
                          # preParse again from loc: if that alone moves forward,
                          # resume after the match; otherwise step one character
                          nextloc = preparseFn( instring, loc )
                          if nextloc > loc:
                              loc = nextLoc
                          else:
                              loc += 1
                      else:
                          loc = nextLoc
                  else:
                      # match did not end past loc: step forward so the loop advances
                      loc = preloc+1
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          else:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
  def transformString(self, instring):
      """
      Extension to :class:`scanString` that rewrites the matched portions of
      ``instring`` using the tokens returned for each match (typically tokens
      modified by a parse action).  To use ``transformString``, define a grammar
      and attach a parse action to it that modifies the returned token list.
      Invoking ``transformString()`` on a target string will then scan for
      matches, and replace the matched text patterns according to the logic in
      the parse action.  ``transformString()`` returns the resulting
      transformed string.

      Note that this method sets ``keepTabs`` to True on this expression.

      Example::

          wd = Word(alphas)
          wd.setParseAction(lambda toks: toks[0].title())

          print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

      prints::

          Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
      """
      pieces = []
      cursor = 0
      # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
      # keep string locs straight between transformString and scanString
      self.keepTabs = True
      try:
          for toks, start, end in self.scanString(instring):
              # unmatched text between the previous match and this one is kept as-is
              pieces.append(instring[cursor:start])
              cursor = end
              if not toks:
                  continue
              if isinstance(toks, ParseResults):
                  pieces.extend(toks.asList())
              elif isinstance(toks, list):
                  pieces.extend(toks)
              else:
                  pieces.append(toks)
          pieces.append(instring[cursor:])
          kept = [piece for piece in pieces if piece]
          return "".join(map(_ustr, _flatten(kept)))
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def searchString(self, instring, maxMatches=_MAX_INT):
      """
      Another extension to :class:`scanString`, returning only the tokens of
      each match (the start and end locations are discarded), collected into a
      :class:`ParseResults`.  May be called with optional ``maxMatches``
      argument, to clip searching after 'n' matches are found.

      Example::

          # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
          cap_word = Word(alphas.upper(), alphas.lower())

          print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

          # the sum() builtin can be used to merge results into a single ParseResults object
          print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

      prints::

          [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
          ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
      """
      try:
          found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
          return ParseResults(found)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
      """
      Generator method to split a string using the given expression as a separator.
      May be called with optional ``maxsplit`` argument, to limit the number of splits;
      and the optional ``includeSeparators`` argument (default= ``False``), if the separating
      matching text should be included in the split results.

      Yields the text between consecutive matches and, last, the text after
      the final match.  When ``includeSeparators`` is true, the first token of
      each separator match is yielded after the text that precedes it.

      Example::

          punc = oneOf(list(".,;:/-!?"))
          print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

      prints::

          ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
      """
      # end location of the previous separator match
      last = 0
      for t, s, e in self.scanString(instring, maxMatches=maxsplit):
          yield instring[last:s]
          if includeSeparators:
              yield t[0]
          last = e
      # whatever follows the final separator (possibly the empty string)
      yield instring[last:]
  def __add__(self, other):
      """
      Implementation of + operator - returns :class:`And`. Strings added to a
      ParserElement are converted to :class:`Literal`s by default.  For any
      other non-ParserElement operand a ``SyntaxWarning`` is issued and
      ``None`` is returned.

      Example::

          greet = Word(alphas) + "," + Word(alphas) + "!"
          hello = "Hello, World!"
          print (hello, "->", greet.parseString(hello))

      prints::

          Hello, World! -> ['Hello', ',', 'World', '!']
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return And([self, other])
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __radd__(self, other):
      """
      Implementation of + operator when left operand is not a :class:`ParserElement`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return other + self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __sub__(self, other):
      """
      Implementation of - operator, returns :class:`And` with error stop.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          # the error stop marker sits between the two operands
          return self + And._ErrorStop() + other
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __rsub__(self, other):
      """
      Implementation of - operator when left operand is not a :class:`ParserElement`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return other - self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __mul__(self, other):
      """
      Implementation of * operator, allows use of ``expr * 3`` in place of
      ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
      tuple, similar to ``{min,max}`` multipliers in regular expressions.  Tuples
      may also include ``None`` as in:

      - ``expr*(n,None)`` or ``expr*(n,)`` is equivalent
        to ``expr*n + ZeroOrMore(expr)``
        (read as "at least n instances of ``expr``")
      - ``expr*(None,n)`` is equivalent to ``expr*(0,n)``
        (read as "0 to n instances of ``expr``")
      - ``expr*(None,None)`` is equivalent to ``ZeroOrMore(expr)``
      - ``expr*(1,None)`` is equivalent to ``OneOrMore(expr)``

      Note that ``expr*(None,n)`` does not raise an exception if
      more than n exprs exist in the input stream; that is,
      ``expr*(None,n)`` does not enforce a maximum number of expr
      occurrences.  If this behavior is desired, then write
      ``expr*(None,n) + ~expr``

      Raises ``TypeError`` if ``other`` is neither an int nor a tuple of
      ints/``None``, and ``ValueError`` for a negative count, a maximum
      smaller than the minimum, or a multiplier of 0 / (0,0).
      """
      if isinstance(other, int):
          minElements, optElements = other, 0
      elif isinstance(other, tuple):
          # pad short tuples with None so that (n,) behaves like (n, None)
          other = (other + (None, None))[:2]
          if other[0] is None:
              other = (0, other[1])
          if isinstance(other[0], int) and other[1] is None:
              if other[0] == 0:
                  return ZeroOrMore(self)
              if other[0] == 1:
                  return OneOrMore(self)
              else:
                  return self * other[0] + ZeroOrMore(self)
          elif isinstance(other[0], int) and isinstance(other[1], int):
              minElements, optElements = other
              # optElements becomes the number of optional repetitions beyond the minimum
              optElements -= minElements
          else:
              # interpolate the operand types into the message (they were previously
              # passed as separate exception args and never formatted)
              raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                              % (type(other[0]).__name__, type(other[1]).__name__))
      else:
          raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                          % type(other).__name__)

      if minElements < 0:
          raise ValueError("cannot multiply ParserElement by negative value")
      if optElements < 0:
          raise ValueError("second tuple value must be greater or equal to first tuple value")
      if minElements == optElements == 0:
          raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

      if optElements:
          def makeOptionalList(n):
              # nested Optionals: each further repetition is only tried if the previous one matched
              if n > 1:
                  return Optional(self + makeOptionalList(n - 1))
              else:
                  return Optional(self)
          if minElements:
              if minElements == 1:
                  ret = self + makeOptionalList(optElements)
              else:
                  ret = And([self] * minElements) + makeOptionalList(optElements)
          else:
              ret = makeOptionalList(optElements)
      else:
          if minElements == 1:
              ret = self
          else:
              ret = And([self] * minElements)
      return ret
  def __rmul__(self, other):
      """
      Implementation of * operator when the left operand is not a
      :class:`ParserElement`; delegates to ``__mul__`` with the same operand.
      """
      product = self.__mul__(other)
      return product
  def __or__(self, other):
      """
      Implementation of | operator - returns :class:`MatchFirst`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return MatchFirst([self, other])
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __ror__(self, other):
      """
      Implementation of | operator when left operand is not a :class:`ParserElement`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return other | self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __xor__(self, other):
      """
      Implementation of ^ operator - returns :class:`Or`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return Or([self, other])
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __rxor__(self, other):
      """
      Implementation of ^ operator when left operand is not a :class:`ParserElement`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return other ^ self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __and__(self, other):
      """
      Implementation of & operator - returns :class:`Each`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return Each([self, other])
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __rand__(self, other):
      """
      Implementation of & operator when left operand is not a :class:`ParserElement`.
      A string operand is converted to the literal string class first; any
      other unsupported operand produces a ``SyntaxWarning`` and ``None``.
      """
      if isinstance(other, basestring):
          other = ParserElement._literalStringClass(other)
      if isinstance(other, ParserElement):
          return other & self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
      return None
  def __invert__(self):
      """
      Implementation of ~ operator - wraps this expression in a :class:`NotAny`
      and returns it.
      """
      negated = NotAny(self)
      return negated
  def __call__(self, name=None):
      """
      Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.

      If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
      passed as ``True``.

      If ``name`` is omitted, same as calling :class:`copy`.

      Example::

          # these are equivalent
          userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
          userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
      """
      if name is None:
          return self.copy()
      return self.setResultsName(name)
  def suppress(self):
      """
      Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that
      its output is suppressed; useful to keep punctuation from cluttering up
      returned output.
      """
      suppressed = Suppress(self)
      return suppressed
  def leaveWhitespace(self):
      """
      Turn off the skipping of whitespace before matching the characters in
      this :class:`ParserElement`'s defined pattern, and return ``self``.
      This is normally only used internally by the pyparsing module, but may
      be needed in some whitespace-sensitive grammars.
      """
      setattr(self, "skipWhitespace", False)
      return self
  def setWhitespaceChars(self, chars):
      """
      Override the default whitespace chars with ``chars`` for this
      expression, re-enable whitespace skipping, and return ``self``.
      """
      self.whiteChars = chars
      self.skipWhitespace = True
      # this element no longer follows the module-wide default whitespace set
      self.copyDefaultWhiteChars = False
      return self
  def parseWithTabs(self):
      """
      Override the default behavior of expanding ``<TAB>``s to spaces before
      parsing the input string, and return ``self``.
      Must be called before ``parseString`` when the input grammar contains
      elements that match ``<TAB>`` characters.
      """
      setattr(self, "keepTabs", True)
      return self
  def ignore(self, other):
      """
      Define expression to be ignored (e.g., comments) while doing pattern
      matching; may be called repeatedly, to define multiple comment or other
      ignorable patterns.  Returns ``self``.

      Example::

          patt = OneOrMore(Word(alphas))
          patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj']

          patt.ignore(cStyleComment)
          patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj', 'lskjd']
      """
      if isinstance(other, basestring):
          other = Suppress(other)

      if not isinstance(other, Suppress):
          # wrap a copy so the caller's expression is left untouched
          self.ignoreExprs.append(Suppress(other.copy()))
      elif other not in self.ignoreExprs:
          self.ignoreExprs.append(other)
      return self
  def setDebugActions(self, startAction, successAction, exceptionAction):
      """
      Enable display of debugging messages while doing pattern matching,
      using the three given callbacks.  Any callback passed as a false value
      is replaced by the corresponding default debug action.  Returns ``self``.
      """
      on_start = startAction or _defaultStartDebugAction
      on_success = successAction or _defaultSuccessDebugAction
      on_exception = exceptionAction or _defaultExceptionDebugAction
      self.debugActions = (on_start, on_success, on_exception)
      self.debug = True
      return self
  def setDebug(self, flag=True):
      """
      Enable display of debugging messages while doing pattern matching.
      Set ``flag`` to True to enable, False to disable.  Returns ``self``.

      Example::

          wd = Word(alphas).setName("alphaword")
          integer = Word(nums).setName("numword")
          term = wd | integer

          # turn on debugging for wd
          wd.setDebug()

          OneOrMore(term).parseString("abc 123 xyz 890")

      prints::

          Match alphaword at loc 0(1,1)
          Matched alphaword -> ['abc']
          Match alphaword at loc 3(1,4)
          Exception raised:Expected alphaword (at char 4), (line:1, col:5)
          Match alphaword at loc 7(1,8)
          Matched alphaword -> ['xyz']
          Match alphaword at loc 11(1,12)
          Exception raised:Expected alphaword (at char 12), (line:1, col:13)
          Match alphaword at loc 15(1,16)
          Exception raised:Expected alphaword (at char 15), (line:1, col:16)

      The output shown is that produced by the default debug actions - custom debug actions can be
      specified using :class:`setDebugActions`. Prior to attempting
      to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
      is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
      message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
      which makes debugging and exception messages easier to understand - for instance, the default
      name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
      """
      if not flag:
          self.debug = False
          return self
      # enabling installs the three default debug actions
      self.setDebugActions(_defaultStartDebugAction,
                           _defaultSuccessDebugAction,
                           _defaultExceptionDebugAction)
      return self
  def __str__(self):
      """Return this expression's ``name`` attribute."""
      label = self.name
      return label
  def __repr__(self):
      """Return the same text as the string conversion of this expression."""
      text = _ustr(self)
      return text
  def streamline(self):
      """
      Mark this expression as streamlined, discard any cached string
      representation, and return ``self``.
      """
      self.strRepr = None
      self.streamlined = True
      return self
  def checkRecursion(self, parseElementList):
      """Recursion-check hook; this base implementation does nothing."""
      return None
  def validate(self, validateTrace=None):
      """
      Check defined expressions for valid structure, check for infinite recursive definitions.

      ``validateTrace`` is accepted for signature compatibility but is not
      used by this base implementation; its default is ``None`` rather than a
      shared mutable list.
      """
      self.checkRecursion([])
  def parseFile(self, file_or_filename, parseAll=False):
      """
      Execute the parse expression on the given file or filename.
      If a filename is specified (instead of a file object),
      the entire file is opened, read, and closed before parsing.
      """
      try:
          text = file_or_filename.read()
      except AttributeError:
          # no usable read(): treat the argument as a path
          with open(file_or_filename, "r") as source:
              text = source.read()
      try:
          return self.parseString(text, parseAll)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def __eq__(self, other):
      """
      Compare with another object.

      - another :class:`ParserElement`: equal when it is the same object or
        when both have equal instance attribute dictionaries
      - a string: the result of ``self.matches(other)``
      - anything else: the result of comparing the ``super`` proxy with ``other``
      """
      if isinstance(other, ParserElement):
          return self is other or vars(self) == vars(other)
      if isinstance(other, basestring):
          return self.matches(other)
      return super(ParserElement, self) == other
  def __ne__(self, other):
      """Return the logical negation of ``self == other``."""
      are_equal = (self == other)
      return not are_equal
  def __hash__(self):
      """Hash by object identity."""
      identity = id(self)
      return hash(identity)
  def __req__(self, other):
      """Reflected equality helper; returns ``self == other``."""
      outcome = (self == other)
      return outcome
  def __rne__(self, other):
      """Reflected inequality helper; returns the negation of ``self == other``."""
      are_equal = (self == other)
      return not are_equal
  def matches(self, testString, parseAll=True):
      """
      Method for quick testing of a parser against a test string. Good for simple
      inline microtests of sub expressions while building up larger parser.

      Returns ``True`` if parsing succeeds and ``False`` if parsing raises a
      :class:`ParseBaseException`.

      Parameters:

      - testString - to test against this expression for a match
      - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

      Example::

          expr = Word(nums)
          assert expr.matches("100")
      """
      try:
          self.parseString(_ustr(testString), parseAll=parseAll)
      except ParseBaseException:
          return False
      else:
          return True
  def runTests(self, tests, parseAll=True, comment='#',
               fullDump=True, printResults=True, failureTests=False, postParse=None):
      """
      Execute the parse expression on a series of test strings, showing each
      test, the parsed results or where the parse failed. Quick and easy way to
      run a parse expression against a list of sample strings.

      Parameters:

      - tests - a list of separate test strings, or a multiline string of test strings
      - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
      - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
        string; pass None to disable comment filtering
      - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
        if False, only dump nested list
      - printResults - (default= ``True``) prints test output to stdout
      - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
      - postParse - (default= ``None``) optional callback for successful parse results; called as
        `fn(test_string, parse_results)` and returns a string to be added to the test output

      Returns: a (success, results) tuple, where success indicates that all tests succeeded
      (or failed if ``failureTests`` is True), and results is a list of
      ``(test_string, result)`` pairs, one per test, in which ``result`` is
      either the parsed results or the exception that was raised

      Example::

          number_expr = pyparsing_common.number.copy()

          result = number_expr.runTests('''
              # unsigned integer
              100
              # negative integer
              -100
              # float with scientific notation
              6.02e23
              # integer with scientific notation
              1e-12
              ''')
          print("Success" if result[0] else "Failed!")

          result = number_expr.runTests('''
              # stray character
              100Z
              # missing leading digit before '.'
              -.100
              # too many '.'
              3.14.159
              ''', failureTests=True)
          print("Success" if result[0] else "Failed!")

      prints::

          # unsigned integer
          100
          [100]

          # negative integer
          -100
          [-100]

          # float with scientific notation
          6.02e23
          [6.02e+23]

          # integer with scientific notation
          1e-12
          [1e-12]

          Success

          # stray character
          100Z
             ^
          FAIL: Expected end of text (at char 3), (line:1, col:4)

          # missing leading digit before '.'
          -.100
          ^
          FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

          # too many '.'
          3.14.159
              ^
          FAIL: Expected end of text (at char 4), (line:1, col:5)

          Success

      Each test string must be on a single line. If you want to test a string that spans multiple
      lines, create a test like this::

          expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

      (Note that this is a raw string literal, you must include the leading 'r'.)
      """
      if isinstance(tests, basestring):
          # call strip() on each line itself, so any string type is handled
          # (str.strip rejects non-str text such as Python 2 unicode)
          tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
      if isinstance(comment, basestring):
          comment = Literal(comment)

      # converts newline marks to actual newlines; does not depend on the
      # individual test, so it is built once rather than per iteration
      NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
      BOM = '\ufeff'

      allResults = []
      comments = []
      success = True
      for t in tests:
          # comment lines, and blank lines that follow a comment, are collected
          # and printed ahead of the next real test
          if comment is not None and comment.matches(t, False) or comments and not t:
              comments.append(t)
              continue
          if not t:
              continue
          out = ['\n'.join(comments), t]
          comments = []
          try:
              # convert newline marks to actual newlines, and strip leading BOM if present
              t = NL.transformString(t.lstrip(BOM))
              result = self.parseString(t, parseAll=parseAll)
              out.append(result.dump(full=fullDump))
              # a successful parse counts as a failure when failures are expected
              success = success and not failureTests
              if postParse is not None:
                  try:
                      pp_value = postParse(t, result)
                      if pp_value is not None:
                          out.append(str(pp_value))
                  except Exception as e:
                      # callables such as functools.partial have no __name__
                      pp_name = getattr(postParse, '__name__', repr(postParse))
                      out.append("{0} failed: {1}: {2}".format(pp_name, type(e).__name__, e))
          except ParseBaseException as pe:
              fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
              if '\n' in t:
                  # multi-line test: show only the offending line, with the caret under its column
                  out.append(line(pe.loc, t))
                  out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
              else:
                  out.append(' '*pe.loc + '^' + fatal)
              out.append("FAIL: " + str(pe))
              success = success and failureTests
              result = pe
          except Exception as exc:
              out.append("FAIL-EXCEPTION: " + str(exc))
              success = success and failureTests
              result = exc

          if printResults:
              if fullDump:
                  out.append('')
              print('\n'.join(out))

          allResults.append((t, result))

      return success, allResults
  class Token(ParserElement):
      """Abstract :class:`ParserElement` subclass that serves as the base
      for atomic matching patterns.
      """
      def __init__(self):
          # tokens are initialized with savelist turned off
          super(Token, self).__init__(savelist=False)
  class Empty(Token):
      """A token that consumes nothing and always matches.
      """
      def __init__(self):
          super(Empty, self).__init__()
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "Empty"
  class NoMatch(Token):
      """A token that never matches; parsing it always raises
      :class:`ParseException`.
      """
      def __init__(self):
          super(NoMatch, self).__init__()
          self.errmsg = "Unmatchable token"
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "NoMatch"

      def parseImpl(self, instring, loc, doActions=True):
          # unconditional failure at the requested location
          raise ParseException(instring, loc, self.errmsg, self)
  class Literal(Token):
      """Token that matches one specific string exactly.

      Example::

          Literal('blah').parseString('blah')  # -> ['blah']
          Literal('blah').parseString('blahfooblah')  # -> ['blah']
          Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

      For case-insensitive matching, use :class:`CaselessLiteral`.

      For keyword matching (force word break before and after the matched string),
      use :class:`Keyword` or :class:`CaselessKeyword`.
      """
      def __init__(self, matchString):
          super(Literal, self).__init__()
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
              # an empty literal is turned into an Empty instance
              self.__class__ = Empty
          quoted = '"%s"' % _ustr(self.match)
          self.name = quoted
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.mayReturnEmpty = False

      # Performance tuning: this routine gets called a *lot*
      # if this is a single character match string and the first character matches,
      # short-circuit as quickly as possible, and avoid calling startswith
      def parseImpl(self, instring, loc, doActions=True):
          first_char_ok = instring[loc] == self.firstMatchChar
          if first_char_ok and (self.matchLen == 1 or instring.startswith(self.match, loc)):
              return loc + self.matchLen, self.match
          raise ParseException(instring, loc, self.errmsg, self)
  # short module-private alias for Literal
  _L = Literal
  # Class used by the ParserElement operator methods to wrap plain string
  # operands (e.g. the "," in  expr + ","); Literal is installed as the default.
  ParserElement._literalStringClass = Literal
  2231. class Keyword(Token):
  2232. """Token to exactly match a specified string as a keyword, that is,
  2233. it must be immediately followed by a non-keyword character. Compare
  2234. with :class:`Literal`:
  2235. - ``Literal("if")`` will match the leading ``'if'`` in
  2236. ``'ifAndOnlyIf'``.
  2237. - ``Keyword("if")`` will not; it will only match the leading
  2238. ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``
  2239. Accepts two optional constructor arguments in addition to the
  2240. keyword string:
  2241. - ``identChars`` is a string of characters that would be valid
  2242. identifier characters, defaulting to all alphanumerics + "_" and
  2243. "$"
  2244. - ``caseless`` allows case-insensitive matching, default is ``False``.
  2245. Example::
  2246. Keyword("start").parseString("start") # -> ['start']
  2247. Keyword("start").parseString("starting") # -> Exception
  2248. For case-insensitive matching, use :class:`CaselessKeyword`.
  2249. """
  2250. DEFAULT_KEYWORD_CHARS = alphanums+"_$"
  2251. def __init__( self, matchString, identChars=None, caseless=False ):
  2252. super(Keyword,self).__init__()
  2253. if identChars is None:
  2254. identChars = Keyword.DEFAULT_KEYWORD_CHARS
  2255. self.match = matchString
  2256. self.matchLen = len(matchString)
  2257. try:
  2258. self.firstMatchChar = matchString[0]
  2259. except IndexError:
  2260. warnings.warn("null string passed to Keyword; use Empty() instead",
  2261. SyntaxWarning, stacklevel=2)
  2262. self.name = '"%s"' % self.match
  2263. self.errmsg = "Expected " + self.name
  2264. self.mayReturnEmpty = False
  2265. self.mayIndexError = False
  2266. self.caseless = caseless
  2267. if caseless:
  2268. self.caselessmatch = matchString.upper()
  2269. identChars = identChars.upper()
  2270. self.identChars = set(identChars)
  2271. def parseImpl( self, instring, loc, doActions=True ):
  2272. if self.caseless:
  2273. if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  2274. (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
  2275. (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
  2276. return loc+self.matchLen, self.match
  2277. else:
  2278. if (instring[loc] == self.firstMatchChar and
  2279. (self.matchLen==1 or instring.startswith(self.match,loc)) and
  2280. (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
  2281. (loc == 0 or instring[loc-1] not in self.identChars) ):
  2282. return loc+self.matchLen, self.match
  2283. raise ParseException(instring, loc, self.errmsg, self)
  2284. def copy(self):
  2285. c = super(Keyword,self).copy()
  2286. c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
  2287. return c
  2288. @staticmethod
  2289. def setDefaultKeywordChars( chars ):
  2290. """Overrides the default Keyword chars
  2291. """
  2292. Keyword.DEFAULT_KEYWORD_CHARS = chars
  2293. class CaselessLiteral(Literal):
  2294. """Token to match a specified string, ignoring case of letters.
  2295. Note: the matched results will always be in the case of the given
  2296. match string, NOT the case of the input text.
  2297. Example::
  2298. OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
  2299. (Contrast with example for :class:`CaselessKeyword`.)
  2300. """
  2301. def __init__( self, matchString ):
  2302. super(CaselessLiteral,self).__init__( matchString.upper() )
  2303. # Preserve the defining literal.
  2304. self.returnString = matchString
  2305. self.name = "'%s'" % self.returnString
  2306. self.errmsg = "Expected " + self.name
  2307. def parseImpl( self, instring, loc, doActions=True ):
  2308. if instring[ loc:loc+self.matchLen ].upper() == self.match:
  2309. return loc+self.matchLen, self.returnString
  2310. raise ParseException(instring, loc, self.errmsg, self)
  2311. class CaselessKeyword(Keyword):
  2312. """
  2313. Caseless version of :class:`Keyword`.
  2314. Example::
  2315. OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
  2316. (Contrast with example for :class:`CaselessLiteral`.)
  2317. """
  2318. def __init__( self, matchString, identChars=None ):
  2319. super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
  2320. class CloseMatch(Token):
  2321. """A variation on :class:`Literal` which matches "close" matches,
  2322. that is, strings with at most 'n' mismatching characters.
  2323. :class:`CloseMatch` takes parameters:
  2324. - ``match_string`` - string to be matched
  2325. - ``maxMismatches`` - (``default=1``) maximum number of
  2326. mismatches allowed to count as a match
  2327. The results from a successful parse will contain the matched text
  2328. from the input string and the following named results:
  2329. - ``mismatches`` - a list of the positions within the
  2330. match_string where mismatches were found
  2331. - ``original`` - the original match_string used to compare
  2332. against the input string
  2333. If ``mismatches`` is an empty list, then the match was an exact
  2334. match.
  2335. Example::
  2336. patt = CloseMatch("ATCATCGAATGGA")
  2337. patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
  2338. patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
  2339. # exact match
  2340. patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
  2341. # close match allowing up to 2 mismatches
  2342. patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
  2343. patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
  2344. """
  2345. def __init__(self, match_string, maxMismatches=1):
  2346. super(CloseMatch,self).__init__()
  2347. self.name = match_string
  2348. self.match_string = match_string
  2349. self.maxMismatches = maxMismatches
  2350. self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
  2351. self.mayIndexError = False
  2352. self.mayReturnEmpty = False
  2353. def parseImpl( self, instring, loc, doActions=True ):
  2354. start = loc
  2355. instrlen = len(instring)
  2356. maxloc = start + len(self.match_string)
  2357. if maxloc <= instrlen:
  2358. match_string = self.match_string
  2359. match_stringloc = 0
  2360. mismatches = []
  2361. maxMismatches = self.maxMismatches
  2362. for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
  2363. src,mat = s_m
  2364. if src != mat:
  2365. mismatches.append(match_stringloc)
  2366. if len(mismatches) > maxMismatches:
  2367. break
  2368. else:
  2369. loc = match_stringloc + 1
  2370. results = ParseResults([instring[start:loc]])
  2371. results['original'] = self.match_string
  2372. results['mismatches'] = mismatches
  2373. return loc, results
  2374. raise ParseException(instring, loc, self.errmsg, self)
  2375. class Word(Token):
  2376. """Token for matching words composed of allowed character sets.
  2377. Defined with string containing all allowed initial characters, an
  2378. optional string containing allowed body characters (if omitted,
  2379. defaults to the initial character set), and an optional minimum,
  2380. maximum, and/or exact length. The default value for ``min`` is
  2381. 1 (a minimum value < 1 is not valid); the default values for
  2382. ``max`` and ``exact`` are 0, meaning no maximum or exact
  2383. length restriction. An optional ``excludeChars`` parameter can
  2384. list characters that might be found in the input ``bodyChars``
  2385. string; useful to define a word of all printables except for one or
  2386. two characters, for instance.
  2387. :class:`srange` is useful for defining custom character set strings
  2388. for defining ``Word`` expressions, using range notation from
  2389. regular expression character sets.
  2390. A common mistake is to use :class:`Word` to match a specific literal
  2391. string, as in ``Word("Address")``. Remember that :class:`Word`
  2392. uses the string argument to define *sets* of matchable characters.
  2393. This expression would match "Add", "AAA", "dAred", or any other word
  2394. made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
  2395. exact literal string, use :class:`Literal` or :class:`Keyword`.
  2396. pyparsing includes helper strings for building Words:
  2397. - :class:`alphas`
  2398. - :class:`nums`
  2399. - :class:`alphanums`
  2400. - :class:`hexnums`
  2401. - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
  2402. - accented, tilded, umlauted, etc.)
  2403. - :class:`punc8bit` (non-alphabetic characters in ASCII range
  2404. 128-255 - currency, symbols, superscripts, diacriticals, etc.)
  2405. - :class:`printables` (any non-whitespace character)
  2406. Example::
  2407. # a word composed of digits
  2408. integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
  2409. # a word with a leading capital, and zero or more lowercase
  2410. capital_word = Word(alphas.upper(), alphas.lower())
  2411. # hostnames are alphanumeric, with leading alpha, and '-'
  2412. hostname = Word(alphas, alphanums+'-')
  2413. # roman numeral (not a strict parser, accepts invalid mix of characters)
  2414. roman = Word("IVXLCDM")
  2415. # any string of non-whitespace characters, except for ','
  2416. csv_value = Word(printables, excludeChars=",")
  2417. """
  2418. def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
  2419. super(Word,self).__init__()
  2420. if excludeChars:
  2421. excludeChars = set(excludeChars)
  2422. initChars = ''.join(c for c in initChars if c not in excludeChars)
  2423. if bodyChars:
  2424. bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
  2425. self.initCharsOrig = initChars
  2426. self.initChars = set(initChars)
  2427. if bodyChars :
  2428. self.bodyCharsOrig = bodyChars
  2429. self.bodyChars = set(bodyChars)
  2430. else:
  2431. self.bodyCharsOrig = initChars
  2432. self.bodyChars = set(initChars)
  2433. self.maxSpecified = max > 0
  2434. if min < 1:
  2435. raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
  2436. self.minLen = min
  2437. if max > 0:
  2438. self.maxLen = max
  2439. else:
  2440. self.maxLen = _MAX_INT
  2441. if exact > 0:
  2442. self.maxLen = exact
  2443. self.minLen = exact
  2444. self.name = _ustr(self)
  2445. self.errmsg = "Expected " + self.name
  2446. self.mayIndexError = False
  2447. self.asKeyword = asKeyword
  2448. if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
  2449. if self.bodyCharsOrig == self.initCharsOrig:
  2450. self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
  2451. elif len(self.initCharsOrig) == 1:
  2452. self.reString = "%s[%s]*" % \
  2453. (re.escape(self.initCharsOrig),
  2454. _escapeRegexRangeChars(self.bodyCharsOrig),)
  2455. else:
  2456. self.reString = "[%s][%s]*" % \
  2457. (_escapeRegexRangeChars(self.initCharsOrig),
  2458. _escapeRegexRangeChars(self.bodyCharsOrig),)
  2459. if self.asKeyword:
  2460. self.reString = r"\b"+self.reString+r"\b"
  2461. try:
  2462. self.re = re.compile( self.reString )
  2463. except Exception:
  2464. self.re = None
  2465. def parseImpl( self, instring, loc, doActions=True ):
  2466. if self.re:
  2467. result = self.re.match(instring,loc)
  2468. if not result:
  2469. raise ParseException(instring, loc, self.errmsg, self)
  2470. loc = result.end()
  2471. return loc, result.group()
  2472. if instring[loc] not in self.initChars:
  2473. raise ParseException(instring, loc, self.errmsg, self)
  2474. start = loc
  2475. loc += 1
  2476. instrlen = len(instring)
  2477. bodychars = self.bodyChars
  2478. maxloc = start + self.maxLen
  2479. maxloc = min( maxloc, instrlen )
  2480. while loc < maxloc and instring[loc] in bodychars:
  2481. loc += 1
  2482. throwException = False
  2483. if loc - start < self.minLen:
  2484. throwException = True
  2485. elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
  2486. throwException = True
  2487. elif self.asKeyword:
  2488. if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
  2489. throwException = True
  2490. if throwException:
  2491. raise ParseException(instring, loc, self.errmsg, self)
  2492. return loc, instring[start:loc]
  2493. def __str__( self ):
  2494. try:
  2495. return super(Word,self).__str__()
  2496. except Exception:
  2497. pass
  2498. if self.strRepr is None:
  2499. def charsAsStr(s):
  2500. if len(s)>4:
  2501. return s[:4]+"..."
  2502. else:
  2503. return s
  2504. if ( self.initCharsOrig != self.bodyCharsOrig ):
  2505. self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
  2506. else:
  2507. self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
  2508. return self.strRepr
  2509. class Char(Word):
  2510. """A short-cut class for defining ``Word(characters, exact=1)``,
  2511. when defining a match of any single character in a string of
  2512. characters.
  2513. """
  2514. def __init__(self, charset, asKeyword=False, excludeChars=None):
  2515. super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
  2516. self.reString = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
  2517. self.re = re.compile( self.reString )
  2518. class Regex(Token):
  2519. r"""Token for matching strings that match a given regular
  2520. expression. Defined with string specifying the regular expression in
  2521. a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
  2522. If the given regex contains named groups (defined using ``(?P<name>...)``),
  2523. these will be preserved as named parse results.
  2524. Example::
  2525. realnum = Regex(r"[+-]?\d+\.\d*")
  2526. date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
  2527. # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
  2528. roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
  2529. """
  2530. compiledREtype = type(re.compile("[A-Z]"))
  2531. def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
  2532. """The parameters ``pattern`` and ``flags`` are passed
  2533. to the ``re.compile()`` function as-is. See the Python
  2534. `re module <https://docs.python.org/3/library/re.html>`_ module for an
  2535. explanation of the acceptable patterns and flags.
  2536. """
  2537. super(Regex,self).__init__()
  2538. if isinstance(pattern, basestring):
  2539. if not pattern:
  2540. warnings.warn("null string passed to Regex; use Empty() instead",
  2541. SyntaxWarning, stacklevel=2)
  2542. self.pattern = pattern
  2543. self.flags = flags
  2544. try:
  2545. self.re = re.compile(self.pattern, self.flags)
  2546. self.reString = self.pattern
  2547. except sre_constants.error:
  2548. warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
  2549. SyntaxWarning, stacklevel=2)
  2550. raise
  2551. elif isinstance(pattern, Regex.compiledREtype):
  2552. self.re = pattern
  2553. self.pattern = \
  2554. self.reString = str(pattern)
  2555. self.flags = flags
  2556. else:
  2557. raise ValueError("Regex may only be constructed with a string or a compiled RE object")
  2558. self.name = _ustr(self)
  2559. self.errmsg = "Expected " + self.name
  2560. self.mayIndexError = False
  2561. self.mayReturnEmpty = True
  2562. self.asGroupList = asGroupList
  2563. self.asMatch = asMatch
  2564. if self.asGroupList:
  2565. self.parseImpl = self.parseImplAsGroupList
  2566. if self.asMatch:
  2567. self.parseImpl = self.parseImplAsMatch
  2568. def parseImpl(self, instring, loc, doActions=True):
  2569. result = self.re.match(instring,loc)
  2570. if not result:
  2571. raise ParseException(instring, loc, self.errmsg, self)
  2572. loc = result.end()
  2573. ret = ParseResults(result.group())
  2574. d = result.groupdict()
  2575. if d:
  2576. for k, v in d.items():
  2577. ret[k] = v
  2578. return loc, ret
  2579. def parseImplAsGroupList(self, instring, loc, doActions=True):
  2580. result = self.re.match(instring,loc)
  2581. if not result:
  2582. raise ParseException(instring, loc, self.errmsg, self)
  2583. loc = result.end()
  2584. ret = result.groups()
  2585. return loc, ret
  2586. def parseImplAsMatch(self, instring, loc, doActions=True):
  2587. result = self.re.match(instring,loc)
  2588. if not result:
  2589. raise ParseException(instring, loc, self.errmsg, self)
  2590. loc = result.end()
  2591. ret = result
  2592. return loc, ret
  2593. def __str__( self ):
  2594. try:
  2595. return super(Regex,self).__str__()
  2596. except Exception:
  2597. pass
  2598. if self.strRepr is None:
  2599. self.strRepr = "Re:(%s)" % repr(self.pattern)
  2600. return self.strRepr
  2601. def sub(self, repl):
  2602. r"""
  2603. Return Regex with an attached parse action to transform the parsed
  2604. result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.
  2605. Example::
  2606. make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
  2607. print(make_html.transformString("h1:main title:"))
  2608. # prints "<h1>main title</h1>"
  2609. """
  2610. if self.asGroupList:
  2611. warnings.warn("cannot use sub() with Regex(asGroupList=True)",
  2612. SyntaxWarning, stacklevel=2)
  2613. raise SyntaxError()
  2614. if self.asMatch and callable(repl):
  2615. warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
  2616. SyntaxWarning, stacklevel=2)
  2617. raise SyntaxError()
  2618. if self.asMatch:
  2619. def pa(tokens):
  2620. return tokens[0].expand(repl)
  2621. else:
  2622. def pa(tokens):
  2623. return self.re.sub(repl, tokens[0])
  2624. return self.addParseAction(pa)
  2625. class QuotedString(Token):
  2626. r"""
  2627. Token for matching strings that are delimited by quoting characters.
  2628. Defined with the following parameters:
  2629. - quoteChar - string of one or more characters defining the
  2630. quote delimiting string
  2631. - escChar - character to escape quotes, typically backslash
  2632. (default= ``None`` )
  2633. - escQuote - special quote sequence to escape an embedded quote
  2634. string (such as SQL's ``""`` to escape an embedded ``"``)
  2635. (default= ``None`` )
  2636. - multiline - boolean indicating whether quotes can span
  2637. multiple lines (default= ``False`` )
  2638. - unquoteResults - boolean indicating whether the matched text
  2639. should be unquoted (default= ``True`` )
  2640. - endQuoteChar - string of one or more characters defining the
  2641. end of the quote delimited string (default= ``None`` => same as
  2642. quoteChar)
  2643. - convertWhitespaceEscapes - convert escaped whitespace
  2644. (``'\t'``, ``'\n'``, etc.) to actual whitespace
  2645. (default= ``True`` )
  2646. Example::
  2647. qs = QuotedString('"')
  2648. print(qs.searchString('lsjdf "This is the quote" sldjf'))
  2649. complex_qs = QuotedString('{{', endQuoteChar='}}')
  2650. print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
  2651. sql_qs = QuotedString('"', escQuote='""')
  2652. print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
  2653. prints::
  2654. [['This is the quote']]
  2655. [['This is the "quote"']]
  2656. [['This is the quote with "embedded" quotes']]
  2657. """
  2658. def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
  2659. super(QuotedString,self).__init__()
  2660. # remove white space from quote chars - wont work anyway
  2661. quoteChar = quoteChar.strip()
  2662. if not quoteChar:
  2663. warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2664. raise SyntaxError()
  2665. if endQuoteChar is None:
  2666. endQuoteChar = quoteChar
  2667. else:
  2668. endQuoteChar = endQuoteChar.strip()
  2669. if not endQuoteChar:
  2670. warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2671. raise SyntaxError()
  2672. self.quoteChar = quoteChar
  2673. self.quoteCharLen = len(quoteChar)
  2674. self.firstQuoteChar = quoteChar[0]
  2675. self.endQuoteChar = endQuoteChar
  2676. self.endQuoteCharLen = len(endQuoteChar)
  2677. self.escChar = escChar
  2678. self.escQuote = escQuote
  2679. self.unquoteResults = unquoteResults
  2680. self.convertWhitespaceEscapes = convertWhitespaceEscapes
  2681. if multiline:
  2682. self.flags = re.MULTILINE | re.DOTALL
  2683. self.pattern = r'%s(?:[^%s%s]' % \
  2684. ( re.escape(self.quoteChar),
  2685. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2686. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2687. else:
  2688. self.flags = 0
  2689. self.pattern = r'%s(?:[^%s\n\r%s]' % \
  2690. ( re.escape(self.quoteChar),
  2691. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2692. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2693. if len(self.endQuoteChar) > 1:
  2694. self.pattern += (
  2695. '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
  2696. _escapeRegexRangeChars(self.endQuoteChar[i]))
  2697. for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
  2698. )
  2699. if escQuote:
  2700. self.pattern += (r'|(?:%s)' % re.escape(escQuote))
  2701. if escChar:
  2702. self.pattern += (r'|(?:%s.)' % re.escape(escChar))
  2703. self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
  2704. self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
  2705. try:
  2706. self.re = re.compile(self.pattern, self.flags)
  2707. self.reString = self.pattern
  2708. except sre_constants.error:
  2709. warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
  2710. SyntaxWarning, stacklevel=2)
  2711. raise
  2712. self.name = _ustr(self)
  2713. self.errmsg = "Expected " + self.name
  2714. self.mayIndexError = False
  2715. self.mayReturnEmpty = True
  2716. def parseImpl( self, instring, loc, doActions=True ):
  2717. result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
  2718. if not result:
  2719. raise ParseException(instring, loc, self.errmsg, self)
  2720. loc = result.end()
  2721. ret = result.group()
  2722. if self.unquoteResults:
  2723. # strip off quotes
  2724. ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
  2725. if isinstance(ret,basestring):
  2726. # replace escaped whitespace
  2727. if '\\' in ret and self.convertWhitespaceEscapes:
  2728. ws_map = {
  2729. r'\t' : '\t',
  2730. r'\n' : '\n',
  2731. r'\f' : '\f',
  2732. r'\r' : '\r',
  2733. }
  2734. for wslit,wschar in ws_map.items():
  2735. ret = ret.replace(wslit, wschar)
  2736. # replace escaped characters
  2737. if self.escChar:
  2738. ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
  2739. # replace escaped quotes
  2740. if self.escQuote:
  2741. ret = ret.replace(self.escQuote, self.endQuoteChar)
  2742. return loc, ret
  2743. def __str__( self ):
  2744. try:
  2745. return super(QuotedString,self).__str__()
  2746. except Exception:
  2747. pass
  2748. if self.strRepr is None:
  2749. self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
  2750. return self.strRepr
  2751. class CharsNotIn(Token):
  2752. """Token for matching words composed of characters *not* in a given
  2753. set (will include whitespace in matched characters if not listed in
  2754. the provided exclusion set - see example). Defined with string
  2755. containing all disallowed characters, and an optional minimum,
  2756. maximum, and/or exact length. The default value for ``min`` is
  2757. 1 (a minimum value < 1 is not valid); the default values for
  2758. ``max`` and ``exact`` are 0, meaning no maximum or exact
  2759. length restriction.
  2760. Example::
  2761. # define a comma-separated-value as anything that is not a ','
  2762. csv_value = CharsNotIn(',')
  2763. print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
  2764. prints::
  2765. ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
  2766. """
  2767. def __init__( self, notChars, min=1, max=0, exact=0 ):
  2768. super(CharsNotIn,self).__init__()
  2769. self.skipWhitespace = False
  2770. self.notChars = notChars
  2771. if min < 1:
  2772. raise ValueError(
  2773. "cannot specify a minimum length < 1; use " +
  2774. "Optional(CharsNotIn()) if zero-length char group is permitted")
  2775. self.minLen = min
  2776. if max > 0:
  2777. self.maxLen = max
  2778. else:
  2779. self.maxLen = _MAX_INT
  2780. if exact > 0:
  2781. self.maxLen = exact
  2782. self.minLen = exact
  2783. self.name = _ustr(self)
  2784. self.errmsg = "Expected " + self.name
  2785. self.mayReturnEmpty = ( self.minLen == 0 )
  2786. self.mayIndexError = False
  2787. def parseImpl( self, instring, loc, doActions=True ):
  2788. if instring[loc] in self.notChars:
  2789. raise ParseException(instring, loc, self.errmsg, self)
  2790. start = loc
  2791. loc += 1
  2792. notchars = self.notChars
  2793. maxlen = min( start+self.maxLen, len(instring) )
  2794. while loc < maxlen and \
  2795. (instring[loc] not in notchars):
  2796. loc += 1
  2797. if loc - start < self.minLen:
  2798. raise ParseException(instring, loc, self.errmsg, self)
  2799. return loc, instring[start:loc]
  2800. def __str__( self ):
  2801. try:
  2802. return super(CharsNotIn, self).__str__()
  2803. except Exception:
  2804. pass
  2805. if self.strRepr is None:
  2806. if len(self.notChars) > 4:
  2807. self.strRepr = "!W:(%s...)" % self.notChars[:4]
  2808. else:
  2809. self.strRepr = "!W:(%s)" % self.notChars
  2810. return self.strRepr
  2811. class White(Token):
  2812. """Special matching class for matching whitespace. Normally,
  2813. whitespace is ignored by pyparsing grammars. This class is included
  2814. when some whitespace structures are significant. Define with
  2815. a string containing the whitespace characters to be matched; default
  2816. is ``" \\t\\r\\n"``. Also takes optional ``min``,
  2817. ``max``, and ``exact`` arguments, as defined for the
  2818. :class:`Word` class.
  2819. """
  2820. whiteStrs = {
  2821. ' ' : '<SP>',
  2822. '\t': '<TAB>',
  2823. '\n': '<LF>',
  2824. '\r': '<CR>',
  2825. '\f': '<FF>',
  2826. 'u\00A0': '<NBSP>',
  2827. 'u\1680': '<OGHAM_SPACE_MARK>',
  2828. 'u\180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
  2829. 'u\2000': '<EN_QUAD>',
  2830. 'u\2001': '<EM_QUAD>',
  2831. 'u\2002': '<EN_SPACE>',
  2832. 'u\2003': '<EM_SPACE>',
  2833. 'u\2004': '<THREE-PER-EM_SPACE>',
  2834. 'u\2005': '<FOUR-PER-EM_SPACE>',
  2835. 'u\2006': '<SIX-PER-EM_SPACE>',
  2836. 'u\2007': '<FIGURE_SPACE>',
  2837. 'u\2008': '<PUNCTUATION_SPACE>',
  2838. 'u\2009': '<THIN_SPACE>',
  2839. 'u\200A': '<HAIR_SPACE>',
  2840. 'u\200B': '<ZERO_WIDTH_SPACE>',
  2841. 'u\202F': '<NNBSP>',
  2842. 'u\205F': '<MMSP>',
  2843. 'u\3000': '<IDEOGRAPHIC_SPACE>',
  2844. }
  2845. def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
  2846. super(White,self).__init__()
  2847. self.matchWhite = ws
  2848. self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
  2849. #~ self.leaveWhitespace()
  2850. self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
  2851. self.mayReturnEmpty = True
  2852. self.errmsg = "Expected " + self.name
  2853. self.minLen = min
  2854. if max > 0:
  2855. self.maxLen = max
  2856. else:
  2857. self.maxLen = _MAX_INT
  2858. if exact > 0:
  2859. self.maxLen = exact
  2860. self.minLen = exact
  2861. def parseImpl( self, instring, loc, doActions=True ):
  2862. if instring[loc] not in self.matchWhite:
  2863. raise ParseException(instring, loc, self.errmsg, self)
  2864. start = loc
  2865. loc += 1
  2866. maxloc = start + self.maxLen
  2867. maxloc = min( maxloc, len(instring) )
  2868. while loc < maxloc and instring[loc] in self.matchWhite:
  2869. loc += 1
  2870. if loc - start < self.minLen:
  2871. raise ParseException(instring, loc, self.errmsg, self)
  2872. return loc, instring[start:loc]
  2873. class _PositionToken(Token):
  2874. def __init__( self ):
  2875. super(_PositionToken,self).__init__()
  2876. self.name=self.__class__.__name__
  2877. self.mayReturnEmpty = True
  2878. self.mayIndexError = False
  2879. class GoToColumn(_PositionToken):
  2880. """Token to advance to a specific column of input text; useful for
  2881. tabular report scraping.
  2882. """
  2883. def __init__( self, colno ):
  2884. super(GoToColumn,self).__init__()
  2885. self.col = colno
  2886. def preParse( self, instring, loc ):
  2887. if col(loc,instring) != self.col:
  2888. instrlen = len(instring)
  2889. if self.ignoreExprs:
  2890. loc = self._skipIgnorables( instring, loc )
  2891. while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
  2892. loc += 1
  2893. return loc
  2894. def parseImpl( self, instring, loc, doActions=True ):
  2895. thiscol = col( loc, instring )
  2896. if thiscol > self.col:
  2897. raise ParseException( instring, loc, "Text not in expected column", self )
  2898. newloc = loc + self.col - thiscol
  2899. ret = instring[ loc: newloc ]
  2900. return newloc, ret
  2901. class LineStart(_PositionToken):
  2902. r"""Matches if current position is at the beginning of a line within
  2903. the parse string
  2904. Example::
  2905. test = '''\
  2906. AAA this line
  2907. AAA and this line
  2908. AAA but not this one
  2909. B AAA and definitely not this one
  2910. '''
  2911. for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
  2912. print(t)
  2913. prints::
  2914. ['AAA', ' this line']
  2915. ['AAA', ' and this line']
  2916. """
  2917. def __init__( self ):
  2918. super(LineStart,self).__init__()
  2919. self.errmsg = "Expected start of line"
  2920. def parseImpl( self, instring, loc, doActions=True ):
  2921. if col(loc, instring) == 1:
  2922. return loc, []
  2923. raise ParseException(instring, loc, self.errmsg, self)
  2924. class LineEnd(_PositionToken):
  2925. """Matches if current position is at the end of a line within the
  2926. parse string
  2927. """
  2928. def __init__( self ):
  2929. super(LineEnd,self).__init__()
  2930. self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
  2931. self.errmsg = "Expected end of line"
  2932. def parseImpl( self, instring, loc, doActions=True ):
  2933. if loc<len(instring):
  2934. if instring[loc] == "\n":
  2935. return loc+1, "\n"
  2936. else:
  2937. raise ParseException(instring, loc, self.errmsg, self)
  2938. elif loc == len(instring):
  2939. return loc+1, []
  2940. else:
  2941. raise ParseException(instring, loc, self.errmsg, self)
  2942. class StringStart(_PositionToken):
  2943. """Matches if current position is at the beginning of the parse
  2944. string
  2945. """
  2946. def __init__( self ):
  2947. super(StringStart,self).__init__()
  2948. self.errmsg = "Expected start of text"
  2949. def parseImpl( self, instring, loc, doActions=True ):
  2950. if loc != 0:
  2951. # see if entire string up to here is just whitespace and ignoreables
  2952. if loc != self.preParse( instring, 0 ):
  2953. raise ParseException(instring, loc, self.errmsg, self)
  2954. return loc, []
  2955. class StringEnd(_PositionToken):
  2956. """Matches if current position is at the end of the parse string
  2957. """
  2958. def __init__( self ):
  2959. super(StringEnd,self).__init__()
  2960. self.errmsg = "Expected end of text"
  2961. def parseImpl( self, instring, loc, doActions=True ):
  2962. if loc < len(instring):
  2963. raise ParseException(instring, loc, self.errmsg, self)
  2964. elif loc == len(instring):
  2965. return loc+1, []
  2966. elif loc > len(instring):
  2967. return loc, []
  2968. else:
  2969. raise ParseException(instring, loc, self.errmsg, self)
  2970. class WordStart(_PositionToken):
  2971. """Matches if the current position is at the beginning of a Word,
  2972. and is not preceded by any character in a given set of
  2973. ``wordChars`` (default= ``printables``). To emulate the
  2974. ``\b`` behavior of regular expressions, use
  2975. ``WordStart(alphanums)``. ``WordStart`` will also match at
  2976. the beginning of the string being parsed, or at the beginning of
  2977. a line.
  2978. """
  2979. def __init__(self, wordChars = printables):
  2980. super(WordStart,self).__init__()
  2981. self.wordChars = set(wordChars)
  2982. self.errmsg = "Not at the start of a word"
  2983. def parseImpl(self, instring, loc, doActions=True ):
  2984. if loc != 0:
  2985. if (instring[loc-1] in self.wordChars or
  2986. instring[loc] not in self.wordChars):
  2987. raise ParseException(instring, loc, self.errmsg, self)
  2988. return loc, []
  2989. class WordEnd(_PositionToken):
  2990. """Matches if the current position is at the end of a Word, and is
  2991. not followed by any character in a given set of ``wordChars``
  2992. (default= ``printables``). To emulate the ``\b`` behavior of
  2993. regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
  2994. will also match at the end of the string being parsed, or at the end
  2995. of a line.
  2996. """
  2997. def __init__(self, wordChars = printables):
  2998. super(WordEnd,self).__init__()
  2999. self.wordChars = set(wordChars)
  3000. self.skipWhitespace = False
  3001. self.errmsg = "Not at the end of a word"
  3002. def parseImpl(self, instring, loc, doActions=True ):
  3003. instrlen = len(instring)
  3004. if instrlen>0 and loc<instrlen:
  3005. if (instring[loc] in self.wordChars or
  3006. instring[loc-1] not in self.wordChars):
  3007. raise ParseException(instring, loc, self.errmsg, self)
  3008. return loc, []
  3009. class ParseExpression(ParserElement):
  3010. """Abstract subclass of ParserElement, for combining and
  3011. post-processing parsed tokens.
  3012. """
  3013. def __init__( self, exprs, savelist = False ):
  3014. super(ParseExpression,self).__init__(savelist)
  3015. if isinstance( exprs, _generatorType ):
  3016. exprs = list(exprs)
  3017. if isinstance( exprs, basestring ):
  3018. self.exprs = [ ParserElement._literalStringClass( exprs ) ]
  3019. elif isinstance( exprs, Iterable ):
  3020. exprs = list(exprs)
  3021. # if sequence of strings provided, wrap with Literal
  3022. if all(isinstance(expr, basestring) for expr in exprs):
  3023. exprs = map(ParserElement._literalStringClass, exprs)
  3024. self.exprs = list(exprs)
  3025. else:
  3026. try:
  3027. self.exprs = list( exprs )
  3028. except TypeError:
  3029. self.exprs = [ exprs ]
  3030. self.callPreparse = False
  3031. def __getitem__( self, i ):
  3032. return self.exprs[i]
  3033. def append( self, other ):
  3034. self.exprs.append( other )
  3035. self.strRepr = None
  3036. return self
  3037. def leaveWhitespace( self ):
  3038. """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
  3039. all contained expressions."""
  3040. self.skipWhitespace = False
  3041. self.exprs = [ e.copy() for e in self.exprs ]
  3042. for e in self.exprs:
  3043. e.leaveWhitespace()
  3044. return self
  3045. def ignore( self, other ):
  3046. if isinstance( other, Suppress ):
  3047. if other not in self.ignoreExprs:
  3048. super( ParseExpression, self).ignore( other )
  3049. for e in self.exprs:
  3050. e.ignore( self.ignoreExprs[-1] )
  3051. else:
  3052. super( ParseExpression, self).ignore( other )
  3053. for e in self.exprs:
  3054. e.ignore( self.ignoreExprs[-1] )
  3055. return self
  3056. def __str__( self ):
  3057. try:
  3058. return super(ParseExpression,self).__str__()
  3059. except Exception:
  3060. pass
  3061. if self.strRepr is None:
  3062. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  3063. return self.strRepr
  3064. def streamline( self ):
  3065. super(ParseExpression,self).streamline()
  3066. for e in self.exprs:
  3067. e.streamline()
  3068. # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  3069. # but only if there are no parse actions or resultsNames on the nested And's
  3070. # (likewise for Or's and MatchFirst's)
  3071. if ( len(self.exprs) == 2 ):
  3072. other = self.exprs[0]
  3073. if ( isinstance( other, self.__class__ ) and
  3074. not(other.parseAction) and
  3075. other.resultsName is None and
  3076. not other.debug ):
  3077. self.exprs = other.exprs[:] + [ self.exprs[1] ]
  3078. self.strRepr = None
  3079. self.mayReturnEmpty |= other.mayReturnEmpty
  3080. self.mayIndexError |= other.mayIndexError
  3081. other = self.exprs[-1]
  3082. if ( isinstance( other, self.__class__ ) and
  3083. not(other.parseAction) and
  3084. other.resultsName is None and
  3085. not other.debug ):
  3086. self.exprs = self.exprs[:-1] + other.exprs[:]
  3087. self.strRepr = None
  3088. self.mayReturnEmpty |= other.mayReturnEmpty
  3089. self.mayIndexError |= other.mayIndexError
  3090. self.errmsg = "Expected " + _ustr(self)
  3091. return self
  3092. def validate( self, validateTrace=[] ):
  3093. tmp = validateTrace[:]+[self]
  3094. for e in self.exprs:
  3095. e.validate(tmp)
  3096. self.checkRecursion( [] )
  3097. def copy(self):
  3098. ret = super(ParseExpression,self).copy()
  3099. ret.exprs = [e.copy() for e in self.exprs]
  3100. return ret
  3101. class And(ParseExpression):
  3102. """
  3103. Requires all given :class:`ParseExpression` s to be found in the given order.
  3104. Expressions may be separated by whitespace.
  3105. May be constructed using the ``'+'`` operator.
  3106. May also be constructed using the ``'-'`` operator, which will
  3107. suppress backtracking.
  3108. Example::
  3109. integer = Word(nums)
  3110. name_expr = OneOrMore(Word(alphas))
  3111. expr = And([integer("id"),name_expr("name"),integer("age")])
  3112. # more easily written as:
  3113. expr = integer("id") + name_expr("name") + integer("age")
  3114. """
  3115. class _ErrorStop(Empty):
  3116. def __init__(self, *args, **kwargs):
  3117. super(And._ErrorStop,self).__init__(*args, **kwargs)
  3118. self.name = '-'
  3119. self.leaveWhitespace()
  3120. def __init__( self, exprs, savelist = True ):
  3121. super(And,self).__init__(exprs, savelist)
  3122. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3123. self.setWhitespaceChars( self.exprs[0].whiteChars )
  3124. self.skipWhitespace = self.exprs[0].skipWhitespace
  3125. self.callPreparse = True
  3126. def streamline(self):
  3127. super(And, self).streamline()
  3128. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3129. return self
  3130. def parseImpl( self, instring, loc, doActions=True ):
  3131. # pass False as last arg to _parse for first element, since we already
  3132. # pre-parsed the string as part of our And pre-parsing
  3133. loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
  3134. errorStop = False
  3135. for e in self.exprs[1:]:
  3136. if isinstance(e, And._ErrorStop):
  3137. errorStop = True
  3138. continue
  3139. if errorStop:
  3140. try:
  3141. loc, exprtokens = e._parse( instring, loc, doActions )
  3142. except ParseSyntaxException:
  3143. raise
  3144. except ParseBaseException as pe:
  3145. pe.__traceback__ = None
  3146. raise ParseSyntaxException._from_exception(pe)
  3147. except IndexError:
  3148. raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
  3149. else:
  3150. loc, exprtokens = e._parse( instring, loc, doActions )
  3151. if exprtokens or exprtokens.haskeys():
  3152. resultlist += exprtokens
  3153. return loc, resultlist
  3154. def __iadd__(self, other ):
  3155. if isinstance( other, basestring ):
  3156. other = ParserElement._literalStringClass( other )
  3157. return self.append( other ) #And( [ self, other ] )
  3158. def checkRecursion( self, parseElementList ):
  3159. subRecCheckList = parseElementList[:] + [ self ]
  3160. for e in self.exprs:
  3161. e.checkRecursion( subRecCheckList )
  3162. if not e.mayReturnEmpty:
  3163. break
  3164. def __str__( self ):
  3165. if hasattr(self,"name"):
  3166. return self.name
  3167. if self.strRepr is None:
  3168. self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
  3169. return self.strRepr
  3170. class Or(ParseExpression):
  3171. """Requires that at least one :class:`ParseExpression` is found. If
  3172. two expressions match, the expression that matches the longest
  3173. string will be used. May be constructed using the ``'^'``
  3174. operator.
  3175. Example::
  3176. # construct Or using '^' operator
  3177. number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
  3178. print(number.searchString("123 3.1416 789"))
  3179. prints::
  3180. [['123'], ['3.1416'], ['789']]
  3181. """
  3182. def __init__( self, exprs, savelist = False ):
  3183. super(Or,self).__init__(exprs, savelist)
  3184. if self.exprs:
  3185. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  3186. else:
  3187. self.mayReturnEmpty = True
  3188. def streamline(self):
  3189. super(Or, self).streamline()
  3190. if __compat__.collect_all_And_tokens:
  3191. self.saveAsList = any(e.saveAsList for e in self.exprs)
  3192. return self
  3193. def parseImpl( self, instring, loc, doActions=True ):
  3194. maxExcLoc = -1
  3195. maxException = None
  3196. matches = []
  3197. for e in self.exprs:
  3198. try:
  3199. loc2 = e.tryParse( instring, loc )
  3200. except ParseException as err:
  3201. err.__traceback__ = None
  3202. if err.loc > maxExcLoc:
  3203. maxException = err
  3204. maxExcLoc = err.loc
  3205. except IndexError:
  3206. if len(instring) > maxExcLoc:
  3207. maxException = ParseException(instring,len(instring),e.errmsg,self)
  3208. maxExcLoc = len(instring)
  3209. else:
  3210. # save match among all matches, to retry longest to shortest
  3211. matches.append((loc2, e))
  3212. if matches:
  3213. matches.sort(key=lambda x: -x[0])
  3214. for _,e in matches:
  3215. try:
  3216. return e._parse( instring, loc, doActions )
  3217. except ParseException as err:
  3218. err.__traceback__ = None
  3219. if err.loc > maxExcLoc:
  3220. maxException = err
  3221. maxExcLoc = err.loc
  3222. if maxException is not None:
  3223. maxException.msg = self.errmsg
  3224. raise maxException
  3225. else:
  3226. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3227. def __ixor__(self, other ):
  3228. if isinstance( other, basestring ):
  3229. other = ParserElement._literalStringClass( other )
  3230. return self.append( other ) #Or( [ self, other ] )
  3231. def __str__( self ):
  3232. if hasattr(self,"name"):
  3233. return self.name
  3234. if self.strRepr is None:
  3235. self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
  3236. return self.strRepr
  3237. def checkRecursion( self, parseElementList ):
  3238. subRecCheckList = parseElementList[:] + [ self ]
  3239. for e in self.exprs:
  3240. e.checkRecursion( subRecCheckList )
  3241. class MatchFirst(ParseExpression):
  3242. """Requires that at least one :class:`ParseExpression` is found. If
  3243. two expressions match, the first one listed is the one that will
  3244. match. May be constructed using the ``'|'`` operator.
  3245. Example::
  3246. # construct MatchFirst using '|' operator
  3247. # watch the order of expressions to match
  3248. number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
  3249. print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
  3250. # put more selective expression first
  3251. number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
  3252. print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
  3253. """
  3254. def __init__( self, exprs, savelist = False ):
  3255. super(MatchFirst,self).__init__(exprs, savelist)
  3256. if self.exprs:
  3257. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  3258. else:
  3259. self.mayReturnEmpty = True
  3260. def streamline(self):
  3261. super(MatchFirst, self).streamline()
  3262. if __compat__.collect_all_And_tokens:
  3263. self.saveAsList = any(e.saveAsList for e in self.exprs)
  3264. return self
  3265. def parseImpl( self, instring, loc, doActions=True ):
  3266. maxExcLoc = -1
  3267. maxException = None
  3268. for e in self.exprs:
  3269. try:
  3270. ret = e._parse( instring, loc, doActions )
  3271. return ret
  3272. except ParseException as err:
  3273. if err.loc > maxExcLoc:
  3274. maxException = err
  3275. maxExcLoc = err.loc
  3276. except IndexError:
  3277. if len(instring) > maxExcLoc:
  3278. maxException = ParseException(instring,len(instring),e.errmsg,self)
  3279. maxExcLoc = len(instring)
  3280. # only got here if no expression matched, raise exception for match that made it the furthest
  3281. else:
  3282. if maxException is not None:
  3283. maxException.msg = self.errmsg
  3284. raise maxException
  3285. else:
  3286. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3287. def __ior__(self, other ):
  3288. if isinstance( other, basestring ):
  3289. other = ParserElement._literalStringClass( other )
  3290. return self.append( other ) #MatchFirst( [ self, other ] )
  3291. def __str__( self ):
  3292. if hasattr(self,"name"):
  3293. return self.name
  3294. if self.strRepr is None:
  3295. self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
  3296. return self.strRepr
  3297. def checkRecursion( self, parseElementList ):
  3298. subRecCheckList = parseElementList[:] + [ self ]
  3299. for e in self.exprs:
  3300. e.checkRecursion( subRecCheckList )
  3301. class Each(ParseExpression):
  3302. """Requires all given :class:`ParseExpression` s to be found, but in
  3303. any order. Expressions may be separated by whitespace.
  3304. May be constructed using the ``'&'`` operator.
  3305. Example::
  3306. color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
  3307. shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
  3308. integer = Word(nums)
  3309. shape_attr = "shape:" + shape_type("shape")
  3310. posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
  3311. color_attr = "color:" + color("color")
  3312. size_attr = "size:" + integer("size")
  3313. # use Each (using operator '&') to accept attributes in any order
  3314. # (shape and posn are required, color and size are optional)
  3315. shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
  3316. shape_spec.runTests('''
  3317. shape: SQUARE color: BLACK posn: 100, 120
  3318. shape: CIRCLE size: 50 color: BLUE posn: 50,80
  3319. color:GREEN size:20 shape:TRIANGLE posn:20,40
  3320. '''
  3321. )
  3322. prints::
  3323. shape: SQUARE color: BLACK posn: 100, 120
  3324. ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
  3325. - color: BLACK
  3326. - posn: ['100', ',', '120']
  3327. - x: 100
  3328. - y: 120
  3329. - shape: SQUARE
  3330. shape: CIRCLE size: 50 color: BLUE posn: 50,80
  3331. ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
  3332. - color: BLUE
  3333. - posn: ['50', ',', '80']
  3334. - x: 50
  3335. - y: 80
  3336. - shape: CIRCLE
  3337. - size: 50
  3338. color: GREEN size: 20 shape: TRIANGLE posn: 20,40
  3339. ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
  3340. - color: GREEN
  3341. - posn: ['20', ',', '40']
  3342. - x: 20
  3343. - y: 40
  3344. - shape: TRIANGLE
  3345. - size: 20
  3346. """
  3347. def __init__( self, exprs, savelist = True ):
  3348. super(Each,self).__init__(exprs, savelist)
  3349. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3350. self.skipWhitespace = True
  3351. self.initExprGroups = True
  3352. self.saveAsList = True
  3353. def streamline(self):
  3354. super(Each, self).streamline()
  3355. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3356. return self
  3357. def parseImpl( self, instring, loc, doActions=True ):
  3358. if self.initExprGroups:
  3359. self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
  3360. opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
  3361. opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
  3362. self.optionals = opt1 + opt2
  3363. self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
  3364. self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
  3365. self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
  3366. self.required += self.multirequired
  3367. self.initExprGroups = False
  3368. tmpLoc = loc
  3369. tmpReqd = self.required[:]
  3370. tmpOpt = self.optionals[:]
  3371. matchOrder = []
  3372. keepMatching = True
  3373. while keepMatching:
  3374. tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
  3375. failed = []
  3376. for e in tmpExprs:
  3377. try:
  3378. tmpLoc = e.tryParse( instring, tmpLoc )
  3379. except ParseException:
  3380. failed.append(e)
  3381. else:
  3382. matchOrder.append(self.opt1map.get(id(e),e))
  3383. if e in tmpReqd:
  3384. tmpReqd.remove(e)
  3385. elif e in tmpOpt:
  3386. tmpOpt.remove(e)
  3387. if len(failed) == len(tmpExprs):
  3388. keepMatching = False
  3389. if tmpReqd:
  3390. missing = ", ".join(_ustr(e) for e in tmpReqd)
  3391. raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
  3392. # add any unmatched Optionals, in case they have default values defined
  3393. matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
  3394. resultlist = []
  3395. for e in matchOrder:
  3396. loc,results = e._parse(instring,loc,doActions)
  3397. resultlist.append(results)
  3398. finalResults = sum(resultlist, ParseResults([]))
  3399. return loc, finalResults
  3400. def __str__( self ):
  3401. if hasattr(self,"name"):
  3402. return self.name
  3403. if self.strRepr is None:
  3404. self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
  3405. return self.strRepr
  3406. def checkRecursion( self, parseElementList ):
  3407. subRecCheckList = parseElementList[:] + [ self ]
  3408. for e in self.exprs:
  3409. e.checkRecursion( subRecCheckList )
  3410. class ParseElementEnhance(ParserElement):
  3411. """Abstract subclass of :class:`ParserElement`, for combining and
  3412. post-processing parsed tokens.
  3413. """
  3414. def __init__( self, expr, savelist=False ):
  3415. super(ParseElementEnhance,self).__init__(savelist)
  3416. if isinstance( expr, basestring ):
  3417. if issubclass(ParserElement._literalStringClass, Token):
  3418. expr = ParserElement._literalStringClass(expr)
  3419. else:
  3420. expr = ParserElement._literalStringClass(Literal(expr))
  3421. self.expr = expr
  3422. self.strRepr = None
  3423. if expr is not None:
  3424. self.mayIndexError = expr.mayIndexError
  3425. self.mayReturnEmpty = expr.mayReturnEmpty
  3426. self.setWhitespaceChars( expr.whiteChars )
  3427. self.skipWhitespace = expr.skipWhitespace
  3428. self.saveAsList = expr.saveAsList
  3429. self.callPreparse = expr.callPreparse
  3430. self.ignoreExprs.extend(expr.ignoreExprs)
  3431. def parseImpl( self, instring, loc, doActions=True ):
  3432. if self.expr is not None:
  3433. return self.expr._parse( instring, loc, doActions, callPreParse=False )
  3434. else:
  3435. raise ParseException("",loc,self.errmsg,self)
  3436. def leaveWhitespace( self ):
  3437. self.skipWhitespace = False
  3438. self.expr = self.expr.copy()
  3439. if self.expr is not None:
  3440. self.expr.leaveWhitespace()
  3441. return self
  3442. def ignore( self, other ):
  3443. if isinstance( other, Suppress ):
  3444. if other not in self.ignoreExprs:
  3445. super( ParseElementEnhance, self).ignore( other )
  3446. if self.expr is not None:
  3447. self.expr.ignore( self.ignoreExprs[-1] )
  3448. else:
  3449. super( ParseElementEnhance, self).ignore( other )
  3450. if self.expr is not None:
  3451. self.expr.ignore( self.ignoreExprs[-1] )
  3452. return self
  3453. def streamline( self ):
  3454. super(ParseElementEnhance,self).streamline()
  3455. if self.expr is not None:
  3456. self.expr.streamline()
  3457. return self
  3458. def checkRecursion( self, parseElementList ):
  3459. if self in parseElementList:
  3460. raise RecursiveGrammarException( parseElementList+[self] )
  3461. subRecCheckList = parseElementList[:] + [ self ]
  3462. if self.expr is not None:
  3463. self.expr.checkRecursion( subRecCheckList )
  3464. def validate( self, validateTrace=[] ):
  3465. tmp = validateTrace[:]+[self]
  3466. if self.expr is not None:
  3467. self.expr.validate(tmp)
  3468. self.checkRecursion( [] )
  3469. def __str__( self ):
  3470. try:
  3471. return super(ParseElementEnhance,self).__str__()
  3472. except Exception:
  3473. pass
  3474. if self.strRepr is None and self.expr is not None:
  3475. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
  3476. return self.strRepr
  3477. class FollowedBy(ParseElementEnhance):
  3478. """Lookahead matching of the given parse expression.
  3479. ``FollowedBy`` does *not* advance the parsing position within
  3480. the input string, it only verifies that the specified parse
  3481. expression matches at the current position. ``FollowedBy``
  3482. always returns a null token list. If any results names are defined
  3483. in the lookahead expression, those *will* be returned for access by
  3484. name.
  3485. Example::
  3486. # use FollowedBy to match a label only if it is followed by a ':'
  3487. data_word = Word(alphas)
  3488. label = data_word + FollowedBy(':')
  3489. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3490. OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
  3491. prints::
  3492. [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
  3493. """
  3494. def __init__( self, expr ):
  3495. super(FollowedBy,self).__init__(expr)
  3496. self.mayReturnEmpty = True
  3497. def parseImpl( self, instring, loc, doActions=True ):
  3498. _, ret = self.expr._parse(instring, loc, doActions=doActions)
  3499. del ret[:]
  3500. return loc, ret
  3501. class PrecededBy(ParseElementEnhance):
  3502. """Lookbehind matching of the given parse expression.
  3503. ``PrecededBy`` does not advance the parsing position within the
  3504. input string, it only verifies that the specified parse expression
  3505. matches prior to the current position. ``PrecededBy`` always
  3506. returns a null token list, but if a results name is defined on the
  3507. given expression, it is returned.
  3508. Parameters:
  3509. - expr - expression that must match prior to the current parse
  3510. location
  3511. - retreat - (default= ``None``) - (int) maximum number of characters
  3512. to lookbehind prior to the current parse location
  3513. If the lookbehind expression is a string, Literal, Keyword, or
  3514. a Word or CharsNotIn with a specified exact or maximum length, then
  3515. the retreat parameter is not required. Otherwise, retreat must be
  3516. specified to give a maximum number of characters to look back from
  3517. the current parse position for a lookbehind match.
  3518. Example::
  3519. # VB-style variable names with type prefixes
  3520. int_var = PrecededBy("#") + pyparsing_common.identifier
  3521. str_var = PrecededBy("$") + pyparsing_common.identifier
  3522. """
  3523. def __init__(self, expr, retreat=None):
  3524. super(PrecededBy, self).__init__(expr)
  3525. self.expr = self.expr().leaveWhitespace()
  3526. self.mayReturnEmpty = True
  3527. self.mayIndexError = False
  3528. self.exact = False
  3529. if isinstance(expr, str):
  3530. retreat = len(expr)
  3531. self.exact = True
  3532. elif isinstance(expr, (Literal, Keyword)):
  3533. retreat = expr.matchLen
  3534. self.exact = True
  3535. elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
  3536. retreat = expr.maxLen
  3537. self.exact = True
  3538. elif isinstance(expr, _PositionToken):
  3539. retreat = 0
  3540. self.exact = True
  3541. self.retreat = retreat
  3542. self.errmsg = "not preceded by " + str(expr)
  3543. self.skipWhitespace = False
  3544. def parseImpl(self, instring, loc=0, doActions=True):
  3545. if self.exact:
  3546. if loc < self.retreat:
  3547. raise ParseException(instring, loc, self.errmsg)
  3548. start = loc - self.retreat
  3549. _, ret = self.expr._parse(instring, start)
  3550. else:
  3551. # retreat specified a maximum lookbehind window, iterate
  3552. test_expr = self.expr + StringEnd()
  3553. instring_slice = instring[:loc]
  3554. last_expr = ParseException(instring, loc, self.errmsg)
  3555. for offset in range(1, min(loc, self.retreat+1)):
  3556. try:
  3557. _, ret = test_expr._parse(instring_slice, loc-offset)
  3558. except ParseBaseException as pbe:
  3559. last_expr = pbe
  3560. else:
  3561. break
  3562. else:
  3563. raise last_expr
  3564. # return empty list of tokens, but preserve any defined results names
  3565. del ret[:]
  3566. return loc, ret
  3567. class NotAny(ParseElementEnhance):
  3568. """Lookahead to disallow matching with the given parse expression.
  3569. ``NotAny`` does *not* advance the parsing position within the
  3570. input string, it only verifies that the specified parse expression
  3571. does *not* match at the current position. Also, ``NotAny`` does
  3572. *not* skip over leading whitespace. ``NotAny`` always returns
  3573. a null token list. May be constructed using the '~' operator.
  3574. Example::
  3575. AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())
  3576. # take care not to mistake keywords for identifiers
  3577. ident = ~(AND | OR | NOT) + Word(alphas)
  3578. boolean_term = Optional(NOT) + ident
  3579. # very crude boolean expression - to support parenthesis groups and
  3580. # operation hierarchy, use infixNotation
  3581. boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)
  3582. # integers that are followed by "." are actually floats
  3583. integer = Word(nums) + ~Char(".")
  3584. """
  3585. def __init__( self, expr ):
  3586. super(NotAny,self).__init__(expr)
  3587. #~ self.leaveWhitespace()
  3588. self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  3589. self.mayReturnEmpty = True
  3590. self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  3591. def parseImpl( self, instring, loc, doActions=True ):
  3592. if self.expr.canParseNext(instring, loc):
  3593. raise ParseException(instring, loc, self.errmsg, self)
  3594. return loc, []
  3595. def __str__( self ):
  3596. if hasattr(self,"name"):
  3597. return self.name
  3598. if self.strRepr is None:
  3599. self.strRepr = "~{" + _ustr(self.expr) + "}"
  3600. return self.strRepr
  3601. class _MultipleMatch(ParseElementEnhance):
  3602. def __init__( self, expr, stopOn=None):
  3603. super(_MultipleMatch, self).__init__(expr)
  3604. self.saveAsList = True
  3605. ender = stopOn
  3606. if isinstance(ender, basestring):
  3607. ender = ParserElement._literalStringClass(ender)
  3608. self.not_ender = ~ender if ender is not None else None
  3609. def parseImpl( self, instring, loc, doActions=True ):
  3610. self_expr_parse = self.expr._parse
  3611. self_skip_ignorables = self._skipIgnorables
  3612. check_ender = self.not_ender is not None
  3613. if check_ender:
  3614. try_not_ender = self.not_ender.tryParse
  3615. # must be at least one (but first see if we are the stopOn sentinel;
  3616. # if so, fail)
  3617. if check_ender:
  3618. try_not_ender(instring, loc)
  3619. loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
  3620. try:
  3621. hasIgnoreExprs = (not not self.ignoreExprs)
  3622. while 1:
  3623. if check_ender:
  3624. try_not_ender(instring, loc)
  3625. if hasIgnoreExprs:
  3626. preloc = self_skip_ignorables( instring, loc )
  3627. else:
  3628. preloc = loc
  3629. loc, tmptokens = self_expr_parse( instring, preloc, doActions )
  3630. if tmptokens or tmptokens.haskeys():
  3631. tokens += tmptokens
  3632. except (ParseException,IndexError):
  3633. pass
  3634. return loc, tokens
  3635. class OneOrMore(_MultipleMatch):
  3636. """Repetition of one or more of the given expression.
  3637. Parameters:
  3638. - expr - expression that must match one or more times
  3639. - stopOn - (default= ``None``) - expression for a terminating sentinel
  3640. (only required if the sentinel would ordinarily match the repetition
  3641. expression)
  3642. Example::
  3643. data_word = Word(alphas)
  3644. label = data_word + FollowedBy(':')
  3645. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3646. text = "shape: SQUARE posn: upper left color: BLACK"
  3647. OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
  3648. # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
  3649. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3650. OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
  3651. # could also be written as
  3652. (attr_expr * (1,)).parseString(text).pprint()
  3653. """
  3654. def __str__( self ):
  3655. if hasattr(self,"name"):
  3656. return self.name
  3657. if self.strRepr is None:
  3658. self.strRepr = "{" + _ustr(self.expr) + "}..."
  3659. return self.strRepr
  3660. class ZeroOrMore(_MultipleMatch):
  3661. """Optional repetition of zero or more of the given expression.
  3662. Parameters:
  3663. - expr - expression that must match zero or more times
  3664. - stopOn - (default= ``None``) - expression for a terminating sentinel
  3665. (only required if the sentinel would ordinarily match the repetition
  3666. expression)
  3667. Example: similar to :class:`OneOrMore`
  3668. """
  3669. def __init__( self, expr, stopOn=None):
  3670. super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
  3671. self.mayReturnEmpty = True
  3672. def parseImpl( self, instring, loc, doActions=True ):
  3673. try:
  3674. return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
  3675. except (ParseException,IndexError):
  3676. return loc, []
  3677. def __str__( self ):
  3678. if hasattr(self,"name"):
  3679. return self.name
  3680. if self.strRepr is None:
  3681. self.strRepr = "[" + _ustr(self.expr) + "]..."
  3682. return self.strRepr
  3683. class _NullToken(object):
  3684. def __bool__(self):
  3685. return False
  3686. __nonzero__ = __bool__
  3687. def __str__(self):
  3688. return ""
  3689. _optionalNotMatched = _NullToken()
  3690. class Optional(ParseElementEnhance):
  3691. """Optional matching of the given expression.
  3692. Parameters:
  3693. - expr - expression that must match zero or more times
  3694. - default (optional) - value to be returned if the optional expression is not found.
  3695. Example::
  3696. # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
  3697. zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
  3698. zip.runTests('''
  3699. # traditional ZIP code
  3700. 12345
  3701. # ZIP+4 form
  3702. 12101-0001
  3703. # invalid ZIP
  3704. 98765-
  3705. ''')
  3706. prints::
  3707. # traditional ZIP code
  3708. 12345
  3709. ['12345']
  3710. # ZIP+4 form
  3711. 12101-0001
  3712. ['12101-0001']
  3713. # invalid ZIP
  3714. 98765-
  3715. ^
  3716. FAIL: Expected end of text (at char 5), (line:1, col:6)
  3717. """
  3718. def __init__( self, expr, default=_optionalNotMatched ):
  3719. super(Optional,self).__init__( expr, savelist=False )
  3720. self.saveAsList = self.expr.saveAsList
  3721. self.defaultValue = default
  3722. self.mayReturnEmpty = True
  3723. def parseImpl( self, instring, loc, doActions=True ):
  3724. try:
  3725. loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  3726. except (ParseException,IndexError):
  3727. if self.defaultValue is not _optionalNotMatched:
  3728. if self.expr.resultsName:
  3729. tokens = ParseResults([ self.defaultValue ])
  3730. tokens[self.expr.resultsName] = self.defaultValue
  3731. else:
  3732. tokens = [ self.defaultValue ]
  3733. else:
  3734. tokens = []
  3735. return loc, tokens
  3736. def __str__( self ):
  3737. if hasattr(self,"name"):
  3738. return self.name
  3739. if self.strRepr is None:
  3740. self.strRepr = "[" + _ustr(self.expr) + "]"
  3741. return self.strRepr
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
          (this is the constructor's first positional argument, named ``other``)
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """Store the target expression and the optional include/ignore/failOn settings."""
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a plain string failOn is promoted to the configured literal expression class
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, ParseResults)`` where the results hold the skipped
        text, followed by the target's own tokens when ``includeMatch`` is set.
        Raises ``ParseException`` if the scan runs past the end of ``instring``
        without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        expr = self.expr
        # bind frequently used callables to locals for the scanning loop
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else failure branch below,
                # so the text skipped so far is returned instead of raising here - confirm
                # this matches the "not a match" wording in the class docstring.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are disabled while searching for the target
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real (honoring doActions) and append its tokens
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
  3844. class Forward(ParseElementEnhance):
  3845. """Forward declaration of an expression to be defined later -
  3846. used for recursive grammars, such as algebraic infix notation.
  3847. When the expression is known, it is assigned to the ``Forward``
  3848. variable using the '<<' operator.
  3849. Note: take care when assigning to ``Forward`` not to overlook
  3850. precedence of operators.
  3851. Specifically, '|' has a lower precedence than '<<', so that::
  3852. fwdExpr << a | b | c
  3853. will actually be evaluated as::
  3854. (fwdExpr << a) | b | c
  3855. thereby leaving b and c out as parseable alternatives. It is recommended that you
  3856. explicitly group the values inserted into the ``Forward``::
  3857. fwdExpr << (a | b | c)
  3858. Converting to use the '<<=' operator instead will avoid this problem.
  3859. See :class:`ParseResults.pprint` for an example of a recursive
  3860. parser created using ``Forward``.
  3861. """
  3862. def __init__( self, other=None ):
  3863. super(Forward,self).__init__( other, savelist=False )
  3864. def __lshift__( self, other ):
  3865. if isinstance( other, basestring ):
  3866. other = ParserElement._literalStringClass(other)
  3867. self.expr = other
  3868. self.strRepr = None
  3869. self.mayIndexError = self.expr.mayIndexError
  3870. self.mayReturnEmpty = self.expr.mayReturnEmpty
  3871. self.setWhitespaceChars( self.expr.whiteChars )
  3872. self.skipWhitespace = self.expr.skipWhitespace
  3873. self.saveAsList = self.expr.saveAsList
  3874. self.ignoreExprs.extend(self.expr.ignoreExprs)
  3875. return self
  3876. def __ilshift__(self, other):
  3877. return self << other
  3878. def leaveWhitespace( self ):
  3879. self.skipWhitespace = False
  3880. return self
  3881. def streamline( self ):
  3882. if not self.streamlined:
  3883. self.streamlined = True
  3884. if self.expr is not None:
  3885. self.expr.streamline()
  3886. return self
  3887. def validate( self, validateTrace=[] ):
  3888. if self not in validateTrace:
  3889. tmp = validateTrace[:]+[self]
  3890. if self.expr is not None:
  3891. self.expr.validate(tmp)
  3892. self.checkRecursion([])
  3893. def __str__( self ):
  3894. if hasattr(self,"name"):
  3895. return self.name
  3896. # Avoid infinite recursion by setting a temporary name
  3897. self.name = self.__class__.__name__ + ": ..."
  3898. # Use the string representation of main expression.
  3899. try:
  3900. if self.expr is not None:
  3901. retString = _ustr(self.expr)
  3902. else:
  3903. retString = "None"
  3904. finally:
  3905. del self.name
  3906. return self.__class__.__name__ + ": " + retString
  3907. def copy(self):
  3908. if self.expr is not None:
  3909. return super(Forward,self).copy()
  3910. else:
  3911. ret = Forward()
  3912. ret <<= self
  3913. return ret
  3914. class TokenConverter(ParseElementEnhance):
  3915. """
  3916. Abstract subclass of :class:`ParseExpression`, for converting parsed results.
  3917. """
  3918. def __init__( self, expr, savelist=False ):
  3919. super(TokenConverter,self).__init__( expr )#, savelist )
  3920. self.saveAsList = False
  3921. class Combine(TokenConverter):
  3922. """Converter to concatenate all matching tokens to a single string.
  3923. By default, the matching patterns must also be contiguous in the
  3924. input string; this can be disabled by specifying
  3925. ``'adjacent=False'`` in the constructor.
  3926. Example::
  3927. real = Word(nums) + '.' + Word(nums)
  3928. print(real.parseString('3.1416')) # -> ['3', '.', '1416']
  3929. # will also erroneously match the following
  3930. print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
  3931. real = Combine(Word(nums) + '.' + Word(nums))
  3932. print(real.parseString('3.1416')) # -> ['3.1416']
  3933. # no match when there are internal spaces
  3934. print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
  3935. """
  3936. def __init__( self, expr, joinString="", adjacent=True ):
  3937. super(Combine,self).__init__( expr )
  3938. # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  3939. if adjacent:
  3940. self.leaveWhitespace()
  3941. self.adjacent = adjacent
  3942. self.skipWhitespace = True
  3943. self.joinString = joinString
  3944. self.callPreparse = True
  3945. def ignore( self, other ):
  3946. if self.adjacent:
  3947. ParserElement.ignore(self, other)
  3948. else:
  3949. super( Combine, self).ignore( other )
  3950. return self
  3951. def postParse( self, instring, loc, tokenlist ):
  3952. retToks = tokenlist.copy()
  3953. del retToks[:]
  3954. retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  3955. if self.resultsName and retToks.haskeys():
  3956. return [ retToks ]
  3957. else:
  3958. return retToks
  3959. class Group(TokenConverter):
  3960. """Converter to return the matched tokens as a list - useful for
  3961. returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.
  3962. Example::
  3963. ident = Word(alphas)
  3964. num = Word(nums)
  3965. term = ident | num
  3966. func = ident + Optional(delimitedList(term))
  3967. print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
  3968. func = ident + Group(Optional(delimitedList(term)))
  3969. print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
  3970. """
  3971. def __init__( self, expr ):
  3972. super(Group,self).__init__( expr )
  3973. self.saveAsList = True
  3974. def postParse( self, instring, loc, tokenlist ):
  3975. return [ tokenlist ]
  3976. class Dict(TokenConverter):
  3977. """Converter to return a repetitive expression as a list, but also
  3978. as a dictionary. Each element can also be referenced using the first
  3979. token in the expression as its key. Useful for tabular report
  3980. scraping when the first column can be used as a item key.
  3981. Example::
  3982. data_word = Word(alphas)
  3983. label = data_word + FollowedBy(':')
  3984. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3985. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3986. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3987. # print attributes as plain groups
  3988. print(OneOrMore(attr_expr).parseString(text).dump())
  3989. # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
  3990. result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
  3991. print(result.dump())
  3992. # access named fields as dict entries, or output as dict
  3993. print(result['shape'])
  3994. print(result.asDict())
  3995. prints::
  3996. ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
  3997. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3998. - color: light blue
  3999. - posn: upper left
  4000. - shape: SQUARE
  4001. - texture: burlap
  4002. SQUARE
  4003. {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
  4004. See more examples at :class:`ParseResults` of accessing fields by results name.
  4005. """
  4006. def __init__( self, expr ):
  4007. super(Dict,self).__init__( expr )
  4008. self.saveAsList = True
  4009. def postParse( self, instring, loc, tokenlist ):
  4010. for i,tok in enumerate(tokenlist):
  4011. if len(tok) == 0:
  4012. continue
  4013. ikey = tok[0]
  4014. if isinstance(ikey,int):
  4015. ikey = _ustr(tok[0]).strip()
  4016. if len(tok)==1:
  4017. tokenlist[ikey] = _ParseResultsWithOffset("",i)
  4018. elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  4019. tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
  4020. else:
  4021. dictvalue = tok.copy() #ParseResults(i)
  4022. del dictvalue[0]
  4023. if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
  4024. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
  4025. else:
  4026. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
  4027. if self.resultsName:
  4028. return [ tokenlist ]
  4029. else:
  4030. return tokenlist
  4031. class Suppress(TokenConverter):
  4032. """Converter for ignoring the results of a parsed expression.
  4033. Example::
  4034. source = "a, b, c,d"
  4035. wd = Word(alphas)
  4036. wd_list1 = wd + ZeroOrMore(',' + wd)
  4037. print(wd_list1.parseString(source))
  4038. # often, delimiters that are useful during parsing are just in the
  4039. # way afterward - use Suppress to keep them out of the parsed output
  4040. wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
  4041. print(wd_list2.parseString(source))
  4042. prints::
  4043. ['a', ',', 'b', ',', 'c', ',', 'd']
  4044. ['a', 'b', 'c', 'd']
  4045. (See also :class:`delimitedList`.)
  4046. """
  4047. def postParse( self, instring, loc, tokenlist ):
  4048. return []
  4049. def suppress( self ):
  4050. return self
  4051. class OnlyOnce(object):
  4052. """Wrapper for parse actions, to ensure they are only called once.
  4053. """
  4054. def __init__(self, methodCall):
  4055. self.callable = _trim_arity(methodCall)
  4056. self.called = False
  4057. def __call__(self,s,l,t):
  4058. if not self.called:
  4059. results = self.callable(s,l,t)
  4060. self.called = True
  4061. return results
  4062. raise ParseException(s,l,"")
  4063. def reset(self):
  4064. self.called = False
  4065. def traceParseAction(f):
  4066. """Decorator for debugging parse actions.
  4067. When the parse action is called, this decorator will print
  4068. ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
  4069. When the parse action completes, the decorator will print
  4070. ``"<<"`` followed by the returned value, or any exception that the parse action raised.
  4071. Example::
  4072. wd = Word(alphas)
  4073. @traceParseAction
  4074. def remove_duplicate_chars(tokens):
  4075. return ''.join(sorted(set(''.join(tokens))))
  4076. wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
  4077. print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
  4078. prints::
  4079. >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
  4080. <<leaving remove_duplicate_chars (ret: 'dfjkls')
  4081. ['dfjkls']
  4082. """
  4083. f = _trim_arity(f)
  4084. def z(*paArgs):
  4085. thisFunc = f.__name__
  4086. s,l,t = paArgs[-3:]
  4087. if len(paArgs)>3:
  4088. thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  4089. sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
  4090. try:
  4091. ret = f(*paArgs)
  4092. except Exception as exc:
  4093. sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  4094. raise
  4095. sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
  4096. return ret
  4097. try:
  4098. z.__name__ = f.__name__
  4099. except AttributeError:
  4100. pass
  4101. return z
  4102. #
  4103. # global helpers
  4104. #
  4105. def delimitedList( expr, delim=",", combine=False ):
  4106. """Helper to define a delimited list of expressions - the delimiter
  4107. defaults to ','. By default, the list elements and delimiters can
  4108. have intervening whitespace, and comments, but this can be
  4109. overridden by passing ``combine=True`` in the constructor. If
  4110. ``combine`` is set to ``True``, the matching tokens are
  4111. returned as a single token string, with the delimiters included;
  4112. otherwise, the matching tokens are returned as a list of tokens,
  4113. with the delimiters suppressed.
  4114. Example::
  4115. delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
  4116. delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
  4117. """
  4118. dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  4119. if combine:
  4120. return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  4121. else:
  4122. return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  4123. def countedArray( expr, intExpr=None ):
  4124. """Helper to define a counted list of expressions.
  4125. This helper defines a pattern of the form::
  4126. integer expr expr expr...
  4127. where the leading integer tells how many expr expressions follow.
  4128. The matched tokens returns the array of expr tokens as a list - the
  4129. leading count token is suppressed.
  4130. If ``intExpr`` is specified, it should be a pyparsing expression
  4131. that produces an integer value.
  4132. Example::
  4133. countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
  4134. # in this parser, the leading integer value is given in binary,
  4135. # '10' indicating that 2 values are in the array
  4136. binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
  4137. countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
  4138. """
  4139. arrayExpr = Forward()
  4140. def countFieldParseAction(s,l,t):
  4141. n = t[0]
  4142. arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  4143. return []
  4144. if intExpr is None:
  4145. intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
  4146. else:
  4147. intExpr = intExpr.copy()
  4148. intExpr.setName("arrayLen")
  4149. intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
  4150. return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
  4151. def _flatten(L):
  4152. ret = []
  4153. for i in L:
  4154. if isinstance(i,list):
  4155. ret.extend(_flatten(i))
  4156. else:
  4157. ret.append(i)
  4158. return ret
  4159. def matchPreviousLiteral(expr):
  4160. """Helper to define an expression that is indirectly defined from
  4161. the tokens matched in a previous expression, that is, it looks for
  4162. a 'repeat' of a previous expression. For example::
  4163. first = Word(nums)
  4164. second = matchPreviousLiteral(first)
  4165. matchExpr = first + ":" + second
  4166. will match ``"1:1"``, but not ``"1:2"``. Because this
  4167. matches a previous literal, will also match the leading
  4168. ``"1:1"`` in ``"1:10"``. If this is not desired, use
  4169. :class:`matchPreviousExpr`. Do *not* use with packrat parsing
  4170. enabled.
  4171. """
  4172. rep = Forward()
  4173. def copyTokenToRepeater(s,l,t):
  4174. if t:
  4175. if len(t) == 1:
  4176. rep << t[0]
  4177. else:
  4178. # flatten t tokens
  4179. tflat = _flatten(t.asList())
  4180. rep << And(Literal(tt) for tt in tflat)
  4181. else:
  4182. rep << Empty()
  4183. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  4184. rep.setName('(prev) ' + _ustr(expr))
  4185. return rep
  4186. def matchPreviousExpr(expr):
  4187. """Helper to define an expression that is indirectly defined from
  4188. the tokens matched in a previous expression, that is, it looks for
  4189. a 'repeat' of a previous expression. For example::
  4190. first = Word(nums)
  4191. second = matchPreviousExpr(first)
  4192. matchExpr = first + ":" + second
  4193. will match ``"1:1"``, but not ``"1:2"``. Because this
  4194. matches by expressions, will *not* match the leading ``"1:1"``
  4195. in ``"1:10"``; the expressions are evaluated first, and then
  4196. compared, so ``"1"`` is compared with ``"10"``. Do *not* use
  4197. with packrat parsing enabled.
  4198. """
  4199. rep = Forward()
  4200. e2 = expr.copy()
  4201. rep <<= e2
  4202. def copyTokenToRepeater(s,l,t):
  4203. matchTokens = _flatten(t.asList())
  4204. def mustMatchTheseTokens(s,l,t):
  4205. theseTokens = _flatten(t.asList())
  4206. if theseTokens != matchTokens:
  4207. raise ParseException("",0,"")
  4208. rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
  4209. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  4210. rep.setName('(prev) ' + _ustr(expr))
  4211. return rep
  4212. def _escapeRegexRangeChars(s):
  4213. #~ escape these chars: ^-]
  4214. for c in r"\^-]":
  4215. s = s.replace(c,_bslash+c)
  4216. s = s.replace("\n",r"\n")
  4217. s = s.replace("\t",r"\t")
  4218. return _ustr(s)
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes
    sure to do longest-first testing when there is a conflict,
    regardless of the input order.  Returns a :class:`Regex` when one
    can be built (see ``useRegex``), otherwise a :class:`MatchFirst`;
    returns :class:`NoMatch` if no symbols are given.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True``, or if
       creating a :class:`Regex` raises an exception)

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # isequal(a, b): the two symbols are duplicates
    # masks(a, b): a is a prefix of b, so testing a first would hide b
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        # invalid input only warns; symbols stays empty and NoMatch is returned below
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and reorder so that no symbol is preceded by one of its
    # own prefixes.  Whenever the list is changed, position i is re-examined;
    # i only advances (for/else) when a full scan made no change.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # later duplicate of cur: drop it
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other: move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                # otherwise use an alternation of the escaped symbols
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: warn and fall through to the MatchFirst form
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
  4287. def dictOf( key, value ):
  4288. """Helper to easily and clearly define a dictionary by specifying
  4289. the respective patterns for the key and value. Takes care of
  4290. defining the :class:`Dict`, :class:`ZeroOrMore`, and
  4291. :class:`Group` tokens in the proper order. The key pattern
  4292. can include delimiting markers or punctuation, as long as they are
  4293. suppressed, thereby leaving the significant key text. The value
  4294. pattern can include named results, so that the :class:`Dict` results
  4295. can include named token fields.
  4296. Example::
  4297. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  4298. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  4299. print(OneOrMore(attr_expr).parseString(text).dump())
  4300. attr_label = label
  4301. attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
  4302. # similar to Dict, but simpler call format
  4303. result = dictOf(attr_label, attr_value).parseString(text)
  4304. print(result.dump())
  4305. print(result['shape'])
  4306. print(result.shape) # object attribute access works too
  4307. print(result.asDict())
  4308. prints::
  4309. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  4310. - color: light blue
  4311. - posn: upper left
  4312. - shape: SQUARE
  4313. - texture: burlap
  4314. SQUARE
  4315. SQUARE
  4316. {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
  4317. """
  4318. return Dict(OneOrMore(Group(key + value)))
  4319. def originalTextFor(expr, asString=True):
  4320. """Helper to return the original, untokenized text for a given
  4321. expression. Useful to restore the parsed fields of an HTML start
  4322. tag into the raw tag text itself, or to revert separate tokens with
  4323. intervening whitespace back to the original matching input text. By
  4324. default, returns astring containing the original parsed text.
  4325. If the optional ``asString`` argument is passed as
  4326. ``False``, then the return value is
  4327. a :class:`ParseResults` containing any results names that
  4328. were originally matched, and a single token containing the original
  4329. matched text from the input string. So if the expression passed to
  4330. :class:`originalTextFor` contains expressions with defined
  4331. results names, you must set ``asString`` to ``False`` if you
  4332. want to preserve those results name values.
  4333. Example::
  4334. src = "this is test <b> bold <i>text</i> </b> normal text "
  4335. for tag in ("b","i"):
  4336. opener,closer = makeHTMLTags(tag)
  4337. patt = originalTextFor(opener + SkipTo(closer) + closer)
  4338. print(patt.searchString(src)[0])
  4339. prints::
  4340. ['<b> bold <i>text</i> </b>']
  4341. ['<i>text</i>']
  4342. """
  4343. locMarker = Empty().setParseAction(lambda s,loc,t: loc)
  4344. endlocMarker = locMarker.copy()
  4345. endlocMarker.callPreparse = False
  4346. matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
  4347. if asString:
  4348. extractText = lambda s,l,t: s[t._original_start:t._original_end]
  4349. else:
  4350. def extractText(s,l,t):
  4351. t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
  4352. matchExpr.setParseAction(extractText)
  4353. matchExpr.ignoreExprs = expr.ignoreExprs
  4354. return matchExpr
  4355. def ungroup(expr):
  4356. """Helper to undo pyparsing's default grouping of And expressions,
  4357. even if all but one are non-empty.
  4358. """
  4359. return TokenConverter(expr).addParseAction(lambda t:t[0])
  4360. def locatedExpr(expr):
  4361. """Helper to decorate a returned token with its starting and ending
  4362. locations in the input string.
  4363. This helper adds the following results names:
  4364. - locn_start = location where matched expression begins
  4365. - locn_end = location where matched expression ends
  4366. - value = the actual parsed results
  4367. Be careful if the input text contains ``<TAB>`` characters, you
  4368. may want to call :class:`ParserElement.parseWithTabs`
  4369. Example::
  4370. wd = Word(alphas)
  4371. for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
  4372. print(match)
  4373. prints::
  4374. [[0, 'ljsdf', 5]]
  4375. [[8, 'lksdjjf', 15]]
  4376. [[18, 'lkkjj', 23]]
  4377. """
  4378. locator = Empty().setParseAction(lambda s,l,t: l)
  4379. return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
# convenience constants for positional expressions
# Each is a ready-made, named instance of the corresponding class, so grammars
# can use e.g. ``lineEnd`` directly instead of writing ``LineEnd()``.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
  4386. _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
  4387. _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
  4388. _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
  4389. _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
  4390. _charRange = Group(_singleChar + Suppress("-") + _singleChar)
  4391. _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
  4392. def srange(s):
  4393. r"""Helper to easily define string ranges for use in Word
  4394. construction. Borrows syntax from regexp '[]' string range
  4395. definitions::
  4396. srange("[0-9]") -> "0123456789"
  4397. srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
  4398. srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  4399. The input string must be enclosed in []'s, and the returned string
  4400. is the expanded character set joined into a single string. The
  4401. values enclosed in the []'s may be:
  4402. - a single character
  4403. - an escaped character with a leading backslash (such as ``\-``
  4404. or ``\]``)
  4405. - an escaped hex character with a leading ``'\x'``
  4406. (``\x21``, which is a ``'!'`` character) (``\0x##``
  4407. is also supported for backwards compatibility)
  4408. - an escaped octal character with a leading ``'\0'``
  4409. (``\041``, which is a ``'!'`` character)
  4410. - a range of any of the above, separated by a dash (``'a-z'``,
  4411. etc.)
  4412. - any combination of the above (``'aeiouy'``,
  4413. ``'a-zA-Z0-9_$'``, etc.)
  4414. """
  4415. _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
  4416. try:
  4417. return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
  4418. except Exception:
  4419. return ""
  4420. def matchOnlyAtCol(n):
  4421. """Helper method for defining parse actions that require matching at
  4422. a specific column in the input text.
  4423. """
  4424. def verifyCol(strg,locn,toks):
  4425. if col(locn,strg) != n:
  4426. raise ParseException(strg,locn,"matched token not at column %d" % n)
  4427. return verifyCol
  4428. def replaceWith(replStr):
  4429. """Helper method for common parse actions that simply return
  4430. a literal value. Especially useful when used with
  4431. :class:`transformString<ParserElement.transformString>` ().
  4432. Example::
  4433. num = Word(nums).setParseAction(lambda toks: int(toks[0]))
  4434. na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
  4435. term = na | num
  4436. OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
  4437. """
  4438. return lambda s,l,t: [replStr]
  4439. def removeQuotes(s,l,t):
  4440. """Helper parse action for removing quotation marks from parsed
  4441. quoted strings.
  4442. Example::
  4443. # by default, quotation marks are included in parsed results
  4444. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
  4445. # use removeQuotes to strip quotation marks from parsed results
  4446. quotedString.setParseAction(removeQuotes)
  4447. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
  4448. """
  4449. return t[0][1:-1]
  4450. def tokenMap(func, *args):
  4451. """Helper to define a parse action by mapping a function to all
  4452. elements of a ParseResults list. If any additional args are passed,
  4453. they are forwarded to the given function as additional arguments
  4454. after the token, as in
  4455. ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
  4456. which will convert the parsed data to an integer using base 16.
  4457. Example (compare the last to example in :class:`ParserElement.transformString`::
  4458. hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
  4459. hex_ints.runTests('''
  4460. 00 11 22 aa FF 0a 0d 1a
  4461. ''')
  4462. upperword = Word(alphas).setParseAction(tokenMap(str.upper))
  4463. OneOrMore(upperword).runTests('''
  4464. my kingdom for a horse
  4465. ''')
  4466. wd = Word(alphas).setParseAction(tokenMap(str.title))
  4467. OneOrMore(wd).setParseAction(' '.join).runTests('''
  4468. now is the winter of our discontent made glorious summer by this sun of york
  4469. ''')
  4470. prints::
  4471. 00 11 22 aa FF 0a 0d 1a
  4472. [0, 17, 34, 170, 255, 10, 13, 26]
  4473. my kingdom for a horse
  4474. ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
  4475. now is the winter of our discontent made glorious summer by this sun of york
  4476. ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
  4477. """
  4478. def pa(s,l,t):
  4479. return [func(tokn, *args) for tokn in t]
  4480. try:
  4481. func_name = getattr(func, '__name__',
  4482. getattr(func, '__class__').__name__)
  4483. except Exception:
  4484. func_name = str(func)
  4485. pa.__name__ = func_name
  4486. return pa
# Built with tokenMap, so the conversion is applied to each token in turn;
# every token is passed through _ustr() before its case is changed.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

# Lower-case counterpart of upcaseTokens, built the same way.
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
  4493. def _makeTags(tagStr, xml,
  4494. suppress_LT=Suppress("<"),
  4495. suppress_GT=Suppress(">")):
  4496. """Internal helper to construct opening and closing tag expressions, given a tag name"""
  4497. if isinstance(tagStr,basestring):
  4498. resname = tagStr
  4499. tagStr = Keyword(tagStr, caseless=not xml)
  4500. else:
  4501. resname = tagStr.name
  4502. tagAttrName = Word(alphas,alphanums+"_-:")
  4503. if (xml):
  4504. tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  4505. openTag = (suppress_LT
  4506. + tagStr("tag")
  4507. + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue )))
  4508. + Optional("/", default=[False])("empty").setParseAction(lambda s,l,t:t[0]=='/')
  4509. + suppress_GT)
  4510. else:
  4511. tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printables, excludeChars=">")
  4512. openTag = (suppress_LT
  4513. + tagStr("tag")
  4514. + Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens)
  4515. + Optional(Suppress("=") + tagAttrValue))))
  4516. + Optional("/",default=[False])("empty").setParseAction(lambda s,l,t:t[0]=='/')
  4517. + suppress_GT)
  4518. closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)
  4519. openTag.setName("<%s>" % resname)
  4520. # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
  4521. openTag.addParseAction(lambda t: t.__setitem__("start"+"".join(resname.replace(":"," ").title().split()), t.copy()))
  4522. closeTag = closeTag("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
  4523. openTag.tag = resname
  4524. closeTag.tag = resname
  4525. openTag.tag_body = SkipTo(closeTag())
  4526. return openTag, closeTag
  4527. def makeHTMLTags(tagStr):
  4528. """Helper to construct opening and closing tag expressions for HTML,
  4529. given a tag name. Matches tags in either upper or lower case,
  4530. attributes with namespaces and with quoted or unquoted values.
  4531. Example::
  4532. text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
  4533. # makeHTMLTags returns pyparsing expressions for the opening and
  4534. # closing tags as a 2-tuple
  4535. a,a_end = makeHTMLTags("A")
  4536. link_expr = a + SkipTo(a_end)("link_text") + a_end
  4537. for link in link_expr.searchString(text):
  4538. # attributes in the <A> tag (like "href" shown here) are
  4539. # also accessible as named results
  4540. print(link.link_text, '->', link.href)
  4541. prints::
  4542. pyparsing -> https://github.com/pyparsing/pyparsing/wiki
  4543. """
  4544. return _makeTags( tagStr, False )
  4545. def makeXMLTags(tagStr):
  4546. """Helper to construct opening and closing tag expressions for XML,
  4547. given a tag name. Matches tags only in the given upper/lower case.
  4548. Example: similar to :class:`makeHTMLTags`
  4549. """
  4550. return _makeTags( tagStr, True )
  4551. def withAttribute(*args,**attrDict):
  4552. """Helper to create a validating parse action to be used with start
  4553. tags created with :class:`makeXMLTags` or
  4554. :class:`makeHTMLTags`. Use ``withAttribute`` to qualify
  4555. a starting tag with a required attribute value, to avoid false
  4556. matches on common tags such as ``<TD>`` or ``<DIV>``.
  4557. Call ``withAttribute`` with a series of attribute names and
  4558. values. Specify the list of filter attributes names and values as:
  4559. - keyword arguments, as in ``(align="right")``, or
  4560. - as an explicit dict with ``**`` operator, when an attribute
  4561. name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
  4562. - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align","right"))``
  4563. For attribute names with a namespace prefix, you must use the second
  4564. form. Attribute names are matched insensitive to upper/lower case.
  4565. If just testing for ``class`` (with or without a namespace), use
  4566. :class:`withClass`.
  4567. To verify that the attribute exists, but without specifying a value,
  4568. pass ``withAttribute.ANY_VALUE`` as the value.
  4569. Example::
  4570. html = '''
  4571. <div>
  4572. Some text
  4573. <div type="grid">1 4 0 1 0</div>
  4574. <div type="graph">1,3 2,3 1,1</div>
  4575. <div>this has no type</div>
  4576. </div>
  4577. '''
  4578. div,div_end = makeHTMLTags("div")
  4579. # only match div tag having a type attribute with value "grid"
  4580. div_grid = div().setParseAction(withAttribute(type="grid"))
  4581. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4582. for grid_header in grid_expr.searchString(html):
  4583. print(grid_header.body)
  4584. # construct a match with any div tag having a type attribute, regardless of the value
  4585. div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
  4586. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4587. for div_header in div_expr.searchString(html):
  4588. print(div_header.body)
  4589. prints::
  4590. 1 4 0 1 0
  4591. 1 4 0 1 0
  4592. 1,3 2,3 1,1
  4593. """
  4594. if args:
  4595. attrs = args[:]
  4596. else:
  4597. attrs = attrDict.items()
  4598. attrs = [(k,v) for k,v in attrs]
  4599. def pa(s,l,tokens):
  4600. for attrName,attrValue in attrs:
  4601. if attrName not in tokens:
  4602. raise ParseException(s,l,"no matching attribute " + attrName)
  4603. if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
  4604. raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
  4605. (attrName, tokens[attrName], attrValue))
  4606. return pa
  4607. withAttribute.ANY_VALUE = object()
  4608. def withClass(classname, namespace=''):
  4609. """Simplified version of :class:`withAttribute` when
  4610. matching on a div class - made difficult because ``class`` is
  4611. a reserved word in Python.
  4612. Example::
  4613. html = '''
  4614. <div>
  4615. Some text
  4616. <div class="grid">1 4 0 1 0</div>
  4617. <div class="graph">1,3 2,3 1,1</div>
  4618. <div>this &lt;div&gt; has no class</div>
  4619. </div>
  4620. '''
  4621. div,div_end = makeHTMLTags("div")
  4622. div_grid = div().setParseAction(withClass("grid"))
  4623. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4624. for grid_header in grid_expr.searchString(html):
  4625. print(grid_header.body)
  4626. div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
  4627. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4628. for div_header in div_expr.searchString(html):
  4629. print(div_header.body)
  4630. prints::
  4631. 1 4 0 1 0
  4632. 1 4 0 1 0
  4633. 1,3 2,3 1,1
  4634. """
  4635. classattr = "%s:class" % namespace if namespace else "class"
  4636. return withAttribute(**{classattr : classname})
# Namespace holding the associativity markers that infixNotation compares
# each operator definition against.
opAssoc = SimpleNamespace()
# Each marker is a distinct sentinel object.
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
  4640. def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
  4641. """Helper method for constructing grammars of expressions made up of
  4642. operators working in a precedence hierarchy. Operators may be unary
  4643. or binary, left- or right-associative. Parse actions can also be
  4644. attached to operator expressions. The generated parser will also
  4645. recognize the use of parentheses to override operator precedences
  4646. (see example below).
  4647. Note: if you define a deep operator list, you may see performance
  4648. issues when using infixNotation. See
  4649. :class:`ParserElement.enablePackrat` for a mechanism to potentially
  4650. improve your parser performance.
  4651. Parameters:
  4652. - baseExpr - expression representing the most basic element for the
  4653. nested
  4654. - opList - list of tuples, one for each operator precedence level
  4655. in the expression grammar; each tuple is of the form ``(opExpr,
  4656. numTerms, rightLeftAssoc, parseAction)``, where:
  4657. - opExpr is the pyparsing expression for the operator; may also
  4658. be a string, which will be converted to a Literal; if numTerms
  4659. is 3, opExpr is a tuple of two expressions, for the two
  4660. operators separating the 3 terms
  4661. - numTerms is the number of terms for this operator (must be 1,
  4662. 2, or 3)
  4663. - rightLeftAssoc is the indicator whether the operator is right
  4664. or left associative, using the pyparsing-defined constants
  4665. ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
  4666. - parseAction is the parse action to be associated with
  4667. expressions matching this operator expression (the parse action
  4668. tuple member may be omitted); if the parse action is passed
  4669. a tuple or list of functions, this is equivalent to calling
  4670. ``setParseAction(*fn)``
  4671. (:class:`ParserElement.setParseAction`)
  4672. - lpar - expression for matching left-parentheses
  4673. (default= ``Suppress('(')``)
  4674. - rpar - expression for matching right-parentheses
  4675. (default= ``Suppress(')')``)
  4676. Example::
  4677. # simple example of four-function arithmetic with ints and
  4678. # variable names
  4679. integer = pyparsing_common.signed_integer
  4680. varname = pyparsing_common.identifier
  4681. arith_expr = infixNotation(integer | varname,
  4682. [
  4683. ('-', 1, opAssoc.RIGHT),
  4684. (oneOf('* /'), 2, opAssoc.LEFT),
  4685. (oneOf('+ -'), 2, opAssoc.LEFT),
  4686. ])
  4687. arith_expr.runTests('''
  4688. 5+3*6
  4689. (5+3)*6
  4690. -2--11
  4691. ''', fullDump=False)
  4692. prints::
  4693. 5+3*6
  4694. [[5, '+', [3, '*', 6]]]
  4695. (5+3)*6
  4696. [[[5, '+', 3], '*', 6]]
  4697. -2--11
  4698. [[['-', 2], '-', ['-', 11]]]
  4699. """
  4700. # captive version of FollowedBy that does not do parse actions or capture results names
  4701. class _FB(FollowedBy):
  4702. def parseImpl(self, instring, loc, doActions=True):
  4703. self.expr.tryParse(instring, loc)
  4704. return loc, []
  4705. ret = Forward()
  4706. lastExpr = baseExpr | ( lpar + ret + rpar )
  4707. for i,operDef in enumerate(opList):
  4708. opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  4709. termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
  4710. if arity == 3:
  4711. if opExpr is None or len(opExpr) != 2:
  4712. raise ValueError(
  4713. "if numterms=3, opExpr must be a tuple or list of two expressions")
  4714. opExpr1, opExpr2 = opExpr
  4715. thisExpr = Forward().setName(termName)
  4716. if rightLeftAssoc == opAssoc.LEFT:
  4717. if arity == 1:
  4718. matchExpr = _FB(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
  4719. elif arity == 2:
  4720. if opExpr is not None:
  4721. matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  4722. else:
  4723. matchExpr = _FB(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
  4724. elif arity == 3:
  4725. matchExpr = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
  4726. Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
  4727. else:
  4728. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4729. elif rightLeftAssoc == opAssoc.RIGHT:
  4730. if arity == 1:
  4731. # try to avoid LR with this extra test
  4732. if not isinstance(opExpr, Optional):
  4733. opExpr = Optional(opExpr)
  4734. matchExpr = _FB(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
  4735. elif arity == 2:
  4736. if opExpr is not None:
  4737. matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  4738. else:
  4739. matchExpr = _FB(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
  4740. elif arity == 3:
  4741. matchExpr = _FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
  4742. Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
  4743. else:
  4744. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4745. else:
  4746. raise ValueError("operator must indicate right or left associativity")
  4747. if pa:
  4748. if isinstance(pa, (tuple, list)):
  4749. matchExpr.setParseAction(*pa)
  4750. else:
  4751. matchExpr.setParseAction(pa)
  4752. thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
  4753. lastExpr = thisExpr
  4754. ret <<= lastExpr
  4755. return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions. Inside the quotes the regexes accept: any character
# other than the quote itself, newline, carriage return or backslash; a doubled
# quote character; or a backslash escape (a backslash followed by any non-"x"
# character, or by "x" and one or more hex digits).
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# Either quoting style, using the same two patterns as above.
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# A quoted string with a leading "u" prefix.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
  4764. def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
  4765. """Helper method for defining nested lists enclosed in opening and
  4766. closing delimiters ("(" and ")" are the default).
  4767. Parameters:
  4768. - opener - opening character for a nested list
  4769. (default= ``"("``); can also be a pyparsing expression
  4770. - closer - closing character for a nested list
  4771. (default= ``")"``); can also be a pyparsing expression
  4772. - content - expression for items within the nested lists
  4773. (default= ``None``)
  4774. - ignoreExpr - expression for ignoring opening and closing
  4775. delimiters (default= :class:`quotedString`)
  4776. If an expression is not provided for the content argument, the
  4777. nested expression will capture all whitespace-delimited content
  4778. between delimiters as a list of separate values.
  4779. Use the ``ignoreExpr`` argument to define expressions that may
  4780. contain opening or closing characters that should not be treated as
  4781. opening or closing characters for nesting, such as quotedString or
  4782. a comment expression. Specify multiple expressions using an
  4783. :class:`Or` or :class:`MatchFirst`. The default is
  4784. :class:`quotedString`, but if no expressions are to be ignored, then
  4785. pass ``None`` for this argument.
  4786. Example::
  4787. data_type = oneOf("void int short long char float double")
  4788. decl_data_type = Combine(data_type + Optional(Word('*')))
  4789. ident = Word(alphas+'_', alphanums+'_')
  4790. number = pyparsing_common.number
  4791. arg = Group(decl_data_type + ident)
  4792. LPAR,RPAR = map(Suppress, "()")
  4793. code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
  4794. c_function = (decl_data_type("type")
  4795. + ident("name")
  4796. + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
  4797. + code_body("body"))
  4798. c_function.ignore(cStyleComment)
  4799. source_code = '''
  4800. int is_odd(int x) {
  4801. return (x%2);
  4802. }
  4803. int dec_to_hex(char hchar) {
  4804. if (hchar >= '0' && hchar <= '9') {
  4805. return (ord(hchar)-ord('0'));
  4806. } else {
  4807. return (10+ord(hchar)-ord('A'));
  4808. }
  4809. }
  4810. '''
  4811. for func in c_function.searchString(source_code):
  4812. print("%(name)s (%(type)s) args: %(args)s" % func)
  4813. prints::
  4814. is_odd (int) args: [['int', 'x']]
  4815. dec_to_hex (int) args: [['char', 'hchar']]
  4816. """
  4817. if opener == closer:
  4818. raise ValueError("opening and closing strings cannot be the same")
  4819. if content is None:
  4820. if isinstance(opener,basestring) and isinstance(closer,basestring):
  4821. if len(opener) == 1 and len(closer)==1:
  4822. if ignoreExpr is not None:
  4823. content = (Combine(OneOrMore(~ignoreExpr +
  4824. CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4825. ).setParseAction(lambda t:t[0].strip()))
  4826. else:
  4827. content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
  4828. ).setParseAction(lambda t:t[0].strip()))
  4829. else:
  4830. if ignoreExpr is not None:
  4831. content = (Combine(OneOrMore(~ignoreExpr +
  4832. ~Literal(opener) + ~Literal(closer) +
  4833. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4834. ).setParseAction(lambda t:t[0].strip()))
  4835. else:
  4836. content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
  4837. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4838. ).setParseAction(lambda t:t[0].strip()))
  4839. else:
  4840. raise ValueError("opening and closing arguments must be strings if no content expression is given")
  4841. ret = Forward()
  4842. if ignoreExpr is not None:
  4843. ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
  4844. else:
  4845. ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
  4846. ret.setName('nested %s%s expression' % (opener,closer))
  4847. return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Side effects visible in the code: ``indentStack`` is pushed/popped by
    the parse actions defined here and restored to its value at call time
    when the block expression fails; ``blockStatementExpr`` is modified
    to ignore a backslash followed by a line end.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot of the stack at definition time, used to undo partial pushes on failure
    backup_stack = indentStack[:]

    def reset_stack():
        # slice-assign so the caller's list object itself is restored
        indentStack[:] = backup_stack

    def checkPeerIndent(s,l,t):
        # parse action: the statement must start at the current indent column
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # parse action: a deeper column opens a new block and is pushed on the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # parse action: the block ends when the column falls back to the enclosing
        # level (or further); the block's own indent is then popped
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends; only tabs and spaces are skipped as whitespace before them
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # if the block fails to match, restore the indent stack to its original contents
    smExpr.setFailAction(lambda a, b, c, d: reset_stack())
    # a backslash followed by a line end is ignored inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
# Character codes 0xC0-0xFF, leaving out 0xD7 and 0xF7.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# Character codes 0xA1-0xBF plus the two codes (0xD7, 0xF7) omitted above.
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions whose tag name is any word of letters,
# digits, "_" and ":" that starts with a letter.
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# Entity name -> replacement character: gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'".
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# Matches "&<name>;" for the names above; the name is captured in the "entity" group.
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
  4957. def replaceHTMLEntity(t):
  4958. """Helper parser action to replace common HTML entities with their special characters"""
  4959. return _htmlEntityMap.get(t.entity)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# ".*" stops at the next newline, so this matches the remainder of the current line.
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# The pattern also consumes a backslash-newline pair, so a trailing backslash
# continues the comment onto the following line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# One unquoted list item: runs of printable non-comma characters, joined by
# interior spaces/tabs as long as those are not followed by a comma or line end.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Each item is optional with default "", so an empty position yields an empty string.
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
  4982. # some other useful expressions - using lower-case class name since we are really using this as a namespace
  4983. class pyparsing_common:
  4984. """Here are some common low-level expressions that may be useful in
  4985. jump-starting parser development:
  4986. - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
  4987. :class:`scientific notation<sci_real>`)
  4988. - common :class:`programming identifiers<identifier>`
  4989. - network addresses (:class:`MAC<mac_address>`,
  4990. :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
  4991. - ISO8601 :class:`dates<iso8601_date>` and
  4992. :class:`datetime<iso8601_datetime>`
  4993. - :class:`UUID<uuid>`
  4994. - :class:`comma-separated list<comma_separated_list>`
  4995. Parse actions:
  4996. - :class:`convertToInteger`
  4997. - :class:`convertToFloat`
  4998. - :class:`convertToDate`
  4999. - :class:`convertToDatetime`
  5000. - :class:`stripHTMLTags`
  5001. - :class:`upcaseTokens`
  5002. - :class:`downcaseTokens`
  5003. Example::
  5004. pyparsing_common.number.runTests('''
  5005. # any int or real number, returned as the appropriate type
  5006. 100
  5007. -100
  5008. +100
  5009. 3.14159
  5010. 6.02e23
  5011. 1e-12
  5012. ''')
  5013. pyparsing_common.fnumber.runTests('''
  5014. # any int or real number, returned as float
  5015. 100
  5016. -100
  5017. +100
  5018. 3.14159
  5019. 6.02e23
  5020. 1e-12
  5021. ''')
  5022. pyparsing_common.hex_integer.runTests('''
  5023. # hex numbers
  5024. 100
  5025. FF
  5026. ''')
  5027. pyparsing_common.fraction.runTests('''
  5028. # fractions
  5029. 1/2
  5030. -3/4
  5031. ''')
  5032. pyparsing_common.mixed_integer.runTests('''
  5033. # mixed fractions
  5034. 1
  5035. 1/2
  5036. -3/4
  5037. 1-3/4
  5038. ''')
  5039. import uuid
  5040. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  5041. pyparsing_common.uuid.runTests('''
  5042. # uuid
  5043. 12345678-1234-5678-1234-567812345678
  5044. ''')
  5045. prints::
  5046. # any int or real number, returned as the appropriate type
  5047. 100
  5048. [100]
  5049. -100
  5050. [-100]
  5051. +100
  5052. [100]
  5053. 3.14159
  5054. [3.14159]
  5055. 6.02e23
  5056. [6.02e+23]
  5057. 1e-12
  5058. [1e-12]
  5059. # any int or real number, returned as float
  5060. 100
  5061. [100.0]
  5062. -100
  5063. [-100.0]
  5064. +100
  5065. [100.0]
  5066. 3.14159
  5067. [3.14159]
  5068. 6.02e23
  5069. [6.02e+23]
  5070. 1e-12
  5071. [1e-12]
  5072. # hex numbers
  5073. 100
  5074. [256]
  5075. FF
  5076. [255]
  5077. # fractions
  5078. 1/2
  5079. [0.5]
  5080. -3/4
  5081. [-0.75]
  5082. # mixed fractions
  5083. 1
  5084. [1]
  5085. 1/2
  5086. [0.5]
  5087. -3/4
  5088. [-0.75]
  5089. 1-3/4
  5090. [1.75]
  5091. # uuid
  5092. 12345678-1234-5678-1234-567812345678
  5093. [UUID('12345678-1234-5678-1234-567812345678')]
  5094. """
    # Both converters are built with tokenMap, so they apply to every parsed token.
    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # tokenMap(int, 16) converts each token with int(token, 16)
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are converted to float so the division below is true division
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # first token is the numerator, last token the denominator
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # the '-' between the integer and the fraction is optional and suppressed
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the integer part and the fraction part are added together
    mixed_integer.addParseAction(sum)

    # requires at least one digit before the decimal point; digits after it are optional
    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # accepts either an integer with an exponent, or a decimal with an optional exponent
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # four dot-separated parts, each matched by the 25[0-5] | 2[0-4][0-9] | 1?[0-9]{1,2} alternation
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    # one colon-separated group of 1 to 4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # full form: exactly eight groups
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # short form: "::" with up to seven groups on either side ...
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # ... but fewer than eight groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    # mixed form: the literal "::ffff:" prefix followed by an IPv4 address
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # NOTE: the regex requires six two-hex-digit groups; the backreference \1 forces
    # every delimiter to be the same character as the first one
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
  5138. @staticmethod
  5139. def convertToDate(fmt="%Y-%m-%d"):
  5140. """
  5141. Helper to create a parse action for converting parsed date string to Python datetime.date
  5142. Params -
  5143. - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
  5144. Example::
  5145. date_expr = pyparsing_common.iso8601_date.copy()
  5146. date_expr.setParseAction(pyparsing_common.convertToDate())
  5147. print(date_expr.parseString("1999-12-31"))
  5148. prints::
  5149. [datetime.date(1999, 12, 31)]
  5150. """
  5151. def cvt_fn(s,l,t):
  5152. try:
  5153. return datetime.strptime(t[0], fmt).date()
  5154. except ValueError as ve:
  5155. raise ParseException(s, l, str(ve))
  5156. return cvt_fn
  5157. @staticmethod
  5158. def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
  5159. """Helper to create a parse action for converting parsed
  5160. datetime string to Python datetime.datetime
  5161. Params -
  5162. - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
  5163. Example::
  5164. dt_expr = pyparsing_common.iso8601_datetime.copy()
  5165. dt_expr.setParseAction(pyparsing_common.convertToDatetime())
  5166. print(dt_expr.parseString("1999-12-31T23:59:59.999"))
  5167. prints::
  5168. [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
  5169. """
  5170. def cvt_fn(s,l,t):
  5171. try:
  5172. return datetime.strptime(t[0], fmt)
  5173. except ValueError as ve:
  5174. raise ParseException(s, l, str(ve))
  5175. return cvt_fn
  5176. iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
  5177. "ISO8601 date (``yyyy-mm-dd``)"
  5178. iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
  5179. "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
  5180. uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
  5181. "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
  5182. _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
  5183. @staticmethod
  5184. def stripHTMLTags(s, l, tokens):
  5185. """Parse action to remove HTML tags from web page HTML source
  5186. Example::
  5187. # strip HTML links from normal text
  5188. text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
  5189. td,td_end = makeHTMLTags("TD")
  5190. table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
  5191. print(table_text.parseString(text).body)
  5192. Prints::
  5193. More info at the pyparsing wiki page
  5194. """
  5195. return pyparsing_common._html_stripper.transformString(tokens[0])
  5196. _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
  5197. + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
  5198. comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
  5199. """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
  5200. upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
  5201. """Parse action to convert tokens to upper case."""
  5202. downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
  5203. """Parse action to convert tokens to lower case."""
  5204. class _lazyclassproperty(object):
  5205. def __init__(self, fn):
  5206. self.fn = fn
  5207. self.__doc__ = fn.__doc__
  5208. self.__name__ = fn.__name__
  5209. def __get__(self, obj, cls):
  5210. if cls is None:
  5211. cls = type(obj)
  5212. if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) for superclass in cls.__mro__[1:]):
  5213. cls._intern = {}
  5214. attrname = self.fn.__name__
  5215. if attrname not in cls._intern:
  5216. cls._intern[attrname] = self.fn(cls)
  5217. return cls._intern[attrname]
  5218. class unicode_set(object):
  5219. """
  5220. A set of Unicode characters, for language-specific strings for
  5221. ``alphas``, ``nums``, ``alphanums``, and ``printables``.
  5222. A unicode_set is defined by a list of ranges in the Unicode character
  5223. set, in a class attribute ``_ranges``, such as::
  5224. _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
  5225. A unicode set can also be defined using multiple inheritance of other unicode sets::
  5226. class CJK(Chinese, Japanese, Korean):
  5227. pass
  5228. """
  5229. _ranges = []
  5230. @classmethod
  5231. def _get_chars_for_ranges(cls):
  5232. ret = []
  5233. for cc in cls.__mro__:
  5234. if cc is unicode_set:
  5235. break
  5236. for rr in cc._ranges:
  5237. ret.extend(range(rr[0], rr[-1]+1))
  5238. return [unichr(c) for c in sorted(set(ret))]
  5239. @_lazyclassproperty
  5240. def printables(cls):
  5241. "all non-whitespace characters in this range"
  5242. return u''.join(filterfalse(unicode.isspace, cls._get_chars_for_ranges()))
  5243. @_lazyclassproperty
  5244. def alphas(cls):
  5245. "all alphabetic characters in this range"
  5246. return u''.join(filter(unicode.isalpha, cls._get_chars_for_ranges()))
  5247. @_lazyclassproperty
  5248. def nums(cls):
  5249. "all numeric digit characters in this range"
  5250. return u''.join(filter(unicode.isdigit, cls._get_chars_for_ranges()))
  5251. @_lazyclassproperty
  5252. def alphanums(cls):
  5253. "all alphanumeric characters in this range"
  5254. return cls.alphas + cls.nums
  5255. class pyparsing_unicode(unicode_set):
  5256. """
  5257. A namespace class for defining common language unicode_sets.
  5258. """
  5259. _ranges = [(32, sys.maxunicode)]
  5260. class Latin1(unicode_set):
  5261. "Unicode set for Latin-1 Unicode Character Range"
  5262. _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
  5263. class LatinA(unicode_set):
  5264. "Unicode set for Latin-A Unicode Character Range"
  5265. _ranges = [(0x0100, 0x017f),]
  5266. class LatinB(unicode_set):
  5267. "Unicode set for Latin-B Unicode Character Range"
  5268. _ranges = [(0x0180, 0x024f),]
  5269. class Greek(unicode_set):
  5270. "Unicode set for Greek Unicode Character Ranges"
  5271. _ranges = [
  5272. (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
  5273. (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
  5274. (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
  5275. ]
  5276. class Cyrillic(unicode_set):
  5277. "Unicode set for Cyrillic Unicode Character Range"
  5278. _ranges = [(0x0400, 0x04ff)]
  5279. class Chinese(unicode_set):
  5280. "Unicode set for Chinese Unicode Character Range"
  5281. _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f), ]
  5282. class Japanese(unicode_set):
  5283. "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
  5284. _ranges = [ ]
  5285. class Kanji(unicode_set):
  5286. "Unicode set for Kanji Unicode Character Range"
  5287. _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f), ]
  5288. class Hiragana(unicode_set):
  5289. "Unicode set for Hiragana Unicode Character Range"
  5290. _ranges = [(0x3040, 0x309f), ]
  5291. class Katakana(unicode_set):
  5292. "Unicode set for Katakana Unicode Character Range"
  5293. _ranges = [(0x30a0, 0x30ff), ]
  5294. class Korean(unicode_set):
  5295. "Unicode set for Korean Unicode Character Range"
  5296. _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f), ]
  5297. class CJK(Chinese, Japanese, Korean):
  5298. "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
  5299. pass
  5300. class Thai(unicode_set):
  5301. "Unicode set for Thai Unicode Character Range"
  5302. _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b), ]
  5303. class Arabic(unicode_set):
  5304. "Unicode set for Arabic Unicode Character Range"
  5305. _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f), ]
  5306. class Hebrew(unicode_set):
  5307. "Unicode set for Hebrew Unicode Character Range"
  5308. _ranges = [(0x0590, 0x05ff), ]
  5309. class Devanagari(unicode_set):
  5310. "Unicode set for Devanagari Unicode Character Range"
  5311. _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
  5312. pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges
  5313. + pyparsing_unicode.Japanese.Hiragana._ranges
  5314. + pyparsing_unicode.Japanese.Katakana._ranges)
  5315. # define ranges in language character sets
  5316. if PY_3:
  5317. setattr(pyparsing_unicode, "العربية", pyparsing_unicode.Arabic)
  5318. setattr(pyparsing_unicode, "中文", pyparsing_unicode.Chinese)
  5319. setattr(pyparsing_unicode, "кириллица", pyparsing_unicode.Cyrillic)
  5320. setattr(pyparsing_unicode, "Ελληνικά", pyparsing_unicode.Greek)
  5321. setattr(pyparsing_unicode, "עִברִית", pyparsing_unicode.Hebrew)
  5322. setattr(pyparsing_unicode, "日本語", pyparsing_unicode.Japanese)
  5323. setattr(pyparsing_unicode.Japanese, "漢字", pyparsing_unicode.Japanese.Kanji)
  5324. setattr(pyparsing_unicode.Japanese, "カタカナ", pyparsing_unicode.Japanese.Katakana)
  5325. setattr(pyparsing_unicode.Japanese, "ひらがな", pyparsing_unicode.Japanese.Hiragana)
  5326. setattr(pyparsing_unicode, "한국어", pyparsing_unicode.Korean)
  5327. setattr(pyparsing_unicode, "ไทย", pyparsing_unicode.Thai)
  5328. setattr(pyparsing_unicode, "देवनागरी", pyparsing_unicode.Devanagari)
  5329. if __name__ == "__main__":
  5330. selectToken = CaselessLiteral("select")
  5331. fromToken = CaselessLiteral("from")
  5332. ident = Word(alphas, alphanums + "_$")
  5333. columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
  5334. columnNameList = Group(delimitedList(columnName)).setName("columns")
  5335. columnSpec = ('*' | columnNameList)
  5336. tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
  5337. tableNameList = Group(delimitedList(tableName)).setName("tables")
  5338. simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
  5339. # demo runTests method, including embedded comments in test string
  5340. simpleSQL.runTests("""
  5341. # '*' as column list and dotted table name
  5342. select * from SYS.XYZZY
  5343. # caseless match on "SELECT", and casts back to "select"
  5344. SELECT * from XYZZY, ABC
  5345. # list of column names, and mixed case SELECT keyword
  5346. Select AA,BB,CC from Sys.dual
  5347. # multiple tables
  5348. Select A, B, C from Sys.dual, Table2
  5349. # invalid SELECT keyword - should fail
  5350. Xelect A, B, C from Sys.dual
  5351. # incomplete command - should fail
  5352. Select
  5353. # invalid column name - should fail
  5354. Select ^^^ frox Sys.dual
  5355. """)
  5356. pyparsing_common.number.runTests("""
  5357. 100
  5358. -100
  5359. +100
  5360. 3.14159
  5361. 6.02e23
  5362. 1e-12
  5363. """)
  5364. # any int or real number, returned as float
  5365. pyparsing_common.fnumber.runTests("""
  5366. 100
  5367. -100
  5368. +100
  5369. 3.14159
  5370. 6.02e23
  5371. 1e-12
  5372. """)
  5373. pyparsing_common.hex_integer.runTests("""
  5374. 100
  5375. FF
  5376. """)
  5377. import uuid
  5378. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  5379. pyparsing_common.uuid.runTests("""
  5380. 12345678-1234-5678-1234-567812345678
  5381. """)