}. Call C{withAttribute} with a series of attribute names and values. Specify the list of filter attributes names and values as: - keyword arguments, as in C{(align="right")}, or - as an explicit dict with C{**} operator, when an attribute name is also a Python reserved word, as in C{**{"class":"Customer", "align":"right"}} - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) For attribute names with a namespace prefix, you must use the second form. Attribute names are matched insensitive to upper/lower case. If just testing for C{class} (with or without a namespace), use C{L{withClass}}. To verify that the attribute exists, but without specifying a value, pass C{withAttribute.ANY_VALUE} as the value. Example:: html = '''

Some text

1 4 0 1 0

1,3 2,3 1,1

this has no type

''' div,div_end = makeHTMLTags("div") # only match div tag having a type attribute with value "grid" div_grid = div().setParseAction(withAttribute(type="grid")) grid_expr = div_grid + SkipTo(div | div_end)("body") for grid_header in grid_expr.searchString(html): print(grid_header.body) # construct a match with any div tag having a type attribute, regardless of the value div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) div_expr = div_any_type + SkipTo(div | div_end)("body") for div_header in div_expr.searchString(html): print(div_header.body) prints:: 1 4 0 1 0 1 4 0 1 0 1,3 2,3 1,1 """ if args: attrs = args[:] else: attrs = attrDict.items() attrs = [(k,v) for k,v in attrs] def pa(s,l,tokens): for attrName,attrValue in attrs: if attrName not in tokens: raise ParseException(s,l,"no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % (attrName, tokens[attrName], attrValue)) return pa withAttribute.ANY_VALUE = object() def withClass(classname, namespace=''): """ Simplified version of C{L{withAttribute}} when matching on a div class - made difficult because C{class} is a reserved word in Python. Example:: html = '''

Some text

1 4 0 1 0

1,3 2,3 1,1

this <div> has no class

''' div,div_end = makeHTMLTags("div") div_grid = div().setParseAction(withClass("grid")) grid_expr = div_grid + SkipTo(div | div_end)("body") for grid_header in grid_expr.searchString(html): print(grid_header.body) div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) div_expr = div_any_type + SkipTo(div | div_end)("body") for div_header in div_expr.searchString(html): print(div_header.body) prints:: 1 4 0 1 0 1 4 0 1 0 1,3 2,3 1,1 """ classattr = "%s:class" % namespace if namespace else "class" return withAttribute(**{classattr : classname}) opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): """ Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached to operator expressions. The generated parser will also recognize the use of parentheses to override operator precedences (see example below). Note: if you define a deep operator list, you may see performance issues when using infixNotation. See L{ParserElement.enablePackrat} for a mechanism to potentially improve your parser performance. Parameters: - baseExpr - expression representing the most basic element for the nested - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form (opExpr, numTerms, rightLeftAssoc, parseAction), where: - opExpr is the pyparsing expression for the operator; may also be a string, which will be converted to a Literal; if numTerms is 3, opExpr is a tuple of two expressions, for the two operators separating the 3 terms - numTerms is the number of terms for this operator (must be 1, 2, or 3) - rightLeftAssoc is the indicator whether the operator is right or left associative, using the pyparsing-defined constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - parseAction is the parse action to be associated with expressions matching this operator expression (the parse action tuple member may be omitted) - lpar - expression for matching left-parentheses (default=C{Suppress('(')}) - rpar - expression for matching right-parentheses (default=C{Suppress(')')}) Example:: # simple example of four-function arithmetic with ints and variable names integer = pyparsing_common.signed_integer varname = pyparsing_common.identifier arith_expr = infixNotation(integer | varname, [ ('-', 1, opAssoc.RIGHT), (oneOf('* /'), 2, opAssoc.LEFT), (oneOf('+ -'), 2, opAssoc.LEFT), ]) arith_expr.runTests(''' 5+3*6 (5+3)*6 -2--11 ''', fullDump=False) prints:: 5+3*6 [[5, '+', [3, '*', 6]]] (5+3)*6 [[[5, '+', 3], '*', 6]] -2--11 [[['-', 2], '-', ['-', 11]]] """ ret = Forward() lastExpr = baseExpr | ( lpar + ret + rpar ) for i,operDef in enumerate(opList): opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr if arity == 3: if opExpr is None or len(opExpr) != 2: raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") opExpr1, opExpr2 = opExpr thisExpr = Forward().setName(termName) if rightLeftAssoc == opAssoc.LEFT: if arity == 1: matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) elif arity == 2: if opExpr is not None: matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) else: matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) elif arity == 3: matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") elif rightLeftAssoc == opAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Optional): opExpr = Optional(opExpr) matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) elif arity == 2: if opExpr is not None: matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) else: matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) elif arity == 3: matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") else: raise ValueError("operator must indicate right or left associativity") if pa: matchExpr.setParseAction( pa ) thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) lastExpr = thisExpr ret <<= lastExpr return ret operatorPrecedence = infixNotation """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): """ Helper method for defining nested lists enclosed in opening and closing delimiters ("(" and ")" are the default). Parameters: - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression - content - expression for items within the nested lists (default=C{None}) - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString}) If an expression is not provided for the content argument, the nested expression will capture all whitespace-delimited content between delimiters as a list of separate values. Use the C{ignoreExpr} argument to define expressions that may contain opening or closing characters that should not be treated as opening or closing characters for nesting, such as quotedString or a comment expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. The default is L{quotedString}, but if no expressions are to be ignored, then pass C{None} for this argument. Example:: data_type = oneOf("void int short long char float double") decl_data_type = Combine(data_type + Optional(Word('*'))) ident = Word(alphas+'_', alphanums+'_') number = pyparsing_common.number arg = Group(decl_data_type + ident) LPAR,RPAR = map(Suppress, "()") code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) c_function = (decl_data_type("type") + ident("name") + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + code_body("body")) c_function.ignore(cStyleComment) source_code = ''' int is_odd(int x) { return (x%2); } int dec_to_hex(char hchar) { if (hchar >= '0' && hchar <= '9') { return (ord(hchar)-ord('0')); } else { return (10+ord(hchar)-ord('A')); } } ''' for func in c_function.searchString(source_code): print("%(name)s (%(type)s) args: %(args)s" % func) prints:: is_odd (int) args: [['int', 'x']] dec_to_hex (int) args: [['char', 'hchar']] """ if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: if isinstance(opener,basestring) and isinstance(closer,basestring): if len(opener) == 1 and len(closer)==1: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS ).setParseAction(lambda t:t[0].strip())) else: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: raise ValueError("opening and closing arguments must be strings if no content expression is given") ret = Forward() if ignoreExpr is not None: ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) else: ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) ret.setName('nested %s%s expression' % (opener,closer)) return ret def indentedBlock(blockStatementExpr, indentStack, indent=True): """ Helper method for defining space-delimited indentation blocks, such as those used to define block statements in Python source code. Parameters: - blockStatementExpr - expression defining syntax of statement that is repeated within the indented block - indentStack - list created by caller to manage indentation stack (multiple statementWithIndentedBlock expressions within a single grammar should share a common indentStack) - indent - boolean indicating whether block must be indented beyond the the current level; set to False for block of left-most statements (default=C{True}) A valid block must contain at least one C{blockStatement}. Example:: data = ''' def A(z): A1 B = 100 G = A2 A2 A3 B def BB(a,b,c): BB1 def BBA(): bba1 bba2 bba3 C D def spam(x,y): def eggs(z): pass ''' indentStack = [1] stmt = Forward() identifier = Word(alphas, alphanums) funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") func_body = indentedBlock(stmt, indentStack) funcDef = Group( funcDecl + func_body ) rvalue = Forward() funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") rvalue << (funcCall | identifier | Word(nums)) assignment = Group(identifier + "=" + rvalue) stmt << ( funcDef | assignment | identifier ) module_body = OneOrMore(stmt) parseTree = module_body.parseString(data) parseTree.pprint() prints:: [['def', 'A', ['(', 'z', ')'], ':', [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], 'B', ['def', 'BB', ['(', 'a', 'b', 'c', ')'], ':', [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], 'C', 'D', ['def', 'spam', ['(', 'x', 'y', ')'], ':', [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] """ def checkPeerIndent(s,l,t): if l >= len(s): return curCol = col(l,s) if curCol != indentStack[-1]: if curCol > indentStack[-1]: raise ParseFatalException(s,l,"illegal nesting") raise ParseException(s,l,"not a peer entry") def checkSubIndent(s,l,t): curCol = col(l,s) if curCol > indentStack[-1]: indentStack.append( curCol ) else: raise ParseException(s,l,"not a subentry") def checkUnindent(s,l,t): if l >= len(s): return curCol = col(l,s) if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): raise ParseException(s,l,"not an unindent") indentStack.pop() NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') PEER = Empty().setParseAction(checkPeerIndent).setName('') UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') if indent: smExpr = Group( Optional(NL) + #~ FollowedBy(blockStatementExpr) + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) else: smExpr = Group( Optional(NL) + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) blockStatementExpr.ignore(_bslash + LineEnd()) return smExpr.setName('indented block') alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") def replaceHTMLEntity(t): """Helper parser action to replace common HTML entities with their special characters""" return _htmlEntityMap.get(t.entity) # it's easy to get these comment structures wrong - they're very common, so may as well make them available cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") "Comment of the form C{/* ... */}" htmlComment = Regex(r"").setName("HTML comment") "Comment of the form C{}" restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") "Comment of the form C{// ... (to end of line)}" cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" javaStyleComment = cppStyleComment "Same as C{L{cppStyleComment}}" pythonStyleComment = Regex(r"#.*").setName("Python style comment") "Comment of the form C{# ... (to end of line)}" _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + Optional( Word(" \t") + ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" # some other useful expressions - using lower-case class name since we are really using this as a namespace class pyparsing_common: """ Here are some common low-level expressions that may be useful in jump-starting parser development: - numeric forms (L{integers}, L{reals}, L{scientific notation}) - common L{programming identifiers} - network addresses (L{MAC}, L{IPv4}, L{IPv6}) - ISO8601 L{dates} and L{datetime} - L{UUID} - L{comma-separated list} Parse actions: - C{L{convertToInteger}} - C{L{convertToFloat}} - C{L{convertToDate}} - C{L{convertToDatetime}} - C{L{stripHTMLTags}} - C{L{upcaseTokens}} - C{L{downcaseTokens}} Example:: pyparsing_common.number.runTests(''' # any int or real number, returned as the appropriate type 100 -100 +100 3.14159 6.02e23 1e-12 ''') pyparsing_common.fnumber.runTests(''' # any int or real number, returned as float 100 -100 +100 3.14159 6.02e23 1e-12 ''') pyparsing_common.hex_integer.runTests(''' # hex numbers 100 FF ''') pyparsing_common.fraction.runTests(''' # fractions 1/2 -3/4 ''') pyparsing_common.mixed_integer.runTests(''' # mixed fractions 1 1/2 -3/4 1-3/4 ''') import uuid pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) pyparsing_common.uuid.runTests(''' # uuid 12345678-1234-5678-1234-567812345678 ''') prints:: # any int or real number, returned as the appropriate type 100 [100] -100 [-100] +100 [100] 3.14159 [3.14159] 6.02e23 [6.02e+23] 1e-12 [1e-12] # any int or real number, returned as float 100 [100.0] -100 [-100.0] +100 [100.0] 3.14159 [3.14159] 6.02e23 [6.02e+23] 1e-12 [1e-12] # hex numbers 100 [256] FF [255] # fractions 1/2 [0.5] -3/4 [-0.75] # mixed fractions 1 [1] 1/2 [0.5] -3/4 [-0.75] 1-3/4 [1.75] # uuid 12345678-1234-5678-1234-567812345678 [UUID('12345678-1234-5678-1234-567812345678')] """ convertToInteger = tokenMap(int) """ Parse action for converting parsed integers to Python int """ convertToFloat = tokenMap(float) """ Parse action for converting parsed numbers to Python float """ integer = Word(nums).setName("integer").setParseAction(convertToInteger) """expression that parses an unsigned integer, returns an int""" hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) """expression that parses a hexadecimal integer, returns an int""" signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) """expression that parses an integer with optional leading sign, returns an int""" fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") """fractional expression of an integer divided by an integer, returns a float""" fraction.addParseAction(lambda t: t[0]/t[-1]) mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" mixed_integer.addParseAction(sum) real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) """expression that parses a floating point number and returns a float""" sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) """expression that parses a floating point number with optional scientific notation and returns a float""" # streamlining this expression makes the docs nicer-looking number = (sci_real | real | signed_integer).streamline() """any numeric expression, returns the corresponding Python type""" fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) """any int or real number, returned as float""" identifier = Word(alphas+'_', alphanums+'_').setName("identifier") """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") "IPv4 address (C{0.0.0.0 - 255.255.255.255})" _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") "IPv6 address (long, short, or mixed form)" mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" @staticmethod def convertToDate(fmt="%Y-%m-%d"): """ Helper to create a parse action for converting parsed date string to Python datetime.date Params - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) Example:: date_expr = pyparsing_common.iso8601_date.copy() date_expr.setParseAction(pyparsing_common.convertToDate()) print(date_expr.parseString("1999-12-31")) prints:: [datetime.date(1999, 12, 31)] """ def cvt_fn(s,l,t): try: return datetime.strptime(t[0], fmt).date() except ValueError as ve: raise ParseException(s, l, str(ve)) return cvt_fn @staticmethod def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): """ Helper to create a parse action for converting parsed datetime string to Python datetime.datetime Params - - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) Example:: dt_expr = pyparsing_common.iso8601_datetime.copy() dt_expr.setParseAction(pyparsing_common.convertToDatetime()) print(dt_expr.parseString("1999-12-31T23:59:59.999")) prints:: [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] """ def cvt_fn(s,l,t): try: return datetime.strptime(t[0], fmt) except ValueError as ve: raise ParseException(s, l, str(ve)) return cvt_fn iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") "ISO8601 date (C{yyyy-mm-dd})" iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() @staticmethod def stripHTMLTags(s, l, tokens): """ Parse action to remove HTML tags from web page HTML source Example:: # strip HTML links from normal text text = 'More info at the pyparsing wiki page' td,td_end = makeHTMLTags("TD") table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' """ return pyparsing_common._html_stripper.transformString(tokens[0]) _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') + Optional( White(" \t") ) ) ).streamline().setName("commaItem") comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) """Parse action to convert tokens to upper case.""" downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) """Parse action to convert tokens to lower case.""" if __name__ == "__main__": selectToken = CaselessLiteral("select") fromToken = CaselessLiteral("from") ident = Word(alphas, alphanums + "_$") columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) columnNameList = Group(delimitedList(columnName)).setName("columns") columnSpec = ('*' | columnNameList) tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) tableNameList = Group(delimitedList(tableName)).setName("tables") simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") # demo runTests method, including embedded comments in test string simpleSQL.runTests(""" # '*' as column list and dotted table name select * from SYS.XYZZY # caseless match on "SELECT", and casts back to "select" SELECT * from XYZZY, ABC # list of column names, and mixed case SELECT keyword Select AA,BB,CC from Sys.dual # multiple tables Select A, B, C from Sys.dual, Table2 # invalid SELECT keyword - should fail Xelect A, B, C from Sys.dual # incomplete command - should fail Select # invalid column name - should fail Select ^^^ frox Sys.dual """) pyparsing_common.number.runTests(""" 100 -100 +100 3.14159 6.02e23 1e-12 """) # any int or real number, returned as float pyparsing_common.fnumber.runTests(""" 100 -100 +100 3.14159 6.02e23 1e-12 """) pyparsing_common.hex_integer.runTests(""" 100 FF """) import uuid pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) pyparsing_common.uuid.runTests(""" 12345678-1234-5678-1234-567812345678 """)