# Source code for pyexpander.parser

"""implement the parser for pyexpander.
"""
import bisect
import re

__version__= "2.1.2" #VERSION#

# pylint: disable= invalid-name consider-using-f-string

# We use '\n' as line separator and rely on python's built in line end
# conversion to '\n' on all platforms.
# You may, however, use change_linesep() to change the line separator.

# The separator that is searched for when line starts are located
# (IndexedString), when the end of a comment is located (parseComment) and
# when a line continuation is recognized (parseBackslash).
LINESEP= "\n"
# Length of LINESEP, kept in sync with it by change_linesep().
LINESEP_LEN= len(LINESEP)

def change_linesep(sep):
    """Change the module-wide line separator, this is here just for tests.

    Rebinds the module globals LINESEP and LINESEP_LEN.

    Parameters:
        sep: the new line separator, a non-empty string.

    Raises:
        ValueError: if sep is empty. str.find() reports an empty string at
            every position, so the scan for line starts in IndexedString
            would never terminate with an empty separator.
    """
    # pylint: disable= global-statement
    global LINESEP, LINESEP_LEN
    # take the length first so that nothing is rebound when sep is unusable
    sep_len = len(sep)
    if sep_len == 0:
        raise ValueError("line separator must not be empty")
    LINESEP = sep
    LINESEP_LEN = sep_len

class IndexedString:
    """A string together with row column information.

    Rows and columns are counted from 1. The table of line starts is built
    on the first call of rowcol() with the value LINESEP has at that moment
    and is reused for all later calls.

    Here is an example:

    >>> txt='''01234
    ... 67
    ... 9abcd'''
    >>> l=IndexedString(txt)
    >>> l.rowcol(0)
    (1, 1)
    >>> l.rowcol(1)
    (1, 2)
    >>> l.rowcol(4)
    (1, 5)
    >>> l.rowcol(5)
    (1, 6)
    >>> l.rowcol(6)
    (2, 1)
    >>> l.rowcol(7)
    (2, 2)
    >>> l.rowcol(8)
    (2, 3)
    >>> l.rowcol(9)
    (3, 1)
    >>> l.rowcol(13)
    (3, 5)
    >>> l.rowcol(14)
    (3, 6)
    >>> l.rowcol(16)
    (3, 8)
    """
    # pylint: disable= too-few-public-methods

    def __init__(self, st):
        """Remember the string st, the line table is created on demand."""
        self._st = st
        # both lists are filled by _list(), index i describes line i+1:
        self._lines = None      # line numbers
        self._positions = None  # string index where that line begins

    def _list(self):
        """Calculate and remember positions where lines begin."""
        text_len = len(self._st)
        positions = [0]
        pos = 0
        while True:
            # look for the standard line separator in the string:
            found = self._st.find(LINESEP, pos)
            if found < 0:
                break
            pos = found + LINESEP_LEN
            if pos >= text_len:
                # a separator at the very end does not start another line
                break
            positions.append(pos)
        self._positions = positions
        self._lines = list(range(1, len(positions) + 1))

    def rowcol(self, pos):
        """Calculate (row,column) from a string position.

        A position behind the end of the string is reported as a column of
        the last line.
        """
        if self._lines is None:
            self._list()
        # the last line start that is not greater than pos
        idx = bisect.bisect_right(self._positions, pos) - 1
        return (self._lines[idx], pos - self._positions[idx] + 1)

    def st(self):
        """Return the raw string."""
        return self._st

    def __str__(self):
        return "IndexedString(...)"

    def __repr__(self):
        # Note: if repr(some object) gets too long since
        # repr(IndexedString(..)) basically prints the whole input file
        # you may in-comment the following line in order to make
        # the output shorter:
        #return "IndexedString(...)"
        return "IndexedString(%s)" % repr(self._st)

class ParseException(Exception):
    """Used for exceptions in this module.

    Attributes:
        value: the message text.
        pos: optional position in the parsed string.
        rowcol: optional (row, column) pair; when present it is preferred
            over pos in the string representation.
    """

    def __init__(self, value, pos=None, rowcol=None):
        super().__init__(value, pos, rowcol)
        self.value = value
        self.pos = pos
        self.rowcol = rowcol

    def __str__(self):
        """Return the message followed by the most precise location known."""
        if self.rowcol is not None:
            (row, col) = (self.rowcol[0], self.rowcol[1])
            return "%s line %d, col %d" % (self.value, row, col)
        if self.pos is not None:
            return "%s position: %d" % (self.value, self.pos)
        return "%s" % self.value

# A python identifier that may contain dots, e.g. "a" or "b.d". It is always
# used with match(), so together with the trailing "$" it has to cover the
# complete text that is examined.
rx_pyIdent = re.compile(r'([A-Za-z_][\w\.]*)$')

# A comma together with the whitespace around it.
rx_csv = re.compile(r'\s*,\s*')

def scanPyIdentList(st):
    """Scan a list of python identifiers.

    The string is split at commas (whitespace around a comma is dropped) and
    every part must be an identifier, dots are allowed within a part.

    Returns the list of parts.

    Raises ParseException if one of the parts is not an identifier.

    Here are some examples:

    >>> scanPyIdentList("a,b")
    ['a', 'b']
    >>> scanPyIdentList("a,b.d, c")
    ['a', 'b.d', 'c']
    >>> scanPyIdentList("a,b.d, c&")
    Traceback (most recent call last):
        ...
    ParseException: list of python identifiers expected
    """
    lst = rx_csv.split(st)
    for elm in lst:
        if rx_pyIdent.match(elm) is None:
            raise ParseException("list of python identifiers expected")
    return lst

# "<left> in <right>": the left part ends at the first stand-alone word "in",
# whitespace around all three parts is dropped.
rx_py_in = re.compile(r'^\s*(.*?)\s*\b(in)\b\s*(.*?)\s*$')

def scanPyIn(st):
    """Scan a python "in" statement.

    Returns a tuple of three strings: the text left of "in", the word "in"
    and the text right of it.

    Raises ParseException if the string contains no stand-alone word "in".

    Here are some examples:

    >>> scanPyIn(" (a,b) in k.items() ")
    ('(a,b)', 'in', 'k.items()')
    """
    m = rx_py_in.match(st)
    if m is None:
        raise ParseException("python \"in\" expression expected")
    return m.groups()

# A simple identifier (no dots) enclosed in curly brackets, e.g. "{abc}".
rx_bracketed = re.compile(r'\{[A-Za-z_]\w*\}')

def parseBracketed(idxst,pos):
    """Parse an identifier in curly brackets.

    Parameters:
        idxst: the IndexedString to parse.
        pos: index of the opening curly bracket.

    Returns the tuple (pos, end), end is the index of the first character
    *after* the closing bracket.

    Raises TypeError if idxst is not an IndexedString and ParseException if
    there is no bracketed identifier at pos.

    Here are some examples:

    >>> def test(st,pos):
    ...     idxst= IndexedString(st)
    ...     (a,b)= parseBracketed(idxst,pos)
    ...     print(st[a:b])
    ...
    >>> test(r'{abc}',0)
    {abc}
    >>> test(r'{ab8c}',0)
    {ab8c}
    >>> test(r'{c}',0)
    {c}
    >>> test(r'{}',0)
    Traceback (most recent call last):
        ...
    ParseException: command enclosed in curly brackets at line 1, col 1
    >>> test(r'{abc',0)
    Traceback (most recent call last):
        ...
    ParseException: command enclosed in curly brackets at line 1, col 1
    >>> test(r'x{ab8c}',1)
    {ab8c}
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    m = rx_bracketed.match(idxst.st(), pos)
    if m is None:
        raise ParseException("command enclosed in curly brackets at",
                             rowcol=idxst.rowcol(pos))
    return (pos, m.end())

# from python 3 documentation:
# stringprefix    ::=  "r" | "u" | "R" | "U" | "f" | "F"
#                      | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"

rx_StringLiteralStart= re.compile(r'''(fr|fR|Fr|FR|rf|rF|Rf|RF|f|F|r|R|u|U|)("""|''' + \
                                  """'''""" + \
                                  r'''|'|")''')

def parseStringLiteral(idxst,pos):
    r"""parse a python string literal.

    returns 2 numbers, the index where the string starts and
    the index of the first character *after* the string

    A quote character ends the string unless it is preceded by an odd number
    of backslashes. An even number of backslashes are escaped backslashes,
    they leave the quote unescaped.

    Raises TypeError if idxst is not an IndexedString and ParseException if
    no string starts at pos or if the end of the string is not found.

    Here are some examples:

    >>> def test(st,pos):
    ...     idxst= IndexedString(st)
    ...     (a,b)= parseStringLiteral(idxst,pos)
    ...     print(st[a:b])
    ...

    >>> test(r'''"abc"''',0)
    "abc"

    >>> test("'''ab'c'd'''",0)
    '''ab'c'd'''
    >>> test("'''ab'cd''''",0)
    '''ab'cd'''

    >>> test(r'''F"abc"''',0)
    F"abc"
    >>> test(r'''xF"abc"''',1)
    F"abc"
    >>> test(r'''xFr"abc"''',1)
    Fr"abc"

    >>> test(r'''xFr"ab\\"c"''',1)
    Fr"ab\\"

    >>> test(r'''xFr"ab\"c"''',1)
    Fr"ab\"c"
    >>> test(r'''"ab\\\"c"''',0)
    "ab\\\"c"
    >>> test(r'''xFr"ab\"c"''',0)
    Traceback (most recent call last):
        ...
    ParseException: start of string expected at line 1, col 1
    >>> test(r'''"ab''',0)
    Traceback (most recent call last):
        ...
    ParseException: end of string not found at line 1, col 1
    >>> test(r"'''ab'",0)
    Traceback (most recent call last):
        ...
    ParseException: end of string not found at line 1, col 1
    >>> test(r'''"ab\"''',0)
    Traceback (most recent call last):
        ...
    ParseException: end of string not found at line 1, col 1
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    st = idxst.st()
    m = rx_StringLiteralStart.match(st, pos)
    if m is None:
        raise ParseException("start of string expected at",
                             rowcol=idxst.rowcol(pos))
    starter = m.group(2) # """ or ''' or " or '
    # the first character behind the prefix and the opening quote(s):
    content_start = m.end()
    search_from = content_start
    while True:
        idx = st.find(starter, search_from)
        # if search_from>len(st), idx is also -1
        if idx == -1:
            raise ParseException("end of string not found at",
                                 rowcol=idxst.rowcol(pos))
        # count the backslashes directly in front of the quote, but never
        # look further back than the start of the string content:
        backslashes = 0
        look = idx - 1
        while look >= content_start and st[look] == "\\":
            backslashes += 1
            look -= 1
        if backslashes % 2 == 0:
            # the quote is not escaped, this is the end of the string
            return (pos, idx + len(starter))
        # an escaped quote char, continue the search behind it
        search_from = idx + 1

def parseComment(idxst,pos):
    r"""parse a python comment.

    The comment starts with the '#' at pos and extends up to and including
    the next line separator (LINESEP) or, if there is none, up to the end of
    the string.

    Returns the tuple (pos, end), end is the index of the first character
    *after* the comment.

    Raises TypeError if idxst is not an IndexedString and ParseException if
    there is no '#' at pos, this includes a pos behind the end of the string.

    Here are some examples, the test function restores the default line
    separator "\n" so other doctests are not affected:

    >>> def test(st,pos,sep=None):
    ...     if sep:
    ...         change_linesep(sep)
    ...     idxst= IndexedString(st)
    ...     (a,b)= parseComment(idxst,pos)
    ...     print(repr(st[a:b]))
    ...     change_linesep("\n")
    ...
    >>> test("#abc",0)
    '#abc'
    >>> test("#abc\nef",0,"\n")
    '#abc\n'
    >>> test("#abc\r\nef",0,"\r\n")
    '#abc\r\n'
    >>> test("xy#abc",2)
    '#abc'
    >>> test("xy#abc\nef",2,"\n")
    '#abc\n'
    >>> test("xy#abc\nef",3)
    Traceback (most recent call last):
        ...
    ParseException: start of comment not found at line 1, col 4
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    st = idxst.st()
    # startswith() is also safe when pos is behind the end of the string
    if not st.startswith("#", pos):
        raise ParseException("start of comment not found at",
                             rowcol=idxst.rowcol(pos))
    idx_lf = st.find(LINESEP, pos + 1)
    if idx_lf == -1:
        # no line separator, the comment is the rest of the string
        return (pos, len(st))
    return (pos, idx_lf + LINESEP_LEN)

# The things that matter while looking for the closing bracket: the start of
# a string literal (with some of the possible prefixes), the start of a
# comment, an opening and a closing round bracket. A string prefix that is
# not listed here is skipped, the match then begins at the quote character.
rx_CodePart= re.compile(r'''((?:UR|Ur|uR|ur|r|u|R|U|)(?:"""|''' + """'''""" + \
                        r'''|'|")|#|\(|\))''')

def parseCode(idxst,pos):
    r"""parse python code, it MUST start with a '('.

    Looks for the ')' that closes the '(' at pos. Brackets within string
    literals and comments are ignored, inner bracket expressions are parsed
    by a recursive call.

    Returns the tuple (pos, end), end is the index of the first character
    *after* the closing bracket.

    Raises TypeError if idxst is not an IndexedString and ParseException if
    there is no '(' at pos (this includes a pos behind the end of the
    string) or if the closing bracket is not found. Exceptions raised by
    parseComment and parseStringLiteral are passed on.

    Here are some examples:

    >>> def test(st,pos):
    ...     idxst= IndexedString(st)
    ...     (a,b)= parseCode(idxst,pos)
    ...     print(st[a:b])
    ...
    >>> test(r'(a+b)',0)
    (a+b)
    >>> test(r'(a+(b*c))',0)
    (a+(b*c))
    >>> test(r'(a+(b*c)+")")',0)
    (a+(b*c)+")")
    >>> test(r"(a+(b*c)+''')''')",0)
    (a+(b*c)+''')''')
    >>> test(r"(a+(b*c)+''')'''+# comment )\n)",0)
    Traceback (most recent call last):
        ...
    ParseException: end of bracket expression not found at line 1, col 1
    >>>
    >>> test("(a+(b*c)+''')'''+# comment )\n)",0)
    (a+(b*c)+''')'''+# comment )
    )
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    st = idxst.st()
    # startswith() is also safe when pos is behind the end of the string
    if not st.startswith("(", pos):
        raise ParseException("start of bracket expression not found at",
                             rowcol=idxst.rowcol(pos))
    startpos = pos + 1
    while True:
        m = rx_CodePart.search(st, startpos)
        if m is None:
            raise ParseException("end of bracket expression not found at",
                                 rowcol=idxst.rowcol(pos))
        matched = m.group(1)
        if matched == ")":
            # the bracket that belongs to the '(' at pos
            return (pos, m.end())
        # everything else is skipped as a whole, the search continues
        # behind it:
        if matched == "#":
            (_, startpos) = parseComment(idxst, m.start())
        elif matched == "(":
            (_, startpos) = parseCode(idxst, m.start())
        else:
            # from here it must be a string literal
            (_, startpos) = parseStringLiteral(idxst, m.start())

class ParsedItem:
    """Base class of parsed items.

    An item is a part of the source string held by an IndexedString, it
    reaches from index start up to and *including* index end.
    """

    def __init__(self, idxst, start, end):
        """Store source and range, raise TypeError if idxst has the wrong type."""
        if not isinstance(idxst, IndexedString):
            raise TypeError("idxst par wrong: %s" % repr(idxst))
        self._idxst = idxst
        self._start = start
        self._end = end

    def string(self):
        """Return the string that represents the ParsedItem."""
        # self._end is inclusive, a slice needs the index behind it
        return self._idxst.st()[self._start:self._end + 1]

    def start(self):
        """Return the start of the ParsedItem in the source string."""
        return self._start

    def end(self):
        """Return the end of the ParsedItem in the source string."""
        return self._end

    def rowcol(self, pos=None):
        """Calculate (row,column) from a string position.

        Without pos the start of the ParsedItem is taken.
        """
        if pos is None:
            pos = self.start()
        return self._idxst.rowcol(pos)

    def positions(self):
        """Return start and end of ParsedItem as the string "(start, end)"."""
        return "(%d, %d)" % (self._start, self._end)

    def __str__(self):
        return "('%s', %s, %s)" % (self.__class__.__name__,
                                   self.positions(), repr(self.string()))

    def __repr__(self):
        return "%s(%s, %s, %s)" % (self.__class__.__name__,
                                   repr(self._idxst), repr(self._start),
                                   repr(self._end))

class ParsedLiteral(ParsedItem):
    """Class of a parsed literal.

    A literal is a substring in the input that shouldn't be modified by
    pyexpander.

    The constructor is inherited unchanged from ParsedItem.
    """

class ParsedComment(ParsedItem):
    """Class of a parsed comment.

    A comment in pyexpander starts with '$#'.

    The constructor is inherited unchanged from ParsedItem.
    """

class ParsedVar(ParsedItem):
    """Class of a parsed variable.

    A variable in pyexpander has the form "$(identifier)".

    The constructor is inherited unchanged from ParsedItem.
    """

class ParsedEval(ParsedItem):
    """Class of a pyexpander expression.

    A pyexpander expression has the form "$(expression)" e.g. "$(a+1)". This is
    different from ParsedVar where the string within the brackets is a simple
    identifier.

    The constructor is inherited unchanged from ParsedItem.
    """

class ParsedPureCommand(ParsedItem):
    """Class of a pyexpander command without arguments.

    A pure command has the form "$name". Such a command has no arguments which
    would be enclosed in round brackets immediately following the name.

    The constructor is inherited unchanged from ParsedItem.
    """

class ParsedCommand(ParsedItem):
    """Class of a pyexpander command with arguments.

    A command has the form "$name(argument1, argument2, ...)".

    The range start..end of the item covers the text between the round
    brackets, the name of the command is kept in the attribute ident.
    """

    def __init__(self, idxst, start, end, ident):
        super().__init__(idxst, start, end)
        self.ident = ident

    def args(self):
        """Return the arguments of the command as a single string."""
        return self.string()

    def __str__(self):
        return "('%s', %s, %s, %s)" % (self.__class__.__name__,
                                       self.positions(), repr(self.string()),
                                       repr(self.ident))

    def __repr__(self):
        return "%s(%s, %s, %s, %s)" % (self.__class__.__name__,
                                       repr(self._idxst), repr(self._start),
                                       repr(self._end), repr(self.ident))


# What may follow a '$' after optional whitespace: a simple identifier, an
# opening round or curly bracket or the '#' of a comment.
rx_DollarFollows = re.compile(r'\s*([A-Za-z_]\w*|\(|\{|#)')
# An opening round bracket after optional whitespace.
rx_Bracket = re.compile(r'\s*\(')

def parseDollar(idxst, pos):
    r"""parse things that follow a dollar.

    Returns the tuple (newpos, elm). newpos is the index of the first
    character *after* the parsed construct, elm is one of ParsedComment,
    ParsedVar, ParsedEval, ParsedPureCommand or ParsedCommand.

    Raises TypeError if idxst is not an IndexedString and ParseException if
    there is no '$' at pos (this includes a pos behind the end of the
    string) or if the '$' is followed by something unexpected. Exceptions
    raised by the parse functions that are called are passed on.

    Here are some examples:

    >>> def test(st,pos):
    ...   idxst= IndexedString(st)
    ...   (p,elm)= parseDollar(idxst,pos)
    ...   print("Parsed: %s" % elm)
    ...   print("rest of string:%s" % st[p:])
    ...
    >>> test("$abc",0)
    Parsed: ('ParsedPureCommand', (1, 3), 'abc')
    rest of string:
    >>> test("$abc%&/",0)
    Parsed: ('ParsedPureCommand', (1, 3), 'abc')
    rest of string:%&/
    >>> test("$abc(2*3)",0)
    Parsed: ('ParsedCommand', (5, 7), '2*3', 'abc')
    rest of string:
    >>> test(" $abc(2*sin(x))",1)
    Parsed: ('ParsedCommand', (6, 13), '2*sin(x)', 'abc')
    rest of string:
    >>> test(" $abc(2*sin(x))bn",1)
    Parsed: ('ParsedCommand', (6, 13), '2*sin(x)', 'abc')
    rest of string:bn
    >>> test(" $# a comment\nnew line",1)
    Parsed: ('ParsedComment', (3, 13), ' a comment\n')
    rest of string:new line
    >>> test("$(abc)",0)
    Parsed: ('ParsedVar', (2, 4), 'abc')
    rest of string:
    >>> test("$(abc*2)",0)
    Parsed: ('ParsedEval', (2, 6), 'abc*2')
    rest of string:
    >>> test(" $(2*x(y))abc",1)
    Parsed: ('ParsedEval', (3, 8), '2*x(y)')
    rest of string:abc
    >>> test(" $   (2*x(y))abc",1)
    Parsed: ('ParsedEval', (6, 11), '2*x(y)')
    rest of string:abc
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    st = idxst.st()
    # startswith() is also safe when pos is behind the end of the string
    if not st.startswith("$", pos):
        raise ParseException("'$' expected at",
                             rowcol=idxst.rowcol(pos))
    m = rx_DollarFollows.match(st, pos + 1)
    if m is None:
        raise ParseException("unexpected characters after '$' at",
                             rowcol=idxst.rowcol(pos))
    pos = m.start(1) # start of group 1, whitespace is skipped
    matched = m.group(1)
    # Note: the ParsedItem classes take an *inclusive* end index while the
    # parse functions return the index *behind* the parsed text.
    if matched == "#":
        # an expander comment, the item is the text behind the '#'
        (a, b) = parseComment(idxst, pos)
        return (b, ParsedComment(idxst, a + 1, b - 1))
    if matched == "(":
        (a, b) = parseCode(idxst, pos)
        # a single identifier between the brackets is a variable,
        # everything else is an expression:
        if rx_pyIdent.match(st, a + 1, b - 1) is not None:
            elm = ParsedVar(idxst, a + 1, b - 2)
        else:
            elm = ParsedEval(idxst, a + 1, b - 2)
        return (b, elm)
    if matched == "{":
        # a purecommand enclosed in "{}" brackets
        (a, b) = parseBracketed(idxst, pos)
        return (b, ParsedPureCommand(idxst, a + 1, b - 2))
    # from here: a purecommand or a command
    # skip spaces and look for an opening bracket:
    mb = rx_Bracket.match(st, m.end())
    if mb is None:
        return (m.end(), ParsedPureCommand(idxst, pos, m.end() - 1))
    # the bracket was found, parse the python code enclosed in brackets:
    (a, b) = parseCode(idxst, mb.end() - 1)
    return (b, ParsedCommand(idxst, a + 1, b - 2, matched))

def parseBackslash(idxst, pos):
    r"""parses a backslash.

    Returns the tuple (newpos, elm), newpos is the index where parsing
    should continue:

    - a backslash followed by a backslash gives a literal with one
      backslash, both characters are consumed
    - a backslash followed by '$' gives a literal with the '$', both
      characters are consumed
    - a backslash followed by the line separator (LINESEP) is a line
      continuation, both are consumed and elm is None
    - in all other cases elm is a literal with the backslash and only the
      backslash is consumed

    Raises TypeError if idxst is not an IndexedString and ParseException if
    there is no backslash at pos, this includes a pos behind the end of the
    string.

    Here are some examples, the test function restores the default line
    separator "\n" so other doctests are not affected:

    >>> def test(st,pos,sep=None):
    ...     if sep:
    ...         change_linesep(sep)
    ...     idxst= IndexedString(st)
    ...     (p,elm)= parseBackslash(idxst,pos)
    ...     print("Parsed: %s" % elm)
    ...     print("rest of string:%s" % repr(st[p:]))
    ...     change_linesep("\n")
    ...
    >>> test(r"\abc",0)
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:'abc'
    >>> test("\\",0)
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:''
    >>> test("\\\rab",0,"\r")
    Parsed: None
    rest of string:'ab'
    >>> test("\\\rab",0,"\n")
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:'\rab'
    >>> test("\\\rab",0,"\r\n")
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:'\rab'
    >>> test("\\\nab",0,"\r")
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:'\nab'
    >>> test("\\\nab",0,"\n")
    Parsed: None
    rest of string:'ab'
    >>> test("\\\nab",0,"\r\n")
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:'\nab'
    >>> test("\\\r\nab",0,"\r")
    Parsed: None
    rest of string:'\nab'
    >>> test("\\\r\nab",0,"\n")
    Parsed: ('ParsedLiteral', (0, 0), '\\')
    rest of string:'\r\nab'
    >>> test("\\\r\nab",0,"\r\n")
    Parsed: None
    rest of string:'ab'
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    st = idxst.st()
    # startswith() is also safe when pos is behind the end of the string
    if not st.startswith("\\", pos):
        raise ParseException("backslash expected at",
                             rowcol=idxst.rowcol(pos))
    # the character behind the backslash, "" if the string ends here:
    nextchar = st[pos + 1:pos + 2]
    if nextchar == "\\":
        # an escaped backslash
        return (pos + 2, ParsedLiteral(idxst, pos, pos))
    if nextchar == "$":
        # an escaped dollar
        return (pos + 2, ParsedLiteral(idxst, pos + 1, pos + 1))
    # startswith() is False when the rest is too short for LINESEP
    if nextchar and st.startswith(LINESEP, pos + 1):
        # a line continuation, nothing is added to the output
        return (pos + LINESEP_LEN + 1, None)
    # a backslash without a special meaning
    return (pos + 1, ParsedLiteral(idxst, pos, pos))

# The two characters with a special meaning at the top level.
rx_top = re.compile(r'(\$|\\)')

def parseAll(idxst, pos):
    r"""parse everything.

    Splits the string, starting at pos, into a list of ParsedItem objects.
    Text without '$' and backslash becomes a ParsedLiteral, a backslash is
    handled by parseBackslash and a '$' by parseDollar.

    Returns the list of items, it is empty if pos is not within the string.

    Raises TypeError if idxst is not an IndexedString, exceptions raised by
    parseBackslash and parseDollar are passed on.

    >>> def test(st,pos):
    ...     idxst= IndexedString(st)
    ...     pprint(parseAll(idxst,pos))
    ...

    >>> test("abc",0)
    ('ParsedLiteral', (0, 2), 'abc')
    >>> test("abc$xyz",0)
    ('ParsedLiteral', (0, 2), 'abc')
    ('ParsedPureCommand', (4, 6), 'xyz')
    >>> test("abc${xyz}efg",0)
    ('ParsedLiteral', (0, 2), 'abc')
    ('ParsedPureCommand', (5, 7), 'xyz')
    ('ParsedLiteral', (9, 11), 'efg')
    >>> test("abc$xyz(2*4)",0)
    ('ParsedLiteral', (0, 2), 'abc')
    ('ParsedCommand', (8, 10), '2*4', 'xyz')
    >>> test("abc$(2*4)ab",0)
    ('ParsedLiteral', (0, 2), 'abc')
    ('ParsedEval', (5, 7), '2*4')
    ('ParsedLiteral', (9, 10), 'ab')
    >>> test("abc\\$(2*4)ab",0)
    ('ParsedLiteral', (0, 2), 'abc')
    ('ParsedLiteral', (4, 4), '$')
    ('ParsedLiteral', (5, 11), '(2*4)ab')
    >>> test("ab$func(1+2)\\\nnew line",0)
    ('ParsedLiteral', (0, 1), 'ab')
    ('ParsedCommand', (8, 10), '1+2', 'func')
    ('ParsedLiteral', (14, 21), 'new line')
    >>> test("ab$func(1+2)\nnew line",0)
    ('ParsedLiteral', (0, 1), 'ab')
    ('ParsedCommand', (8, 10), '1+2', 'func')
    ('ParsedLiteral', (12, 20), '\nnew line')
    >>> test("ab$(xyz)(56)",0)
    ('ParsedLiteral', (0, 1), 'ab')
    ('ParsedVar', (4, 6), 'xyz')
    ('ParsedLiteral', (8, 11), '(56)')

    >>> test(r'''
    ... Some text with a macro: $(xy)
    ... an escaped dollar: \$(xy)
    ... a macro within letters: abc${xy}def
    ... a pyexpander command structure:
    ... $if(a=1)
    ... here
    ... $else
    ... there
    ... $endif
    ... now a continued\
    ... line
    ... from here:$# the rest is a comment
    ... now an escaped continued\\
    ... line
    ... ''',0)
    ('ParsedLiteral', (0, 24), '\nSome text with a macro: ')
    ('ParsedVar', (27, 28), 'xy')
    ('ParsedLiteral', (30, 49), '\nan escaped dollar: ')
    ('ParsedLiteral', (51, 51), '$')
    ('ParsedLiteral', (52, 83), '(xy)\na macro within letters: abc')
    ('ParsedPureCommand', (86, 87), 'xy')
    ('ParsedLiteral', (89, 124), 'def\na pyexpander command structure:\n')
    ('ParsedCommand', (129, 131), 'a=1', 'if')
    ('ParsedLiteral', (133, 138), '\nhere\n')
    ('ParsedPureCommand', (140, 143), 'else')
    ('ParsedLiteral', (144, 150), '\nthere\n')
    ('ParsedPureCommand', (152, 156), 'endif')
    ('ParsedLiteral', (157, 172), '\nnow a continued')
    ('ParsedLiteral', (175, 189), 'line\nfrom here:')
    ('ParsedComment', (192, 214), ' the rest is a comment\n')
    ('ParsedLiteral', (215, 238), 'now an escaped continued')
    ('ParsedLiteral', (239, 239), '\\')
    ('ParsedLiteral', (241, 246), '\nline\n')
    """
    if not isinstance(idxst, IndexedString):
        raise TypeError("idxst par wrong: %s" % repr(idxst))
    st = idxst.st()
    parselist = []
    st_len = len(st)
    while pos < st_len:
        m = rx_top.search(st, pos)
        if m is None:
            # no more special characters, the rest is a literal
            parselist.append(ParsedLiteral(idxst, pos, st_len - 1))
            break
        if m.start() > pos:
            # the text in front of the special character
            parselist.append(ParsedLiteral(idxst, pos, m.start() - 1))
        if m.group(1) == "\\":
            (pos, elm) = parseBackslash(idxst, m.start())
            # a line continuation gives no element
            if elm is not None:
                parselist.append(elm)
        else:
            # from here it must be a dollar sign
            (pos, elm) = parseDollar(idxst, m.start())
            parselist.append(elm)
    return parselist

def pprint(parselist):
    """Pretty print a parselist.

    Each element of parselist is converted with str() and printed on a line
    of its own.
    """
    for elm in parselist:
        print(str(elm))

def _test():
    """Run all doctests of this module, framed by two progress messages."""
    # pylint: disable= import-outside-toplevel
    from doctest import testmod
    print("testing...")
    testmod()
    print("done")

# Run the doctests when the module is executed as a script.
if __name__ == "__main__":
    _test()