1. 3

  2. 1

    So, here is my attempt at a left recursive PEG parser following Guido’s approach, but simplified to the extreme (original without left recursion handling from here), also posted previously. Rather than embedding actions, I think a better approach is to postpone actions to a later time after parsing.

    First, the grammar for the PEG (left recursive keys are indicated with a single element tuple):

    term_grammar = {
        'expr': [
            [('expr',), 'add_op', 'expr'],
            [('expr',), 'mul_op', 'expr'],
        'fact': [
        'digits': [
        'digit': [[str(i)] for i in list(range(10))],
        'add_op': [['+'], ['-']],
        'mul_op': [['*'], ['/']]

    Now the parser

    prev = {}
    class peg_parse:
        def __init__(self, grammar):
            self.grammar = {k:[tuple(l) for l in rules] for k,rules in grammar.items()}
        def literal_match(self, part, text, tfrom):
            return (tfrom + len(part), (part, [])) if text[tfrom:].startswith(part) else (tfrom, None)
        def unify_key(self, key, text, at):
            if key not in self.grammar:
                return (at + len(key), (key, [])) if text[at:].startswith(key) else (at, None)
            rules = self.grammar[key]
            for rule in rules:
                l, res = self.unify_rule(rule, text, at)
                if res is not None: return l, (key, res)
            return (0, None)
        def unify_rule(self, parts, text, tfrom):
            results = []
            for i,part in enumerate(parts):
                if isinstance(part, tuple):
                    tfrom, res = self.unify_key_recursive(part[0], text, tfrom)
                    tfrom, res = self.unify_key(part, text, tfrom)
                if res is None: return tfrom, None
            return tfrom, results
        def unify_key_recursive(self, key, text, at):
            if prev.get((key, at), None) is not None:
                return prev[(key, at)]
            current = (0, None)
            prev[(key, at)] = current
            rules = self.grammar[key]
            while True:
                l = None
                for rule in rules:
                    l, res = self.unify_rule(rule, text, at)
                    if res is not None: break
                if l is None: return current
                if l > current[0]:
                    prev[(key, at)] = (l, res)
                    current = (l, res)
                    prev[(key, at)] = None
                    return current

    Using it:

    def main(to_parse):
        result = peg_parse(term_grammar).unify_key_recursive('expr', to_parse, 0)
        assert (len(to_parse) - result[0]) == 0
    if __name__ == '__main__':

    It parses 1+(22*356-1) to

      [('fact', [('digits', [('digit', [('1', [])])])])],
      ('add_op', [('+', [])]),
          [('(', []),
                [('fact', [('digits', [('digit', [('2', [])]), ('digits', [('digit', [('2', [])])])])])],
                ('mul_op', [('*', [])]),
                  [('fact', [('digits', [
                      ('digit', [('3', [])]),
                      ('digits', [('digit', [('5', [])]), ('digits', [('digit', [('6', [])])])])])])])],
              ('add_op', [('-', [])]),
              ('expr', [('fact', [('digits', [('digit', [('1', [])])])])])],
           (')', [])])])

    Anything I missed?