DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

Snippets has posted 5883 posts at DZone. View Full User Profile

Redivider Interpreter In Python

11.14.2008
| 1730 views |
  • submit to reddit
        An interpreter for the Redivider esoteric language. Written by MizardX.

More information: http://esolangs.org/wiki/Redivider

#!/bin/env python

# Author: MizardX
# 
# Feel free to do whatever you want with it.

from __future__ import with_statement

import sys
import os
import re
import getopt
import functools
import traceback

class HardFail(RuntimeError):
    pass

class ParserError(SyntaxError):
    pass


class Environment(object):
    def __init__(self,parent=None):
        self.vars = {}
        self.parent = parent
    def lookup(self,name):
        if name in self.vars:
            return 'var', self.vars[name]
        if self.parent:
            return self.parent.lookup(name)
        raise NameError("No variable or declaration named %r" % name)
    def assign(self,name,value):
        # overwriting or shadowing any other vars with same name
        self.vars[name] = value
    def assign_all(self,dict):
        for name,value in dict.iteritems():
            self.assign(name,value)
    def __contains__(self,name):
        if name in self.vars:
            return True
        elif self.parent and name in self.parent:
            return True
        return False
    def has_localy(self,name):
        return name in self.vars


class Node(object):
    pass

# <program> ::= {<declaration>}
class Program(Environment,Node):
    def __init__(self):
        super(Program,self).__init__()
        self.parent = None
    def assign(self,name,value):
        if self.has_localy(name):
            raise KeyError("Duplicate declaration: %s" % name)
        super(Program,self).assign(name,value)
    def lookup(self,name):
        if name in self.vars:
            return 'decl', self.vars[name]
        raise NameError("No variable or declaration named %r" % name)
    def run(self,name,input):
        type, parser = self.lookup(name)
        return parser.bind(()).run(self,input)
    def __str__(self):
        return '\n'.join(str(d) for d in self.vars.values())

# <declaration> ::= <word> ["(" [<word> {"," <word>}] ")"] ":" <piexpr> "."
class Declaration(Node):
    def __init__(self,name,params,expr):
        self.name = name
        self.params = tuple(params)
        self.expr = expr
    def bind(self,args):
        if len(args) != len(self.params):
            raise ValueError("Invalid number of arguments to %r" % self.name)
        arg_dict = dict(zip(self.params,args))
        return BoundExpression(self, arg_dict)
    def __str__(self):
        return '%s(%s): %s' % (self.name, ', '.join(str(p) for p in self.params), self.expr)

class BoundExpression(Node):
    def __init__(self,decl,arg_dict):
        self.decl = decl
        self.arg_dict = arg_dict
    def run(self,env,input):
        # Throw away any block scopes, keep only the global scope
        while env.parent is not None:
            env = env.parent
        env = Environment(env)
        env.assign_all(self.arg_dict)
        return self.decl.expr.run(env,input)
    def __str__(self):
        return repr(self.arg_dict) + ' => ' + str(self.expr)

# <piexpr> ::= <plexpr> "|" <piexpr> | <plexpr>
class Alternation(Node):
    def __init__(self,branches):
        self.branches = tuple(branches)
    def run(self,env,input):
        for branch in self.branches:
            result = branch.run(env,input)
            if result is not None:
                return result
        return None
    def __str__(self):
        return '(%s)' % ' | '.join(str(b) for b in self.branches)

# <plexpr> ::= <wexpr> "+" <plexpr> | <wexpr>
class Concaternation(Node):
    def __init__(self,joins):
        self.joins = tuple(joins)
    def run(self,env,input):
        parts = []
        for index,join in enumerate(self.joins):
            result = join.run(env,input)
            if result is None:
                if index > 0:
                    raise HardFail("Sub-expression failed: %s" % join)
                return None
            value,input = result
            parts.append(value)
        return ''.join(parts), input
    def __str__(self):
        return '(%s)' % ' + '.join(str(j) for j in self.joins)

# <wexpr> ::= "(" <piexpr> ")"
# <wexpr> ::= <string>
class String(Node):
    def __init__(self,value):
        self.value = value
    def run(self,env,input):
        return self.value, input
    def __str__(self):
        return repr(self.value)

# <wexpr> ::= <regex>
class Regex(Node):
    def __init__(self,pattern):
        self.source = pattern
        self.pattern = re.compile(pattern)
    def run(self,env,input):
        match = self.pattern.match(input)
        if match:
            return match.group(), input[match.end():]
        else:
            return None
    def __str__(self):
        return '/%s/' % self.source

# <wexpr> ::= <word>
class Name(Node):
    def __init__(self,value):
        self.value = value
    def run(self,env,input):
        if self.value in env:
            type, value = env.lookup(self.value)
            if type == 'var':
                return value, input
            elif type == 'decl':
                return value.bind(()).run(env,input)
        raise HardFail("Undefined name: %r" % self.value)
    def __str__(self):
        return self.value

# <wexpr> ::= <word> <params>
# <params> ::= ["(" [<piexpr> {"," <piexpr>}] ")"]
class Call(Node):
    def __init__(self,name,argexprs):
        self.name = name
        self.argexprs = argexprs
    def run(self,env,input):
        arguments = []
        type, parser = env.lookup(self.name)
        if type != 'decl':
            raise HardFail("Not a parser: %r" % self.name)
        # Evaluate each argument in the current scope
        for index,arg in enumerate(self.argexprs):
            result = arg.run(env,input)
            if result is None:
                if index > 0:
                    raise HardFail("Sub expression soft failed: %s" % self)
                return None
            value, input = result
            arguments.append(value)
        return parser.bind(arguments).run(env,input)
    def __str__(self):
        return "%s(%s)" % (self.name, ', '.join(str(a) for a in self.argexprs))

# <wexpr> ::= "{" <blockguts> "}"
# <blockguts> ::= {<blockline> ";"} <piexpr>
# <blockline> ::= [<word> ":"] <piexpr>
class Block(Node):
    def __init__(self,lines):
        self.lines = tuple(lines)
    def run(self,env,input):
        value = None
        # Create a new scope for the block
        env2 = Environment(env)
        for index,(name,expr) in enumerate(self.lines):
            result = expr.run(env2,input)
            if result is None:
                if index > 0:
                    raise HardFail("Sub expression failed: %s" % expr)
                return None
            value, input = result
            if name:
                env2.assign(name,value)
        return value, input
    def __str__(self):
        return "{ %s }" % '; '.join('%s%s' % (('%s: ' % n) if n else '', e) for n,e in self.lines)

# <wexpr> ::= <wexpr> "[" <piexpr> "]"
class Run(Node):
    def __init__(self,parser,expr):
        self.parser = parser
        self.expr = expr
    def run(self,env,input):
        result = self.expr.run(env,input)
        if result is None:
            return None
        value1, input1 = result

        result = self.parser.run(env,value1)
        if result is None:
            return None
        value2, input2 = result

        return value2, input1
    def __str__(self):
        return '%s[ %s ]' % (self.parser, self.expr)

class Lexer(object):
    SKIP = {
        'whitespace': re.compile(r'[^\S\n]+'),
        'newline': re.compile(r'\n'),
        'comment': re.compile(r'#.*'),
    }
    
    TOKENS = {
        'word': re.compile(r'(\w+)'),
        'string': re.compile(r'"((?:\\.|[^"])*)"'),
        'regex': re.compile(r'/((?:\\.|[^/])*)/'),
        'lparen': re.compile(r'(\()'),
        'comma': re.compile(r'(,)'),
        'rparen': re.compile(r'(\))'),
        'colon': re.compile(r'(:)'),
        'bar': re.compile(r'(\|)'),
        'plus': re.compile(r'(\+)'),
        'lbrace': re.compile(r'(\{)'),
        'semicolon': re.compile(r'(;)'),
        'rbrace': re.compile(r'(})'),
        'dot': re.compile(r'(\.)'),
        'lbracket': re.compile(r'(\[)'),
        'rbracket': re.compile(r'(\])'),
        'eof': re.compile(r'()$'),
    }

    def __init__(self,text,pos=0):
        self.text = text
        self.pos = 0
        self.line = 1

    def __iter__(self):
        return self

    def next(self):
        if self.pos >= len(self.text):
            return 'eof', ''

        self.skip_whitespace()

        for name,pattern in self.TOKENS.iteritems():
            match = pattern.match(self.text,self.pos)
            if match:
                self.pos = match.end()
                return name, match.group(1)

        token = self.text[self.pos:self.pos+1]
        token = repr(token) if token else '<eof>'
        raise SyntaxError("Invalid token: %s" % token)

    def skip_whitespace(self):
        while True:
            for name,pattern in self.SKIP.iteritems():
                match = pattern.match(self.text,self.pos)
                if match:
                    self.pos = match.end()
                    if name == 'newline': self.line += 1
                    break
            else:
                return

class Parser(object):
    def __init__(self,text,pos=0):
        self.text = text
        self.lexer = Lexer(text,pos)
        self.buffer = []
        self._next()

    def _error(self,format,*args):
        message = "Line %d: %s" % (self.lexer.line, format % args)
        raise ParserError(message)

    def _next(self):
        if self.buffer:
            self.ttype, self.tval = self.buffer.pop()
        else:
            self.ttype, self.tval = self.lexer.next()

    def _pushback(self,ttype,tval):
        self.buffer.append((self.ttype,self.tval))
        self.ttype,self.tval = ttype,tval

    def _expect(self,ttype):
        if self.ttype == ttype:
            value = self.tval
            self._next()
            return value
        self._error('Expected: %s, found: %s', ttype, self.ttype)

    def _peek(self,*ttypes):
        if self.ttype in ttypes:
            return True
        else:
            return False

    def _match(self,ttype):
        if self.ttype == ttype:
            value = self.tval
            self._next()
            return value
        return None

    def parse(self):
        program = Program()
        while not self._peek('eof'):
            name = self._expect('word')
            params = []
            if self._match('lparen'):
                if not self._peek('rparen'):
                    params.append(self._expect('word'))
                    while self._match('comma'):
                        params.append(self._expect('word'))
                self._expect('rparen')
            self._expect('colon')
            expr = self.parse_piexpr()
            self._expect('dot')
            decl = Declaration(name,params,expr)
            program.assign(name,decl)
        return program

    def parse_piexpr(self):
        alternatives = []

        plexpr = self.parse_plexpr()
        alternatives.append(plexpr)
        while self._match('bar'):
            plexpr = self.parse_plexpr()
            alternatives.append(plexpr)


        if len(alternatives) == 1:
            return alternatives[0]

        return Alternation(alternatives)

    def parse_plexpr(self):
        joins = []

        cexpr = self.parse_cexpr()
        joins.append(cexpr)
        while self._match('plus'):
            cexpr = self.parse_cexpr()
            joins.append(cexpr)


        if len(joins) == 1:
            return joins[0]

        return Concaternation(joins)

    def parse_cexpr(self):
        wexpr = self.parse_wexpr()
        while self._match('lbracket'):
            argument = self.parse_piexpr()
            self._expect('rbracket')
            wexpr = Run(wexpr,argument)
        return wexpr

    def parse_wexpr(self):
        name = self._match('word')
        if name:
            # Name, possibly with parenthesis
            if self._match('lparen'):
                params = []
                if not self._peek('rparen'):
                    params.append(self.parse_piexpr())
                    while self._match('comma'):
                        params.append(self.parse_piexpr())
                self._expect('rparen')
                return Call(name, params)
            return Name(name)
        elif self._match('lbrace'):
            # Block
            lines = []
            while not self._peek('rbrace'):
                name = self._match('word')
                if name and not self._match('colon'):
                    self._pushback('word',name)
                    name = None
                expr = self.parse_piexpr()
                lines.append((name,expr))
                if not self._match('semicolon'):
                    break
            self._expect('rbrace')
            return Block(lines)
        elif self._match('lparen'):
            # Simple parenthesis
            expr = self.parse_piexpr()
            self._expect('rparen')
            return expr

        # String
        string = self._match('string')
        if string is not None:
            return String(unescape_string(string))

        # Regex
        regex = self._match('regex')
        if regex is not None:
            return Regex(regex)

        # Nothing matched ?!
        self._error("Unknown token: %s", self.ttype)

def unescape_string(s):
    escapes_re = re.compile(r'\\([abfnrv\\"])|\\0([0-7]{3})|\\0x([\da-fA-F]{2})')
    def unescaper(match):
        if match.group(1):
            return "\a\b\f\n\r\v\\\""["abfnrv\\\"".index(match.group(1))]
        elif match.group(2):
            return chr(int(match.group(2),8))
        elif match.group(3):
            return chr(int(match.group(3),16))
    return escapes_re.sub(unescaper,s)

def run(source,name,input):
   program = Parser(source).parse()
   result = program.run(name,input)
   return result

def main(argv):
    try:
        if len(argv) < 2:
            raise ValueError

        fn = argv[1]
        if not os.path.isfile(fn):
            print 'File not found: %s' % fn
            raise ValueError

        with open(fn,'r') as f:
            program = f.read()

        inputs = argv[2:]

    except ValueError:
        print_syntax()
        return

    program = Parser(program).parse()
    predicate = "main"

    if inputs:
        for fn in inputs:
            print>>sys.stderr,':%s:' % fn
            with open(fn,'r') as f:
                text = f.read()
            try:
                result = program.run(predicate,text)
            except HardFail, exc:
                print exc
                continue
            if result is not None:
                value, tail = result
                print value
            else:
                print '%s: Invalid input. (Soft fail)' % fn
    else:
        text = sys.stdin.read()
        try:
            result = program.run(predicate,text)
        except HardFail, exc:
            print exc
            return
        if result is not None:
            value, tail = result
            print value
        else:
            print 'Invalid input. (Soft fail)'

def print_syntax():
    print """
Syntax: python %(file)s program.rd [infiles...]

program.rd          Program read from program.rd.

infiles...          Input will read from these files, or the standard
                    input if no infiles are specified.
""" % {'file':__file__}

if __name__ == '__main__':
    sys.setrecursionlimit(10000)
    main(sys.argv)