diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..21fb2c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ + __pycache__ +.DS_Store +*.o +*.log +*.dot +.env + diff --git a/README.md b/README.md index 305027e..6a80c49 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,61 @@ -# simple_interpreter -simple demo for python interpreter +# simple_interpreter (A python interpreter implemented in python.) +一个用 python 实现的简单python解释器,分版本(与分支对应)逐步实现一个简单的python解释器功能,适合初学者了解解释器的工作原理 + +## 版本说明 +为了方便渐进式学习进度,每一个版本都创建了一个独立的分支,比如 v1.0版本对应的分支名为 v1.0, 该分支只实现了 v1.0 的功能,以此类推,逐步进行功能迭代。 +### v1.0 +supports only single-digit integers + + +支持整数的加法运算 +### v2.0 +supports multi-digit integers with +/-, and skips whitespace + +支持加法减法,支持处理表达式中的空格 +### v3.0 +supports parsing (recognizing) and interpreting arithmetic expressions that have any number of plus or minus operators in them, for example “7 - 3 + 2 - 1”. + +支持包含多个数字的加减表达式 +### v4.0 +supports parsing and interpreting arithmetic expressions with any number of multiplication and division operators in them, for example “7 * 4 / 2 * 3”. + +支持包含多个数字的乘除表达式 +### v5.0 +supports valid arithmetic expressions containing integers and any number of addition, subtraction, multiplication, and division operators. + +支持包含多个数字的加减乘除混合表达式 +### v6.0 +supports evaluating arithmetic expressions that have different operators and parentheses. + +支持包含括号的混合表达式处理 +### v7.0 +uses ASTs to represent the operator-operand model of arithmetic expressions. +支持使用 AST (abstract syntax tree 抽象语法树)来表示算术表达式 +#### 语法树可视化 +```shell +python genastdot.py "7 + 3 * (10 / (12 / (3 + 1) - 1))" > ast.dot && dot -Tpng -o ast.png ast.dot +``` +![ast.png](ast.png) + +执行之前需要先安装 dot, 参考:https://graphviz.org/ + +### v8.0 +supports unary operators (+, -) +```shell + python genastdot.py "5---2" > ast.dot && dot -Tpng -o ast_v8.png ast.dot +``` +![ast_v8.png](ast_v8.png) + + +### v9.0 +supports Python assignment statements.
+ +```shell +python interpreter.py assignments.txt +``` + +```shell +python genastdot.py assignments.txt > ast.dot && dot -Tpng -o ast_v9.png ast.dot +``` + +### v10.0 +增加符号表记录变量的定义,以处理使用未处理的变量 diff --git a/abs_syntax_tree.py b/abs_syntax_tree.py new file mode 100644 index 0000000..73d7a25 --- /dev/null +++ b/abs_syntax_tree.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@file: abs_syntax_tree.py +@author: amazing coder +@date: 2024/8/28 +@desc: abstract-syntax tree (AST) node definitions +eg: 2 + 3 * 4 +mul_token = Token(MUL, '*') +plus_token = Token(PLUS, '+') +node_mul = BinOp(Num(Token(INTEGER, 3)), mul_token, Num(Token(INTEGER, 4))) +node_plus = BinOp(Num(Token(INTEGER, 2)), plus_token, node_mul) +""" + + +class AST(object): + """ + ASTs represent the operator-operand model. + Common base class for every node type defined in this module. + """ + def __init__(self): + pass + + +class BinOp(AST): + """ + Binary-operator node (non-leaf): stores the left operand, the operator token and the right operand. + For the expression 2 + 3, the nodes 2 and 3 are leaves and + is the binary-operator node. + """ + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + + +class Num(AST): + """ + Number node (leaf): stores the token and its value. + """ + def __init__(self, token): + self.token = token + self.value = token.value + + +class UnaryOp(AST): + """ + Unary-operator node (non-leaf): stores the operator token and its operand expression. + For the expression -2, - is the unary-operator node and 2 is the leaf node. + """ + def __init__(self, op, expr): + self.token = self.op = op + self.expr = expr + +class Assign(AST): + """ + Assignment node (non-leaf): stores the target (left), the = token and the value (right). + For the statement a = 2, a is the variable, 2 is the value and = is the assignment node. + """ + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + +class Var(AST): + """ + Variable node (leaf): stores the token and its value. + """ + def __init__(self, token): + self.token = token + self.value = token.value + +class NoOp(AST): + pass + +class Compound(AST): + def __init__(self): + self.children = [] \ No newline at end of file diff --git a/assignments.txt b/assignments.txt new file mode 100644 index 0000000..f5cba79 --- /dev/null +++ b/assignments.txt @@ -0,0
+1,5 @@ +a=1.34 +b=2 +c=a+b +d=a+b-c +e=45+f \ No newline at end of file diff --git a/ast.png b/ast.png new file mode 100644 index 0000000..d4ffb2d Binary files /dev/null and b/ast.png differ diff --git a/ast_v8.png b/ast_v8.png new file mode 100644 index 0000000..b784d91 Binary files /dev/null and b/ast_v8.png differ diff --git a/ast_v9.png b/ast_v9.png new file mode 100644 index 0000000..7089939 Binary files /dev/null and b/ast_v9.png differ diff --git a/func_test.py b/func_test.py new file mode 100644 index 0000000..6d0a32a --- /dev/null +++ b/func_test.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@file: func_test.py +@author: amazing coder +@date: 2024/8/29 +@desc: +""" +from spi_token import Token +from abs_syntax_tree import Num, BinOp, UnaryOp +from interpreter import Analyzer, Parser, Interpreter, INTEGER, MINUS + +def test_unary_op(): + """ + Test unary operators by evaluating a hand-built AST, text=5---2 + """ + text = '5---2' + six_tok = Num(Token(INTEGER, 5)) + one_tok = Num(Token(INTEGER, 2)) + minus_tok = Token(MINUS, '-') + exp_node = BinOp(six_tok, minus_tok, UnaryOp(minus_tok, UnaryOp(minus_tok, one_tok))) + interpreter = Interpreter(None) + print(interpreter.visit(exp_node)) + +def test_analyzer(): + """ + Test the lexical analyzer: print every token until EOF + """ + text = "a=45" + print(text) + analyzer = Analyzer(text) + token = analyzer.get_next_token() + while token.type != 'EOF': + print(token) + token = analyzer.get_next_token() + +def test_interpret_py_statements(): + """ + Test the interpreter on a block of assignment statements + """ + text = """a=1 + b=2 + c=a+b + d=a+b+c + e=45 + """ + print(text) + print(repr(text)) + analyzer = Analyzer(text) + parser = Parser(analyzer) + interpreter = Interpreter(parser) + interpreter.interpret() + print(interpreter.GLOBAL_SCOPE) + + + +if __name__ == '__main__': + test_interpret_py_statements() \ No newline at end of file diff --git a/genastdot.py b/genastdot.py new file mode 100644 index 0000000..9fd8895 --- /dev/null +++ b/genastdot.py @@ -0,0 +1,119 @@
+############################################################################### +# AST visualizer - generates a DOT file for Graphviz. # +# # +# To generate an image from the DOT file run $ dot -Tpng -o ast.png ast.dot # +# # +############################################################################### +import argparse +import textwrap + +from interpreter import Analyzer, Parser, NodeVisitor + + +class ASTVisualizer(NodeVisitor): + def __init__(self, parser): + self.parser = parser + self.ncount = 1 + self.dot_header = [textwrap.dedent("""\ + digraph astgraph { + node [shape=circle, fontsize=12, fontname="Courier", height=.1]; + ranksep=.3; + edge [arrowsize=.5] + + """)] + self.dot_body = [] + self.dot_footer = ['}'] + + def visit_Num(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.token.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def visit_BinOp(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.left) + self.visit(node.right) + + for child_node in (node.left, node.right): + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + + def visit_UnaryOp(self, node): + s = ' node{} [label="unary {}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.expr) + s = ' node{} -> node{}\n'.format(node._num, node.expr._num) + self.dot_body.append(s) + + def visit_Compound(self, node): + s = ' node{} [label="Compound"]\n'.format(self.ncount) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + for child in node.children: + self.visit(child) + s = ' node{} -> node{}\n'.format(node._num, child._num) + self.dot_body.append(s) + + def visit_Assign(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = 
self.ncount + self.ncount += 1 + + self.visit(node.left) + self.visit(node.right) + + for child_node in (node.left, node.right): + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + + def visit_Var(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def visit_NoOp(self, node): + s = ' node{} [label="NoOp"]\n'.format(self.ncount) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + + def gendot(self): + tree = self.parser.parse() + self.visit(tree) + return ''.join(self.dot_header + self.dot_body + self.dot_footer) + + +def main(): + argparser = argparse.ArgumentParser( + description='Generate an AST DOT file.' + ) + argparser.add_argument( + 'fname', + help='Pascal source file' + ) + args = argparser.parse_args() + fname = args.fname + text = open(fname, 'r').read() + + lexer = Analyzer(text) + parser = Parser(lexer) + viz = ASTVisualizer(parser) + content = viz.gendot() + print(content) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/genptdot.py b/genptdot.py new file mode 100644 index 0000000..f4792c8 --- /dev/null +++ b/genptdot.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@file: genptdot.py +@author: amazing coder +@date: 2024/8/28 +@desc: +""" + +############################################################################### +# # +# Parse Tree visualizer # +# # +# To generate an image from the DOT file run: # +# $ dot -Tpng -o parsetree.png parsetree.dot # +# # +############################################################################### +import argparse +import textwrap + +from interpreter import PLUS, MINUS, MUL, DIV, INTEGER, LPAREN, RPAREN, Analyzer + + +class Node(object): + def __init__(self, name): + self.name = name + self.children = [] + + def add(self, node): + self.children.append(node) + + +class RuleNode(Node): + pass + + 
+class TokenNode(Node): + pass + + +class Parser(object): + """Parses the input and builds a parse tree.""" + + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + # Parse tree root + self.root = None + self.current_node = None + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. + if self.current_token.type == token_type: + self.current_node.add(TokenNode(self.current_token.value)) + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """factor : INTEGER | LPAREN expr RPAREN""" + node = RuleNode('factor') + self.current_node.add(node) + _save = self.current_node + self.current_node = node + + token = self.current_token + if token.type == INTEGER: + self.eat(INTEGER) + elif token.type == LPAREN: + self.eat(LPAREN) + self.expr() + self.eat(RPAREN) + + self.current_node = _save + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + node = RuleNode('term') + self.current_node.add(node) + _save = self.current_node + self.current_node = node + + self.factor() + + while self.current_token.type in (MUL, DIV): + token = self.current_token + if token.type == MUL: + self.eat(MUL) + elif token.type == DIV: + self.eat(DIV) + + self.factor() + + self.current_node = _save + + def expr(self): + """ + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : INTEGER | LPAREN expr RPAREN + """ + node = RuleNode('expr') + if self.root is None: + self.root = node + else: + self.current_node.add(node) + + _save = self.current_node + self.current_node = node + + self.term() + + while self.current_token.type in
(PLUS, MINUS): + token = self.current_token + if token.type == PLUS: + self.eat(PLUS) + elif token.type == MINUS: + self.eat(MINUS) + + self.term() + + self.current_node = _save + + def parse(self): + self.expr() + return self.root + + +class ParseTreeVisualizer(object): + def __init__(self, parser): + self.parser = parser + self.ncount = 1 + self.dot_header = [textwrap.dedent("""\ + digraph astgraph { + node [shape=none, fontsize=12, fontname="Courier", height=.1]; + ranksep=.3; + edge [arrowsize=.5] + + """)] + self.dot_body = [] + self.dot_footer = ['}'] + + def bfs(self, node): + ncount = 1 + queue = [] + queue.append(node) + s = ' node{} [label="{}"]\n'.format(ncount, node.name) + self.dot_body.append(s) + node._num = ncount + ncount += 1 + + while queue: + node = queue.pop(0) + for child_node in node.children: + s = ' node{} [label="{}"]\n'.format(ncount, child_node.name) + self.dot_body.append(s) + child_node._num = ncount + ncount += 1 + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + queue.append(child_node) + + def gendot(self): + tree = self.parser.parse() + self.bfs(tree) + return ''.join(self.dot_header + self.dot_body + self.dot_footer) + + +def main(): + argparser = argparse.ArgumentParser( + description='Generate a Parse Tree DOT file.'
+ ) + argparser.add_argument( + 'text', + help='Arithmetic expression (in quotes): "1 + 2 * 3"' + ) + args = argparser.parse_args() + text = args.text + + lexer = Analyzer(text) + parser = Parser(lexer) + + viz = ParseTreeVisualizer(parser) + content = viz.gendot() + print(content) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/interpreter.py b/interpreter.py index bf41d06..c120f49 100644 --- a/interpreter.py +++ b/interpreter.py @@ -8,37 +8,37 @@ @desc: simple demo for python interpreter v2.0 v1.0 : only support single-digit integers + v2.0 : support multi-digit integers +/-, support process whitespace -v3.0 : support parse (recognize) and interpret arithmetic expressions that have any number of plus or minus operators in it, for example “7 - 3 + 2 - 1”. +v3.0 : support to parse (recognize) and interpret arithmetic expressions that have any number of plus or minus operators in it, for example “7 - 3 + 2 - 1”. +v4.0 : support to parse and interpret arithmetic expressions with any number of multiplication and division operators in them, for example “7 * 4 / 2 * 3” +v5.0 : support to handle valid arithmetic expressions containing integers and any number of addition, subtraction, multiplication, and division operators. +v6.0 : support to evaluates arithmetic expressions that have different operators and parentheses. +v7.0 : using ASTs represent the operator-operand model of arithmetic expressions. +v8.0 : support unary operators (+, -) +v9.0 : support to handle python assignment statements.
+v10.0 : handle variable not defined error """ -INTEGER, PLUS, EOF, MINUS = 'INTEGER', 'PLUS', 'EOF', 'MINUS' +import keyword +from abs_syntax_tree import BinOp, Num, UnaryOp, Var, NoOp, Compound, Assign +from spi_token import Token +from spi_symbol import VarSymbol, SymbolTable -class Token(object): - def __init__(self, type, value): - self.type = type - self.value = value - def __str__(self): - return 'Token({type}, {value})'.format( - type=self.type, - value=repr(self.value) - ) - - def __repr__(self): - return self.__str__() - -class Interpreter(object): +INTEGER, FLOAT, PLUS, EOF, MINUS, MUL, DIV, LPAREN, RPAREN, ID, ASSIGN, REPL = 'INTEGER', 'FLOAT', 'PLUS', 'EOF', 'MINUS', 'MUL', 'DIV', 'LPAREN', 'RPAREN', 'ID', 'ASSIGN', 'REPL' +PYTHON_RESERVED_KEYWORDS = {key: Token(key, key) for key in keyword.kwlist} +class Analyzer(object): + """Lexical analyzer: splits the expression text into a stream of tokens.""" def __init__(self, text): self.text = text self.pos = 0 - self.current_token = None self.current_char = self.text[self.pos] def error(self): - raise Exception('Error parsing input') + raise SyntaxError("invalid syntax") def advance(self): + """Advance the 'pos' pointer and set the 'current_char' variable.""" self.pos += 1 if self.pos > len(self.text) - 1: self.current_char = None @@ -46,79 +46,367 @@ def advance(self): self.current_char = self.text[self.pos] def skip_whitespace(self): - while self.current_char is not None and self.current_char == ' ': + """Skip whitespace, tab, newline.""" + while self.current_char is not None and self.current_char.isspace(): self.advance() - def integer(self): + def peek(self): + peek_pos = self.pos + 1 + if peek_pos > len(self.text) - 1: + return None + else: + return self.text[peek_pos] + + def number(self): """return a multi-digit integer""" result = '' while self.current_char is not None and self.current_char.isdigit(): result += self.current_char self.advance() - return int(result) + if self.current_char == '.': + result += self.current_char +
self.advance() + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return float(result) + else: + return int(result) + + def identifier(self): + """return a multi-digit identifier""" + result = '' + while self.current_char is not None and self.current_char.isalnum(): + result += self.current_char + self.advance() + + if result in PYTHON_RESERVED_KEYWORDS: + return self.error() + return Token(ID, result) def get_next_token(self): - """Lexical analyzer / scanner / tokenizer, this function breaking a sentence apart into tokens.""" + """this function breaking a sentence apart into tokens.""" while self.current_char is not None: if self.current_char.isspace(): self.skip_whitespace() continue if self.current_char.isdigit(): - return Token(INTEGER, self.integer()) + number = self.number() + return Token(INTEGER, number) if isinstance(number, int) else Token(FLOAT, number) if self.current_char == '+': self.advance() return Token(PLUS, '+') if self.current_char == '-': self.advance() return Token(MINUS, '-') + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + if self.current_char == '(': + self.advance() + return Token(LPAREN, '(') + if self.current_char == ')': + self.advance() + return Token(RPAREN, ')') + if self.current_char.isalpha(): + return self.identifier() + if self.current_char == '=': + self.advance() + return Token(ASSIGN, '=') + if self.current_char == '\\' and self.peek() == 'n': + self.advance() + self.advance() + return Token(REPL, '\\n') + self.error() return Token(EOF, None) + +class Parser(object): + def __init__(self, analyzer): + self.analyzer = analyzer + self.current_token = self.analyzer.get_next_token() + + def error(self): + raise Exception('Invalid Syntax') + def eat(self, token_type): """compare the current token type with the passed token type and if they match then "eat" the current 
token and assign the next token to the self.current_token, otherwise raise an exception.""" if self.current_token.type == token_type: - self.current_token = self.get_next_token() + self.current_token = self.analyzer.get_next_token() else: self.error() def term(self): - if self.current_token.type != INTEGER: - self.error() - term = self.current_token - self.eat(INTEGER) - return term.value + """计算乘除表达块: factor((MUL|DIV) factor)* """ + node = self.factor() + while self.current_token.type in (MUL, DIV): + token = self.current_token + if self.current_token.type == MUL: + self.eat(MUL) + elif self.current_token.type == DIV: + self.eat(DIV) + node = BinOp(left=node, op=token, right=self.factor()) + return node + + def variable(self): + node = Var(self.current_token) + self.eat(ID) + return node + + def empty(self): + return NoOp() + + def assignment_statement(self): + """ + assignment_statement : variable ASSIGN expr + """ + left = self.variable() + token = self.current_token + self.eat(ASSIGN) + right = self.expr() + node = Assign(left=left, op=token, right=right) + return node + + def statement(self): + """statement : assignment_statement | empty""" + if self.current_token.type == ID: + node = self.assignment_statement() + else: + node = self.empty() + return node + + def statements(self): + """ + statements : statement + | statement REPL statement_list + """ + node = self.statement() + results = [node] + while self.current_token.type == ID: + results.append(self.statement()) + + return results + + def compound_statement(self): + """ + compound_statement : statement_list + """ + # self.eat(REPL) + nodes = self.statements() + # self.eat(REPL) + + root = Compound() + for node in nodes: + root.children.append(node) + return root + + def program(self): + """program : compound_statement """ + node = self.compound_statement() + return node def expr(self): - """Parser / Parser / Interpreter, this function takes a tokenized stream - and produces an abstract syntax tree, or 
more commonly a "value".""" - self.current_token = self.get_next_token() - result = self.term() - while True: - op = self.current_token - if op.type == PLUS: - result += self.term() - elif op.type == MINUS: - result -= self.term() - else: - break - return result + """表达式解析:term((PLUS|MINUS) term)* . + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : INTEGER | LPAREN expr RPAREN | (PLUS|MINUS) factor + """ + node = self.term() + while self.current_token.type in (PLUS, MINUS): + token = self.current_token + if self.current_token.type == PLUS: + self.eat(PLUS) + elif self.current_token.type == MINUS: + self.eat(MINUS) + node = BinOp(left=node, op=token, right=self.term()) + return node -def main(): - while True: - try: - text = input('input a express like "1+2"(Only single digit integers are allowed in the input)> ') - except EOFError: - break + def factor(self): + """返回参与运算的数,支持整型或者带括号的表达式 INTEGER | LPAREN expr RPAREN | (PLUS|MINUS) factor | variable""" + token = self.current_token + if self.current_token.type == PLUS: + self.eat(PLUS) + return UnaryOp(op=token, expr=self.factor()) + elif self.current_token.type == MINUS: + self.eat(MINUS) + return UnaryOp(op=token, expr=self.factor()) + elif self.current_token.type == INTEGER: + self.eat(INTEGER) + return Num(token) + elif self.current_token.type == FLOAT: + self.eat(FLOAT) + return Num(token) + elif self.current_token.type == LPAREN: + self.eat(LPAREN) + node = self.expr() + self.eat(RPAREN) + return node + elif self.current_token.type == ID: + node = self.variable() + return node + else: + self.error() + + def parse(self): + node = self.program() + if self.current_token.type != EOF: + self.error() + return node + + +class NodeVisitor(object): + def visit(self, node): + method_name = 'visit_' + type(node).__name__ + visitor = getattr(self, method_name, self.generic_visit) + return visitor(node) + + def generic_visit(self, node): + raise Exception('No visit_{} 
method'.format(type(node).__name__)) + + +class Interpreter(NodeVisitor): + def __init__(self, parser): + self.parser = parser + self.GLOBAL_SCOPE = {} + + def visit_BinOp(self, node): + if node.op.type == PLUS: + return self.visit(node.left) + self.visit(node.right) + elif node.op.type == MINUS: + return self.visit(node.left) - self.visit(node.right) + elif node.op.type == MUL: + return self.visit(node.left) * self.visit(node.right) + elif node.op.type == DIV: + return self.visit(node.left) / self.visit(node.right) + + def visit_Num(self, node): + return node.token.value + + def visit_UnaryOp(self, node): + if node.op.type == PLUS: + return self.visit(node.expr) + elif node.op.type == MINUS: + return -self.visit(node.expr) + + def visit_Assign(self, node): + var_name = node.left.value + self.GLOBAL_SCOPE[var_name] = self.visit(node.right) + + def visit_NoOp(self, node): + pass + + def visit_Compound(self, node): + for child in node.children: + self.visit(child) + + def visit_Var(self, node): + var_name = node.value + val = self.GLOBAL_SCOPE.get(var_name) + if val is None: + raise NameError(repr(var_name)) + else: + return val - if not text: - continue + def visit(self, node): + if isinstance(node, BinOp): + return self.visit_BinOp(node) + elif isinstance(node, Num): + return self.visit_Num(node) + elif isinstance(node, UnaryOp): + return self.visit_UnaryOp(node) + elif isinstance(node, Var): + return self.visit_Var(node) + elif isinstance(node, Assign): + return self.visit_Assign(node) + elif isinstance(node, Compound): + return self.visit_Compound(node) + elif isinstance(node, NoOp): + return self.visit_NoOp(node) + + def interpret(self): + tree = self.parser.parse() + symbol_builder = SymbolTableBuilder() + symbol_builder.visit(tree) + return self.visit(tree) + + +class SymbolTableBuilder(NodeVisitor): + def __init__(self): + self.symtab = SymbolTable() + + def visit_BinOp(self, node): + self.visit(node.left) + self.visit(node.right) + + def visit_Num(self, 
node): + pass + + def visit_UnaryOp(self, node): + self.visit(node.expr) + + def visit_Compound(self, node): + for child in node.children: + self.visit(child) + + def visit_NoOp(self, node): + pass + + def visit_VarDecl(self, node): + type_name = node.type_node.value + type_symbol = self.symtab.lookup(type_name) + var_name = node.var_node.value + var_symbol = VarSymbol(var_name, type_symbol) + self.symtab.define(var_symbol) + + def visit_Assign(self, node): + # python代码中赋值就是定义,没有声明 + var_name = node.left.value + var_symbol = VarSymbol(var_name, None) + self.symtab.define(var_symbol) + self.visit(node.right) + + def visit_Var(self, node): + var_name = node.value + var_symbol = self.symtab.lookup(var_name) + if var_symbol is None: + raise NameError(repr(var_name)) + + def visit(self, node): + if isinstance(node, BinOp): + return self.visit_BinOp(node) + elif isinstance(node, Num): + return self.visit_Num(node) + elif isinstance(node, UnaryOp): + return self.visit_UnaryOp(node) + elif isinstance(node, Var): + return self.visit_Var(node) + elif isinstance(node, Assign): + return self.visit_Assign(node) + elif isinstance(node, Compound): + return self.visit_Compound(node) + elif isinstance(node, NoOp): + return self.visit_NoOp(node) + + + +def main(): + import sys + py_file = sys.argv[1] + # py_file = 'assignments.txt' + text = open(py_file, 'r').read() + print(f"begin parse input: {text}") + lexer = Analyzer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + result = interpreter.interpret() + print(interpreter.GLOBAL_SCOPE) - interpreter = Interpreter(text) - result = interpreter.expr() - print(result) if __name__ == '__main__': main() - diff --git a/spi_symbol.py b/spi_symbol.py new file mode 100644 index 0000000..791cd08 --- /dev/null +++ b/spi_symbol.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@file: spi_symbol.py +@author: amazing coder +@date: 2024/8/31 +@desc: 增加通用符号类 +""" + + +class Symbol(object): + def 
__init__(self, name, type=None): + self.name = name + self.symbol_type = type + + +class BuiltinTypeSymbol(Symbol): + def __init__(self, name): + super().__init__(name) + + def __str__(self): + return self.name + + __repr__ = __str__ + + +class VarSymbol(Symbol): + def __init__(self, name, type=None): + # in Python a variable can be defined without declaring a type + super().__init__(name, type) + + def __str__(self): + return f'VarSymbol:name={self.name}: type={self.symbol_type}' + + __repr__ = __str__ + + +class SymbolTable(object): + def __init__(self): + self._symbols = {} + + def __str__(self): + return 'Symbols: {symbols}'.format(symbols=[value for value in self._symbols.values()]) + + __repr__ = __str__ + + def define(self, symbol): + print('Define: %s' % symbol) + self._symbols[symbol.name] = symbol + return symbol + + def lookup(self, name): + print('Lookup: %s' % name) + symbol = self._symbols.get(name) + return symbol + + +def test_class(): + int_type = BuiltinTypeSymbol('INTEGER') + float_type = BuiltinTypeSymbol('FLOAT') + var_x = VarSymbol('x', int_type) + var_y = VarSymbol('y', float_type) + print(var_x) + print(var_y) + + +if __name__ == '__main__': + test_class() diff --git a/spi_token.py b/spi_token.py new file mode 100644 index 0000000..87bada4 --- /dev/null +++ b/spi_token.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@file: spi_token.py +@author: amazing coder +@date: 2024/8/29 +@desc: +""" + + +class Token(object): + def __init__(self, type, value): + self.type = type + self.value = value + + def __str__(self): + return 'Token({type}, {value})'.format( + type=self.type, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__()