import re
from dataclasses import dataclass
from typing import Any


# --- AST node types -------------------------------------------------------

@dataclass
class Number:
    """Numeric literal. The tokenizer converts every literal to ``float``."""

    value: float


@dataclass
class Ident:
    """Bare identifier reference."""

    name: str


@dataclass
class Call:
    """Call expression ``func(args..., key=value...)``."""

    func: str
    args: list
    kwargs: dict


@dataclass
class BinOp:
    """Binary operation; ``op`` is one of ``+ - * /``."""

    op: str
    left: Any
    right: Any


@dataclass
class UnaryOp:
    """Unary operation; the parser only ever produces ``op == '-'``."""

    op: str
    operand: Any


@dataclass
class NodeDecl:
    """Statement ``node NAME = expr``."""

    name: str
    expr: Any


@dataclass
class OutDecl:
    """Statement ``out <- expr``."""

    expr: Any


@dataclass
class ListExpr:
    """Bracketed list literal ``[a, b, ...]``."""

    items: list


@dataclass
class VoiceDecl:
    """Statement ``voice NAME { ... }``."""

    name: str
    body: list  # list of NodeDecl + OutDecl


# --- Tokenizer ------------------------------------------------------------

# Order matters: alternatives are tried left to right, so ARROW must come
# before OP-like single characters and MISMATCH must stay last as the
# catch-all for anything no other pattern accepted.
TOKEN_SPEC = [
    ('NUMBER', r'\d+\.\d+|\d+'),
    ('ARROW', r'<-'),
    ('IDENT', r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ('OP', r'[+\-*/=,()\[\]\{\}]'),
    ('NEWLINE', r'\r?\n'),
    ('SKIP', r'[ \t]+'),
    ('COMMENT', r'\#[^\n]*'),
    ('MISMATCH', r'.'),
]
TOKEN_RE = re.compile('|'.join(f'(?P<{n}>{p})' for n, p in TOKEN_SPEC))
KEYWORDS = {'node', 'out', 'voice'}


def tokenize(src):
    """Split *src* into a list of ``(kind, value)`` tuples.

    Kinds produced: ``NUMBER`` (value is a ``float``), ``ARROW``, ``IDENT``,
    ``OP``, ``KW`` (an identifier found in ``KEYWORDS``), ``NEWLINE`` (value
    normalised to ``'\\n'``) and a final ``('EOF', None)`` sentinel.
    Whitespace and ``#`` comments are dropped.

    Raises:
        SyntaxError: if a character matches no token pattern.
    """
    tokens = []
    for m in TOKEN_RE.finditer(src):
        kind = m.lastgroup
        val = m.group()
        if kind in ('SKIP', 'COMMENT'):
            continue
        if kind == 'NEWLINE':
            # Collapse '\r\n' and '\n' to a single canonical value.
            tokens.append(('NEWLINE', '\n'))
            continue
        if kind == 'MISMATCH':
            raise SyntaxError(f'Unexpected character: {val!r}')
        if kind == 'IDENT' and val in KEYWORDS:
            tokens.append(('KW', val))
            continue
        if kind == 'NUMBER':
            tokens.append(('NUMBER', float(val)))
            continue
        tokens.append((kind, val))
    tokens.append(('EOF', None))
    return tokens


# --- Parser ---------------------------------------------------------------

class Parser:
    """Recursive-descent parser over the token list made by ``tokenize``.

    Precedence, loosest to tightest: ``+ -``, then ``* /``, then unary ``-``,
    then atoms (numbers, identifiers, calls, parenthesised expressions and
    list literals).
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.i = 0  # index of the next unconsumed token

    def peek(self, offset=0):
        """Return the token *offset* positions ahead without consuming it."""
        return self.tokens[self.i + offset]

    def advance(self):
        """Consume and return the current token."""
        tok = self.tokens[self.i]
        self.i += 1
        return tok

    def expect(self, kind, val=None):
        """Consume the current token, requiring it to match *kind* (and *val*).

        Raises:
            SyntaxError: if the token kind differs, or *val* is given and the
                token value differs.
        """
        tok = self.advance()
        if tok[0] != kind or (val is not None and tok[1] != val):
            raise SyntaxError(f'Expected {kind} {val!r}, got {tok!r}')
        return tok

    def skip_newlines(self):
        """Consume any run of NEWLINE tokens at the current position."""
        while self.peek()[0] == 'NEWLINE':
            self.advance()

    def parse(self):
        """Parse statements until EOF and return them as a list."""
        out = []
        self.skip_newlines()
        while self.peek()[0] != 'EOF':
            out.append(self.parse_statement())
            self.skip_newlines()
        return out

    def parse_statement(self):
        """Parse one ``node``, ``out`` or ``voice`` statement.

        Raises:
            SyntaxError: if the current token does not start a statement, or
                a ``voice`` block contains another ``voice`` block.
        """
        tok = self.peek()
        if tok == ('KW', 'node'):
            self.advance()
            name = self.expect('IDENT')[1]
            self.expect('OP', '=')
            return NodeDecl(name, self.parse_expr())
        if tok == ('KW', 'out'):
            self.advance()
            self.expect('ARROW')
            return OutDecl(self.parse_expr())
        if tok == ('KW', 'voice'):
            self.advance()
            name = self.expect('IDENT')[1]
            self.expect('OP', '{')
            body = []
            self.skip_newlines()
            while self.peek() != ('OP', '}'):
                inner = self.parse_statement()
                if isinstance(inner, VoiceDecl):
                    raise SyntaxError('voice blocks cannot be nested')
                body.append(inner)
                self.skip_newlines()
            self.expect('OP', '}')
            return VoiceDecl(name, body)
        raise SyntaxError(f'Unexpected token at start of statement: {tok!r}')

    def parse_call_expr(self):
        """Parse ``NAME(arg, ..., key=value, ...)`` into a ``Call``.

        Newlines are tolerated after ``(``, after each ``,`` and before ``)``.
        A trailing comma is accepted.
        """
        name = self.expect('IDENT')[1]
        self.expect('OP', '(')
        self.skip_newlines()
        args, kwargs = [], {}
        while self.peek() != ('OP', ')'):
            first = self.peek()
            # kwarg pattern: NAME = expr. NAME is normally IDENT, but we also
            # let the 'voice' keyword be used as a kwarg key (poly(voice=...)).
            is_kwarg_key = (
                (first[0] == 'IDENT' or first == ('KW', 'voice'))
                and self.peek(1) == ('OP', '=')
            )
            if is_kwarg_key:
                key = self.advance()[1]
                self.advance()  # '='
                kwargs[key] = self.parse_expr()
            else:
                args.append(self.parse_expr())
            if self.peek() == ('OP', ','):
                self.advance()
                self.skip_newlines()
            else:
                break
        self.skip_newlines()
        self.expect('OP', ')')
        return Call(name, args, kwargs)

    def parse_expr(self):
        """Parse a full expression (entry point of the precedence chain)."""
        return self.parse_addsub()

    def parse_addsub(self):
        """Parse a left-associative chain of ``+`` / ``-``."""
        left = self.parse_muldiv()
        while self.peek() in (('OP', '+'), ('OP', '-')):
            op = self.advance()[1]
            left = BinOp(op, left, self.parse_muldiv())
        return left

    def parse_muldiv(self):
        """Parse a left-associative chain of ``*`` / ``/``."""
        left = self.parse_unary()
        while self.peek() in (('OP', '*'), ('OP', '/')):
            op = self.advance()[1]
            left = BinOp(op, left, self.parse_unary())
        return left

    def parse_unary(self):
        """Parse zero or more prefix ``-`` signs followed by an atom.

        Stacked signs such as ``--x`` or ``a - --b`` produce one nested
        ``UnaryOp`` per sign. Previously only a single ``-`` was accepted and
        a second one raised ``SyntaxError`` even though ``-(-x)`` parsed.
        """
        # Count the signs iteratively so a long run of '-' cannot exhaust
        # the Python call stack.
        minus_count = 0
        while self.peek() == ('OP', '-'):
            self.advance()
            minus_count += 1
        node = self.parse_atom()
        for _ in range(minus_count):
            node = UnaryOp('-', node)
        return node

    def parse_atom(self):
        """Parse a number, identifier, call, ``( expr )`` or ``[ list ]``.

        Raises:
            SyntaxError: if the current token cannot start an atom.
        """
        tok = self.peek()
        if tok[0] == 'NUMBER':
            self.advance()
            return Number(tok[1])
        if tok[0] == 'IDENT':
            # An identifier immediately followed by '(' is a call.
            if self.peek(1) == ('OP', '('):
                return self.parse_call_expr()
            self.advance()
            return Ident(tok[1])
        if tok == ('OP', '('):
            self.advance()
            self.skip_newlines()
            e = self.parse_expr()
            self.skip_newlines()
            self.expect('OP', ')')
            return e
        if tok == ('OP', '['):
            self.advance()
            self.skip_newlines()
            items = []
            while self.peek() != ('OP', ']'):
                items.append(self.parse_expr())
                if self.peek() == ('OP', ','):
                    self.advance()
                    self.skip_newlines()
                else:
                    break
            self.skip_newlines()
            self.expect('OP', ']')
            return ListExpr(items)
        raise SyntaxError(f'Unexpected token in expression: {tok!r}')


def parse(src):
    """Tokenize and parse *src*, returning the list of top-level statements."""
    return Parser(tokenize(src)).parse()