diff --git a/src/parser/ast.mbt b/src/parser/ast.mbt index 3777bc7..3c69d86 100644 --- a/src/parser/ast.mbt +++ b/src/parser/ast.mbt @@ -1,10 +1,6 @@ ///| suberror ParseError String -// region Context - -// region Type - ///| pub(all) enum Type { Unit @@ -18,23 +14,14 @@ pub(all) enum Type { Generic(String, Type) } derive(Show) -// region Components - ///| -pub(all) enum Literal { +enum Literal { Unit Bool(Bool) Int(Int) Double(Double) } derive(Show) -///| -enum LeftValue { - Identifier(String) - FieldAccess(LeftValue, String) - IndexAccess(LeftValue, Expr) -} derive(Show) - ///| enum AddSubOp { Add @@ -48,6 +35,18 @@ enum MulDivRemOp { Rem } derive(Show) +///| +struct Block(Array[Stmt], Expr?) derive(Show) + +///| +enum Pattern { + Wildcard + Identifier(String) + Literal(Literal) + Tuple(Array[Pattern]) + Enum(String?, String, Array[Pattern]) +} derive(Show) + ///| enum Expr { Or(Expr, Expr) @@ -55,28 +54,78 @@ enum Expr { Compare(CompareOperator, Expr, Expr) AddSub(AddSubOp, Expr, Expr) MulDivRem(MulDivRemOp, Expr, Expr) + Neg(Expr) + Not(Expr) If(Expr, Expr, Expr?) Match(Expr, Array[(Pattern, Expr)]) + IndexAccess(Expr, Expr) + FieldAccess(Expr, String) + FunctionCall(Expr, Array[Expr]) + ArrayMake(Expr, Expr) + StructConstruct(String, Array[(String, Expr)]) + EnumConstruct(String?, String, Array[Expr]) + Literal(Literal) + Tuple(Array[Expr]) + Array(Array[Expr]) + Identifier(String) + Block(Block) } derive(Show) ///| -enum TopLevel { - TopLetDecl(id~ : String, type_~ : Type?, expr~ : Expr) - TopFn(Function) - Struct( - id~ : String, - user_defined_type~ : Type?, - fields~ : Array[(String, Type)] - ) - Enum( - id~ : String, - user_defined_type~ : Type?, - variants~ : Array[(String, Array[Type])] - ) -} +struct Function { + id : String + user_defined_type : Type? + params : Array[(String, Type?)] + return_type : Type? + body : Block +} derive(Show) ///| -struct Program(Array[TopLevel]) +enum Binding { + Identifier(String) + Wildcard +} derive(Show) + +///| +enum Stmt { + Let(Binding, Type?, Expr) + LetTuple(Array[Binding], Type?, Expr) + LetMut(String, Type?, Expr) + Assign(Expr, Expr) + While(Expr, Array[Stmt]) + Expr(Expr) + Return(Expr?) + LocalFunction(Function) +} derive(Show) + +///| +struct TopLet { + id : String + type_ : Type? + expr : Expr +} derive(Show) + +///| +struct StructDef { + id : String + user_defined_type : Type? + fields : Array[(String, Type)] +} derive(Show) + +///| +struct EnumDef { + id : String + user_defined_type : Type? + variants : Array[(String, Array[Type])] +} derive(Show) + +///| +struct Program { + top_lets : Map[String, TopLet] + top_functions : Map[String, Function] + struct_defs : Map[String, StructDef] + enum_defs : Map[String, EnumDef] +} derive(Show) ///| fn parse_type( @@ -95,6 +144,7 @@ fn parse_type( (Array(elem_type), rest) } [LParen, .. rest] => { + // XXX: function_type has at least one type in the argument list? let (first_type, rest) = parse_type(rest) let types = [first_type] loop rest { @@ -127,11 +177,11 @@ fn parse_type( } ///| -fn parse_struct( +fn parse_struct_decl( tokens : ArrayView[Token], -) -> (TopLevel, ArrayView[Token]) raise ParseError { +) -> (StructDef, ArrayView[Token]) raise ParseError { guard tokens is [Struct, UpperIdentifier(id), .. rest] else { - raise ParseError("Expected 'struct' followed by struct name") + raise ParseError("Expected upper case struct name after 'struct'") } let (user_defined_type, rest) = if rest is [LBracket, UpperIdentifier(type_), RBracket, .. r] { @@ -144,7 +194,7 @@ fn parse_struct( } let fields = [] loop rest { - [RCurlyBracket, .. r] => (Struct(id~, user_defined_type~, fields~), r) + [RCurlyBracket, .. r] => ({ id, user_defined_type, fields }, r) [UpperIdentifier(field_name) | LowerIdentifier(field_name), Colon, .. r] => { let (field_type, r) = parse_type(r) fields.push((field_name, field_type)) @@ -159,11 +209,11 @@ fn parse_struct( } ///| -fn parse_enum( +fn parse_enum_decl( tokens : ArrayView[Token], -) -> (TopLevel, ArrayView[Token]) raise ParseError { +) -> (EnumDef, ArrayView[Token]) raise ParseError { guard tokens is [Enum, UpperIdentifier(id), .. rest] else { - raise ParseError("Expected 'enum' followed by enum name") + raise ParseError("Expected upper case enum name after 'enum'") } let (user_defined_type, rest) = if rest is [LBracket, UpperIdentifier(type_), RBracket, .. r] { @@ -176,7 +226,7 @@ fn parse_enum( } let variants = [] loop rest { - [RCurlyBracket, .. r] => (Enum(id~, user_defined_type~, variants~), r) + [RCurlyBracket, .. r] => ({ id, user_defined_type, variants }, r) [UpperIdentifier(variant_name), .. r] => { let variant_types = [] if r is [LParen, .. r] { @@ -186,10 +236,6 @@ fn parse_enum( let (variant_type, r) = parse_type(r) variant_types.push(variant_type) match r { - [Comma, RParen, ..] => - raise ParseError( - "Trailing comma in enum variant type list is not allowed", - ) [Comma, .. r] => continue r [RParen, ..] => break _ => @@ -211,39 +257,324 @@ fn parse_enum( } } +///| +fn parse_if_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + guard tokens is [If, .. rest] else { + raise ParseError("Expected 'if' at start of if expression") + } + let (cond, rest) = parse_expr(rest) + let (then_branch, rest) = parse_block_expr(rest) + let (else_branch, rest) = match rest { + [Else, If, ..] => { + let (if_expr, r) = parse_if_expr(rest[1:]) + (Some(if_expr), r) + } + [Else, LCurlyBracket, ..] => { + let (block_expr, r) = parse_block_expr(rest[1:]) + (Some(block_expr), r) + } + _ => (None, rest) + } + (If(cond, then_branch, else_branch), rest) +} + +///| +fn parse_value_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + match tokens { + + // array_make_expr + [Array, DoubleColon, LowerIdentifier("make"), LParen, .. rest] => { + let (size_expr, rest) = parse_expr(rest) + guard rest is [Comma, .. rest] else { + raise ParseError("Expected ',' after size expression in array make") + } + let (init_expr, rest) = parse_expr(rest) + guard rest is [RParen, .. rest] else { + raise ParseError("Expected ')' after init expression in array make") + } + (ArrayMake(size_expr, init_expr), rest) + } + + // struct_construct_expr + [UpperIdentifier(struct_type), DoubleColon, LCurlyBracket, .. rest] => { + let fields = [] + loop rest { + [RCurlyBracket, .. r] => (StructConstruct(struct_type, fields), r) + [LowerIdentifier(field_name) | UpperIdentifier(field_name), Colon, .. r] => { + let (field_expr, r) = parse_expr(r) + fields.push((field_name, field_expr)) + match r { + [Comma, .. r] => continue r + [RCurlyBracket, ..] => continue r + _ => raise ParseError("Expected ',' or '}' after struct field") + } + } + _ => raise ParseError("Unexpected token in struct construction") + } + } + + // TODO: enum_construct_expr + + // unit_expr, bool_expr, int_expr, floating_point_expr + [LParen, RParen, .. rest] => (Literal(Unit), rest) + [BoolLiteral(value), .. rest] => (Literal(Bool(value)), rest) + [IntLiteral(value), .. rest] => (Literal(Int(value)), rest) + [DoubleLiteral(value), .. rest] => (Literal(Double(value)), rest) + + // neg_expr, not_expr + [Sub, .. rest] => { + let (expr, rest) = parse_value_level_expr(rest) + (Neg(expr), rest) + } + [Not, .. rest] => { + let (expr, rest) = parse_expr(rest) + (Not(expr), rest) + } + + // group_expr, tuple_expr + [LParen, .. r] => { + let (first_expr, r) = parse_expr(r) + let exprs = [first_expr] + loop r { + [Comma, .. r] => { + let (next_expr, r) = parse_expr(r) + exprs.push(next_expr) + continue r + } + [RParen, .. r] => + if exprs.length() == 1 { + (exprs[0], r) + } else { + (Tuple(exprs), r) + } + _ => + raise ParseError("Expected ',' or ')' in tuple or grouped expression") + } + } + + // array_expr + [LBracket, .. r] => { + let elements = [] + loop r { + [RBracket, .. r] => (Array(elements), r) + r => { + let (element, r) = parse_expr(r) + elements.push(element) + match r { + [Comma, .. r] => continue r + [RBracket, ..] => (Array(elements), r) + _ => + raise ParseError( + "Expected ',' or ']' in array literal expression", + ) + } + } + } + } + + // block_expr + [LCurlyBracket, ..] => parse_block_expr(tokens) + + // identifier_expr + [LowerIdentifier(name) | UpperIdentifier(name), .. rest] => + (Identifier(name), rest) + } +} + +///| +fn parse_get_or_apply_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + let (value, rest) = parse_value_level_expr(tokens) + let mut result = value + loop rest { + [LBracket, .. r] => { + let (index, r) = parse_expr(r) + guard r is [RBracket, .. r] else { + raise ParseError("Expected ']' after index expression") + } + result = IndexAccess(result, index) + continue r + } + [LParen, .. r] => ... + [Dot, UpperIdentifier(field_name) | LowerIdentifier(field_name), .. r] => { + result = FieldAccess(result, field_name) + continue r + } + r => break (result, r) + } +} + +///| +fn parse_if_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + match tokens { + [If, ..] => parse_if_expr(tokens) + [Match, ..] => ... + _ => parse_get_or_apply_level_expr(tokens) + } +} + +///| +fn parse_mul_div_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + let (first, rest) = parse_if_level_expr(tokens) + let mut result = first + loop rest { + [Mul, .. r] => { + let (next, r) = parse_if_level_expr(r) + result = MulDivRem(Mul, result, next) + continue r + } + [Div, .. r] => { + let (next, r) = parse_if_level_expr(r) + result = MulDivRem(Div, result, next) + continue r + } + [Rem, .. r] => { + let (next, r) = parse_if_level_expr(r) + result = MulDivRem(Rem, result, next) + continue r + } + r => (result, r) + } +} + +///| +fn parse_add_sub_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + let (first, rest) = parse_mul_div_level_expr(tokens) + let mut result = first + loop rest { + [Add, .. r] => { + let (next, r) = parse_mul_div_level_expr(r) + result = AddSub(Add, result, next) + continue r + } + [Sub, .. r] => { + let (next, r) = parse_mul_div_level_expr(r) + result = AddSub(Sub, result, next) + continue r + } + r => break (result, r) + } +} + +///| +fn parse_compare_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + let (first, rest) = parse_add_sub_level_expr(tokens) + match rest { + [CompareOperator(op), .. r] => { + let (next, r) = parse_add_sub_level_expr(r) + (Compare(op, first, next), r) + } + r => (first, r) + } +} + +///| +fn parse_and_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + let (first, rest) = parse_compare_level_expr(tokens) + let mut result = first + loop rest { + [And, .. r] => { + let (next, r) = parse_compare_level_expr(r) + result = And(result, next) + continue r + } + r => break (result, r) + } +} + +///| +fn parse_or_level_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + let (first, rest) = parse_and_level_expr(tokens) + let mut result = first + loop rest { + [Or, .. r] => { + let (next, r) = parse_and_level_expr(r) + result = Or(result, next) + continue r + } + r => break (result, r) + } +} + +///| +fn parse_expr( + tokens : ArrayView[Token], +) -> (Expr, ArrayView[Token]) raise ParseError { + parse_or_level_expr(tokens) +} + ///| pub fn parse_program(tokens : Array[Token]) -> Program raise ParseError { - let program = [] + let top_lets = Map::new() + let top_functions = Map::new() + let struct_defs = Map::new() + let enum_defs = Map::new() loop tokens[:] { - [EOF] => program - [Let, LowerIdentifier(id) | UpperIdentifier(id), Colon, .. rest] => { - let (type_, rest) = parse_type(rest) + [EOF] => { top_lets, top_functions, struct_defs, enum_defs } + [Let, LowerIdentifier(id) | UpperIdentifier(id), .. rest] => { + let (type_, rest) = match rest { + [Colon, .. r] => { + let (t, r) = parse_type(r) + (Some(t), r) + } + [Assign, ..] => (None, rest) + _ => + raise ParseError( + "Expected ':' or '=' after identifier in let declaration", + ) + } guard rest is [Assign, .. rest] else { raise ParseError( "Expected '=' after type annotation in let declaration", ) } let (expr, rest) = parse_expr(rest) - program.push(TopLetDecl(id~, type_=Some(type_), expr~)) + top_lets[id] = { id, type_, expr } + guard rest is [Semicolon, .. rest] else { + raise ParseError("Expected ';' after top let declaration") + } continue rest } - [Let, LowerIdentifier(id) | UpperIdentifier(id), Assign, .. rest] => { - let (expr, rest) = parse_expr(rest) - program.push(TopLetDecl(id~, type_=None, expr~)) + [Fn, LowerIdentifier("main"), ..] => { + let (body, rest) = parse_block_expr(tokens) + top_functions["main"] = { + id: "main", + user_defined_type: None, + params: [], + return_type: Some(Unit), + body, + } continue rest } - [Fn, LowerIdentifier("main"), ..] => ... [Fn, ..] => ... [Struct, ..] as tokens => { - let (struct_, rest) = parse_struct(tokens) - program.push(struct_) + let (struct_, rest) = parse_struct_decl(tokens) + struct_defs[struct_.id] = struct_ continue rest } [Enum, ..] as tokens => { - let (enum_, rest) = parse_enum(tokens) - program.push(enum_) + let (enum_, rest) = parse_enum_decl(tokens) + enum_defs[enum_.id] = enum_ continue rest } - _ => raise ParseError("Unexpected token at top level") + [] => raise ParseError("Unexpected end of token stream") + [t, ..] => raise ParseError("Unexpected token: \{t} at top level") } } diff --git a/src/parser/tokenize.mbt b/src/parser/tokenize.mbt index fa81843..daa19fd 100644 --- a/src/parser/tokenize.mbt +++ b/src/parser/tokenize.mbt @@ -1,3 +1,6 @@ +///| +suberror TokenizeError String + ///| pub(all) enum CompareOperator { Equal @@ -11,7 +14,6 @@ pub(all) enum CompareOperator { ///| pub(all) enum Token { EOF - BoolLiteral(Bool) Unit Bool Int @@ -20,11 +22,17 @@ pub(all) enum Token { Not If Else + Match + While Fn + Return Let + Mut Struct Enum - Number(Int) + BoolLiteral(Bool) + IntLiteral(Int) + DoubleLiteral(Double) UpperIdentifier(String) LowerIdentifier(String) Wildcard @@ -36,6 +44,7 @@ pub(all) enum Token { Sub Mul Div + Rem Assign LParen RParen @@ -44,13 +53,15 @@ pub(all) enum Token { LCurlyBracket RCurlyBracket Arrow + MatchArrow + DoubleColon Colon Semicolon Comma } derive(Show) ///| -pub fn tokenize(input : String) -> Array[Token] { +pub fn tokenize(input : String) -> Array[Token] raise TokenizeError { let tokens = [] loop input[:] { [' ' | '\n' | '\r' | '\t', .. rest] => continue rest @@ -64,17 +75,25 @@ pub fn tokenize(input : String) -> Array[Token] { } ['0'..='9', ..] as pattern => { let number_str = StringBuilder::new() + let mut float_point = false let rest = loop pattern { ['0'..='9' as c, .. r] => { number_str.write_char(c) continue r } - r => { - let number = try! @strconv.parse_int(number_str.to_string()) - tokens.push(Number(number)) - r + ['.', .. r] if !float_point => { + number_str.write_char('.') + float_point = true + continue r } + r => break r } + let number = if float_point { + DoubleLiteral(try! @strconv.parse_double(number_str.to_string())) + } else { + IntLiteral(try! @strconv.parse_int(number_str.to_string())) + } + tokens.push(number) continue rest } ['A'..='Z', ..] as pattern => { @@ -84,19 +103,17 @@ pub fn tokenize(input : String) -> Array[Token] { ident_str.write_char(c) continue r } - r => { - let ident : Token = match ident_str.to_string() { - "Unit" => Unit - "Bool" => Bool - "Int" => Int - "Double" => Double - "Array" => Array - s => UpperIdentifier(s) - } - tokens.push(ident) - r - } + r => break r } + let ident : Token = match ident_str.to_string() { + "Unit" => Unit + "Bool" => Bool + "Int" => Int + "Double" => Double + "Array" => Array + s => UpperIdentifier(s) + } + tokens.push(ident) continue rest } ['a'..='z', ..] | ['_', ..] as pattern => { @@ -106,30 +123,35 @@ pub fn tokenize(input : String) -> Array[Token] { ident_str.write_char(c) continue r } - r => { - let ident = match ident_str.to_string() { - "_" => Wildcard - "true" => BoolLiteral(true) - "false" => BoolLiteral(false) - "not" => Not - "if" => If - "else" => Else - "fn" => Fn - "let" => Let - "struct" => Struct - "enum" => Enum - s => LowerIdentifier(s) - } - tokens.push(ident) - r - } + r => break r } + let ident : Token = match ident_str.to_string() { + "_" => Wildcard + "true" => BoolLiteral(true) + "false" => BoolLiteral(false) + "if" => If + "else" => Else + "match" => Match + "while" => While + "fn" => Fn + "return" => Return + "let" => Let + "mut" => Mut + "struct" => Struct + "enum" => Enum + s => LowerIdentifier(s) + } + tokens.push(ident) continue rest } [.. "->", .. rest] => { tokens.push(Arrow) continue rest } + [.. "=>", .. rest] => { + tokens.push(MatchArrow) + continue rest + } [.. "==", .. rest] => { tokens.push(CompareOperator(Equal)) continue rest @@ -182,6 +204,14 @@ pub fn tokenize(input : String) -> Array[Token] { tokens.push(Div) continue rest } + ['%', .. rest] => { + tokens.push(Rem) + continue rest + } + ['!', .. rest] => { + tokens.push(Not) + continue rest + } ['=', .. rest] => { tokens.push(Assign) continue rest @@ -210,6 +240,10 @@ pub fn tokenize(input : String) -> Array[Token] { tokens.push(RCurlyBracket) continue rest } + [.. "::", .. rest] => { + tokens.push(DoubleColon) + continue rest + } [':', .. rest] => { tokens.push(Colon) continue rest @@ -223,7 +257,7 @@ pub fn tokenize(input : String) -> Array[Token] { continue rest } [] => tokens.push(EOF) - [c, ..] => abort("Unexpected character: " + c.to_string()) + [c, ..] => raise TokenizeError("Unexpected character: \{c}") } tokens } @@ -238,7 +272,7 @@ test "tokenize" { inspect( tokenize(input), content=( - #|[Let, LowerIdentifier("x"), Assign, Number(42), If, LowerIdentifier("x"), CompareOperator(Greater), Number(0), LCurlyBracket, LowerIdentifier("x"), Assign, LowerIdentifier("x"), Sub, Number(1), RCurlyBracket, Else, LCurlyBracket, LowerIdentifier("x"), Assign, Number(0), RCurlyBracket, EOF] + #|[Let, LowerIdentifier("x"), Assign, IntLiteral(42), If, LowerIdentifier("x"), CompareOperator(Greater), IntLiteral(0), LCurlyBracket, LowerIdentifier("x"), Assign, LowerIdentifier("x"), Sub, IntLiteral(1), RCurlyBracket, Else, LCurlyBracket, LowerIdentifier("x"), Assign, IntLiteral(0), RCurlyBracket, EOF] ), ) }