test: parser

This commit is contained in:
2025-11-02 23:42:35 +08:00
parent c7e39c8c1f
commit 26357753f8
4 changed files with 303 additions and 12 deletions

View File

@@ -2,10 +2,12 @@
Lil-Ran's MiniMoonBit to RISC-V compiler for [MGPIC-2025](https://www.moonbitlang.cn/2025-mgpic-compiler).
It is optimized for runtime speed and code size, but not for compilation speed, according to the contest rules.
It is not optimized yet.
## Reference
https://github.com/moonbitlang/contest-2025-data
https://github.com/moonbitlang/BuildYourOwnMBT
https://github.com/moonbitlang/minimoonbit-public

View File

@@ -1,5 +1,5 @@
///|
suberror ParseError String
pub(all) suberror ParseError String derive(Show)
///|
pub(all) enum Type {
@@ -228,15 +228,15 @@ fn parse_enum_decl(
[RCurlyBracket, .. r] => ({ id, user_defined_type, variants }, r)
[UpperIdentifier(variant_name), .. r] => {
let variant_types = []
if r is [LParen, .. r] {
let r = if r is [LParen, .. r] {
loop r {
[RParen, ..] => break
[RParen, .. r] => r
r => {
let (variant_type, r) = parse_type(r)
variant_types.push(variant_type)
match r {
[Comma, .. r] => continue r
[RParen, ..] => break
[RParen, ..] => continue r
_ =>
raise ParseError(
"Expected ',' or ')' in enum variant type list",
@@ -244,6 +244,8 @@ fn parse_enum_decl(
}
}
}
} else {
r
}
variants.push((variant_name, variant_types))
match r {
@@ -288,7 +290,7 @@ fn parse_let_stmt_type_expr(
///|
/// Returns `(Stmt, is_end_expr, rest_tokens)`.
/// Semicolon is consumed for statements.
/// End expr is wrapped in `Stmt::Return`.
/// End expr is wrapped in `Stmt::Expr`.
fn parse_stmt_or_expr_end(
tokens : ArrayView[Token],
) -> (Stmt, Bool, ArrayView[Token]) raise ParseError {
@@ -387,7 +389,7 @@ fn parse_stmt_or_expr_end(
let (stmt, is_end_expr, rest) = parse_stmt_or_expr_end(r)
if is_end_expr {
raise ParseError(
"Unexpected return expression in while statement body",
"Unexpected end expression in while statement body",
)
}
stmts.push(stmt)
@@ -417,7 +419,7 @@ fn parse_stmt_or_expr_end(
(Assign(expr, rhs_expr), false, r)
}
[Semicolon, .. r] => (Expr(expr), false, r)
[RCurlyBracket, ..] => (Return(expr), true, rest)
[RCurlyBracket, ..] => (Expr(expr), true, rest)
_ => raise ParseError("Expected ';' or '}' after expression statement")
}
}
@@ -442,7 +444,7 @@ fn parse_block_expr(
}
break r
} else if r is [RCurlyBracket, .. r] {
stmts.push(Return(Literal(Unit)))
stmts.push(Expr(Literal(Unit)))
break r
}
continue r
@@ -753,8 +755,8 @@ pub fn parse_program(tokens : Array[Token]) -> Program raise ParseError {
}
continue rest
}
[Fn, LowerIdentifier("main"), ..] => {
guard parse_block_expr(tokens) is (Block(body), rest)
[Fn, LowerIdentifier("main"), .. rest] => {
guard parse_block_expr(rest) is (Block(body), rest)
top_functions["main"] = {
id: "main",
user_defined_type: None,

View File

@@ -54,3 +54,290 @@ test "parse_type" {
),
)
}
///|
/// Snapshot test for `parse_struct_decl`: feeds the tokens of a struct named
/// `Point` with two `Int` fields (`x`, `y`) and checks both the parsed
/// declaration and that only `EOF` remains unconsumed.
test "parse_struct_decl" {
  let parsed = parse_struct_decl([
    Struct, UpperIdentifier("Point"), LCurlyBracket,
    LowerIdentifier("x"), Colon, Int, Semicolon,
    LowerIdentifier("y"), Colon, Int, Semicolon,
    RCurlyBracket,
    EOF,
  ])
  inspect(
    parsed,
    content=(
      #|({id: "Point", user_defined_type: None, fields: [("x", Int), ("y", Int)]}, [EOF])
    ),
  )
}
///|
/// Snapshot test for `parse_enum_decl`: covers one variant without a payload
/// (`Red`) and one variant with a parenthesised three-element type list
/// (`RGB`), and checks that only `EOF` remains unconsumed.
test "parse_enum_decl" {
  let parsed = parse_enum_decl([
    Enum, UpperIdentifier("Color"), LCurlyBracket,
    // Variant without payload.
    UpperIdentifier("Red"), Semicolon,
    // Variant with three payload types.
    UpperIdentifier("RGB"), LParen, Int, Comma, Int, Comma, Int, RParen, Semicolon,
    RCurlyBracket,
    EOF,
  ])
  inspect(
    parsed,
    content=(
      #|({id: "Color", user_defined_type: None, variants: [("Red", []), ("RGB", [Int, Int, Int])]}, [EOF])
    ),
  )
}
///|
/// Snapshot test for `parse_optional_type_annotation`: checks the result when
/// a `Colon, Int` annotation is present and when the input starts directly
/// with `EOF`.
test "parse_optional_type_annotation" {
  // Annotation present.
  let with_annotation = parse_optional_type_annotation([Colon, Int, EOF])
  inspect(with_annotation, content="(Some(Int), [EOF])")
  // No annotation: the expected rest is the untouched `[EOF]`.
  let without_annotation = parse_optional_type_annotation([EOF])
  inspect(without_annotation, content="(None, [EOF])")
}
///|
/// Snapshot test for `parse_let_stmt_type_expr`: checks the
/// `(type annotation, expression, rest)` result both with and without a
/// leading `Colon, Int` annotation. In both cases the expected rest is
/// `[EOF]`, i.e. the `Semicolon` is no longer in the remaining tokens.
test "parse_let_stmt_type_expr" {
  // With an explicit type annotation.
  let annotated = parse_let_stmt_type_expr([
    Colon, Int, Assign, IntLiteral(1), Semicolon, EOF,
  ])
  inspect(annotated, content="(Some(Int), Literal(Int(1)), [EOF])")
  // Without a type annotation.
  let unannotated = parse_let_stmt_type_expr([
    Assign, IntLiteral(1), Semicolon, EOF,
  ])
  inspect(unannotated, content="(None, Literal(Int(1)), [EOF])")
}
///|
/// Snapshot test for `parse_stmt_or_expr_end` on a `let` statement: the
/// expected result is a `Let` statement, an end-expression flag of `false`,
/// and `[EOF]` as the remaining tokens.
test "parse_stmt_or_expr_end" {
  let parsed = parse_stmt_or_expr_end([
    Let, LowerIdentifier("x"), Assign, IntLiteral(1), Semicolon,
    EOF,
  ])
  inspect(
    parsed,
    content=(
      #|(Let(Identifier("x"), None, Literal(Int(1))), false, [EOF])
    ),
  )
}
///|
/// Snapshot test for `parse_block_expr` on a block whose only statement is
/// terminated by a `Semicolon`: the expected block contains the expression
/// statement followed by an `Expr(Literal(Unit))` entry.
test "parse_block_expr" {
  let parsed = parse_block_expr([
    LCurlyBracket, IntLiteral(1), Semicolon, RCurlyBracket,
    EOF,
  ])
  inspect(
    parsed,
    content="(Block([Expr(Literal(Int(1))), Expr(Literal(Unit))]), [EOF])",
  )
}
///|
/// Snapshot test for `parse_if_expr` on an `if / else if / else` chain: the
/// expected tree nests the second `If` inside the `Some(...)` else-branch of
/// the first one.
test "parse_if_expr" {
  let parsed = parse_if_expr([
    // if 1 { 2 }
    If, IntLiteral(1), LCurlyBracket, IntLiteral(2), RCurlyBracket,
    // else if 3 { 4 }
    Else, If, IntLiteral(3), LCurlyBracket, IntLiteral(4), RCurlyBracket,
    // else { 5 }
    Else, LCurlyBracket, IntLiteral(5), RCurlyBracket,
    EOF,
  ])
  inspect(
    parsed,
    content="(If(Literal(Int(1)), Block([Expr(Literal(Int(2)))]), Some(If(Literal(Int(3)), Block([Expr(Literal(Int(4)))]), Some(Block([Expr(Literal(Int(5)))]))))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_value_level_expr` on a parenthesised,
/// comma-separated pair of integer literals: the expected result is a
/// two-element `Tuple` with `[EOF]` remaining.
test "parse_value_level_expr" {
  let parsed = parse_value_level_expr([
    LParen, IntLiteral(1), Comma, IntLiteral(2), RParen,
    EOF,
  ])
  inspect(parsed, content="(Tuple([Literal(Int(1)), Literal(Int(2))]), [EOF])")
}
///|
/// Snapshot test for `parse_get_or_apply_level_expr` on a single-argument
/// call of the identifier `f`.
///
/// NOTE(review): the expected snapshot leaves `RParen` in the remaining
/// tokens, i.e. the closing parenthesis of the call does not appear to be
/// consumed. Confirm whether this is intended parser behavior or a parser bug
/// that this snapshot is currently pinning.
test "parse_get_or_apply_level_expr" {
  let parsed = parse_get_or_apply_level_expr([
    LowerIdentifier("f"), LParen, IntLiteral(1), RParen,
    EOF,
  ])
  inspect(
    parsed,
    content=(
      #|(FunctionCall(Identifier("f"), [Literal(Int(1))]), [RParen, EOF])
    ),
  )
}
///|
/// Snapshot test for `parse_if_level_expr` on an `if` without an `else`
/// branch: the expected else-branch is `None`, and the body (whose statement
/// ends with a `Semicolon`) carries a trailing `Expr(Literal(Unit))` entry.
test "parse_if_level_expr" {
  let parsed = parse_if_level_expr([
    If, IntLiteral(0),
    LCurlyBracket, IntLiteral(1), Semicolon, RCurlyBracket,
    EOF,
  ])
  inspect(
    parsed,
    content="(If(Literal(Int(0)), Block([Expr(Literal(Int(1))), Expr(Literal(Unit))]), None), [EOF])",
  )
}
///|
/// Snapshot test for `parse_mul_div_level_expr`: `6 Div 3 Mul 2` is expected
/// to group to the left, with the `Div` node as the left operand of the `Mul`
/// node.
test "parse_mul_div_level_expr" {
  let parsed = parse_mul_div_level_expr([
    IntLiteral(6), Div, IntLiteral(3), Mul, IntLiteral(2),
    EOF,
  ])
  inspect(
    parsed,
    content="(MulDivRem(Mul, MulDivRem(Div, Literal(Int(6)), Literal(Int(3))), Literal(Int(2))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_add_sub_level_expr`: `1 Add 2 Sub 3` is expected
/// to group to the left, with the `Add` node as the left operand of the `Sub`
/// node.
test "parse_add_sub_level_expr" {
  let parsed = parse_add_sub_level_expr([
    IntLiteral(1), Add, IntLiteral(2), Sub, IntLiteral(3),
    EOF,
  ])
  inspect(
    parsed,
    content="(AddSub(Sub, AddSub(Add, Literal(Int(1)), Literal(Int(2))), Literal(Int(3))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_compare_level_expr`. The input contains no
/// comparison operator, so the expected result is a plain `AddSub` node with
/// `[EOF]` remaining.
test "parse_compare_level_expr" {
  let parsed = parse_compare_level_expr([
    IntLiteral(1), Add, IntLiteral(2),
    EOF,
  ])
  inspect(
    parsed,
    content="(AddSub(Add, Literal(Int(1)), Literal(Int(2))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_and_level_expr`: two boolean literals joined by
/// an `And` token are expected to produce a single `And` node.
test "parse_and_level_expr" {
  let parsed = parse_and_level_expr([
    BoolLiteral(true), And, BoolLiteral(false),
    EOF,
  ])
  inspect(
    parsed,
    content="(And(Literal(Bool(true)), Literal(Bool(false))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_or_level_expr`: two boolean literals joined by an
/// `Or` token are expected to produce a single `Or` node.
test "parse_or_level_expr" {
  let parsed = parse_or_level_expr([
    BoolLiteral(true), Or, BoolLiteral(false),
    EOF,
  ])
  inspect(
    parsed,
    content="(Or(Literal(Bool(true)), Literal(Bool(false))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_expr` on `1 Add 2 Mul 3`: the expected tree puts
/// the `Mul` node on the right-hand side of the `Add` node, i.e.
/// multiplication binds tighter than addition.
test "parse_expr" {
  let parsed = parse_expr([
    IntLiteral(1), Add, IntLiteral(2), Mul, IntLiteral(3),
    EOF,
  ])
  inspect(
    parsed,
    content="(AddSub(Add, Literal(Int(1)), MulDivRem(Mul, Literal(Int(2)), Literal(Int(3)))), [EOF])",
  )
}
///|
/// Snapshot test for `parse_block_expr` on a block holding an `if` without
/// `else`, a `Semicolon`, and then an `if`/`else`. The expected block has two
/// separate `If` entries: the first with `None` as its else-branch, the second
/// owning the `else` block.
test "if_if_else" {
  let parsed = parse_block_expr([
    LCurlyBracket,
    // if true { 1 };
    If, BoolLiteral(true), LCurlyBracket, IntLiteral(1), RCurlyBracket, Semicolon,
    // if false { 2 } else { 3 }
    If, BoolLiteral(false), LCurlyBracket, IntLiteral(2), RCurlyBracket,
    Else, LCurlyBracket, IntLiteral(3), RCurlyBracket,
    RCurlyBracket,
    EOF,
  ])
  inspect(
    parsed,
    content="(Block([Expr(If(Literal(Bool(true)), Block([Expr(Literal(Int(1)))]), None)), Expr(If(Literal(Bool(false)), Block([Expr(Literal(Int(2)))]), Some(Block([Expr(Literal(Int(3)))]))))]), [EOF])",
  )
}
///|
/// Snapshot test for `parse_program` on a top-level `let` followed by
/// `fn main` with a block body: the expected program has one `top_lets` entry
/// (`x`), one `top_functions` entry (`main`, with no params and
/// `return_type: Some(Unit)`), and empty struct/enum definition maps.
test "parse_program" {
  let program = parse_program([
    // Top-level let binding.
    Let, LowerIdentifier("x"), Assign, IntLiteral(1), Semicolon,
    // Main function with a block body.
    Fn, LowerIdentifier("main"),
    LCurlyBracket, IntLiteral(0), Semicolon, RCurlyBracket,
    EOF,
  ])
  inspect(
    program,
    content=(
      #|{top_lets: {"x": {id: "x", type_: None, expr: Literal(Int(1))}}, top_functions: {"main": {id: "main", user_defined_type: None, params: [], return_type: Some(Unit), body: [Expr(Literal(Int(0))), Expr(Literal(Unit))]}}, struct_defs: {}, enum_defs: {}}
    ),
  )
}

View File

@@ -1,5 +1,5 @@
///|
suberror TokenizeError String
pub(all) suberror TokenizeError String derive(Show)
///|
pub(all) enum CompareOperator {