wip: ast
This commit is contained in:
@@ -1,3 +1,11 @@
|
||||
# Lilunar
|
||||
|
||||
Lil-Ran's MiniMoonBit to RISC-V compiler for [MGPIC-2025](https://www.moonbitlang.cn/2025-mgpic-compiler).
|
||||
|
||||
Following the contest rules, it is optimized for runtime speed and code size rather than for compilation speed.
|
||||
|
||||
## Reference
|
||||
|
||||
https://github.com/moonbitlang/contest-2025-data
|
||||
|
||||
https://github.com/moonbitlang/minimoonbit-public
|
||||
|
||||
148
src/parser/ast.mbt
Normal file
148
src/parser/ast.mbt
Normal file
@@ -0,0 +1,148 @@
|
||||
///|
/// Error raised by the parsing functions in this package.
/// The payload is a human-readable description of what was expected.
suberror ParseError String
|
||||
|
||||
// region Context
|
||||
|
||||
// region Type
|
||||
|
||||
///|
/// A type expression as produced by `parse_type`.
pub(all) enum Type {
  // Built-in types, one per type keyword token.
  Unit
  Bool
  Int
  Double
  // `Array[T]`: the payload is the element type `T`.
  Array(Type)
  // `(T1, T2, ...)`: the member types in source order.
  Tuple(Array[Type])
  // `(T1, T2, ...) -> R`: parameter types, then the return type.
  Function(Array[Type], Type)
  // A bare upper-case type name.
  UserDefined(String)
  // An upper-case name applied to a single bracketed type argument: `Name[T]`.
  Generic(String, Type)
} derive(Show)
|
||||
|
||||
// region Components
|
||||
|
||||
///|
/// A literal value appearing in the AST.
pub(all) enum Literal {
  // The unit value; carries no payload.
  Unit
  // A boolean constant.
  Bool(Bool)
  // An integer constant.
  Int(Int)
  // A floating-point constant.
  Double(Double)
} derive(Show)
|
||||
|
||||
///|
/// A place expression built from a name plus field/index projections.
/// NOTE(review): the name suggests these are assignment targets, but nothing
/// in the visible source constructs or consumes this type yet - confirm.
enum LeftValue {
  // A plain name.
  Identifier(String)
  // A field projection: the base place and the field name.
  FieldAccess(LeftValue, String)
  // An index projection: the base place and the index expression.
  IndexAccess(LeftValue, Expr)
} derive(Show)
|
||||
|
||||
///|
/// Operator selector for `Expr::AddSub` nodes.
enum AddSubOp {
  // Addition.
  Add
  // Subtraction.
  Sub
} derive(Show)
|
||||
|
||||
///|
/// Operator selector for `Expr::MulDivRem` nodes.
enum MulDivRemOp {
  // Multiplication.
  Mul
  // Division.
  Div
  // Remainder.
  Rem
} derive(Show)
|
||||
|
||||
///|
/// Expression nodes of the AST.
/// NOTE(review): operand roles below are inferred from the variant shapes; no
/// expression parser exists in the visible source to confirm them.
enum Expr {
  // Logical "or" of two sub-expressions.
  Or(Expr, Expr)
  // Logical "and" of two sub-expressions.
  And(Expr, Expr)
  // Comparison: the operator, then the two operands.
  Compare(CompareOperator, Expr, Expr)
  // Addition or subtraction: the operator, then the two operands.
  AddSub(AddSubOp, Expr, Expr)
  // Multiplication, division or remainder: the operator, then the two operands.
  MulDivRem(MulDivRemOp, Expr, Expr)
  // Presumably condition, then-branch, and optional else-branch - TODO confirm.
  If(Expr, Expr, Expr?)
  // Presumably the scrutinee followed by (pattern, body) arms - TODO confirm.
  // `Pattern` is not declared in the visible source.
  Match(Expr, Array[(Pattern, Expr)])
} derive(Show)
|
||||
|
||||
///|
/// A top-level item of a program.
/// NOTE(review): the payload types `Function`, `Struct` and `Enum` are not
/// declared in the visible source.
enum TopLevel {
  // `let id [: type_] = expr`; `type_` is `None` when no annotation is given.
  TopLetDecl(id~ : String, type_~ : Type?, expr~ : Expr)
  // A top-level function definition.
  TopFn(Function)
  // A struct definition.
  Struct(Struct)
  // An enum definition.
  Enum(Enum)
}
|
||||
|
||||
///|
/// A whole program: its top-level items, wrapped in a distinct type.
struct Program(Array[TopLevel])
|
||||
|
||||
///|
/// Parses one type expression from the front of `tokens`.
///
/// Returns the parsed `Type` paired with the view of the tokens that follow
/// it. Raises `ParseError` when `tokens` does not start with a well-formed
/// type.
fn parse_type(
  tokens : ArrayView[Token],
) -> (Type, ArrayView[Token]) raise ParseError {
  match tokens {
    // Built-in type keywords map one-to-one onto `Type` constructors.
    [Unit, .. tail] => (Unit, tail)
    [Bool, .. tail] => (Bool, tail)
    [Int, .. tail] => (Int, tail)
    [Double, .. tail] => (Double, tail)
    // Array[T]
    [Array, LBracket, .. tail] => {
      let (element, after_element) = parse_type(tail)
      match after_element {
        [RBracket, .. remaining] => (Array(element), remaining)
        _ => raise ParseError("Expected ']' after array type")
      }
    }
    // (T1, T2, ...) is a tuple; followed by `-> R` it is a function type.
    // At least one member type is required after the opening parenthesis.
    [LParen, .. tail] => {
      let (head_type, after_head) = parse_type(tail)
      let members = [head_type]
      loop after_head {
        [Comma, .. more] => {
          let (member, after_member) = parse_type(more)
          members.push(member)
          continue after_member
        }
        // Must be tried before the plain `)` arm so `->` is not left behind.
        [RParen, Arrow, .. more] => {
          let (result_type, after_result) = parse_type(more)
          (Function(members, result_type), after_result)
        }
        [RParen, .. more] => (Tuple(members), more)
        _ =>
          raise ParseError(
            "Expected ',' or ')' or ')' '->' in tuple/function type",
          )
      }
    }
    // Name[T]: must be tried before the bare-name arm below.
    [UpperIdentifier(name), LBracket, .. tail] => {
      let (argument, after_argument) = parse_type(tail)
      match after_argument {
        [RBracket, .. remaining] => (Generic(name, argument), remaining)
        _ => raise ParseError("Expected ']' after generic type argument")
      }
    }
    [UpperIdentifier(name), .. tail] => (UserDefined(name), tail)
    _ => raise ParseError("Unexpected token while parsing type")
  }
}
|
||||
|
||||
///|
/// Parses a complete token stream into a `Program`.
///
/// `tokens` is expected to end with a single `EOF` token. Top-level items are
/// consumed one after another until only `EOF` remains.
///
/// Raises `ParseError` when an item is malformed or an unexpected token is
/// found at top level.
///
/// NOTE(review): `parse_expr` is not defined in the visible source, and the
/// `fn` / `struct` / `enum` arms are still `...` placeholders.
pub fn parse_program(tokens : Array[Token]) -> Program raise ParseError {
  let program : Array[TopLevel] = []
  loop tokens[:] {
    // Wrap the collected items: `Program` is a distinct type, not an alias
    // of `Array[TopLevel]`.
    [EOF] => Program(program)
    // let name : Type = expr
    [Let, LowerIdentifier(id) | UpperIdentifier(id), Colon, .. rest] => {
      let (annotation, rest) = parse_type(rest)
      guard rest is [Assign, .. rest] else {
        raise ParseError(
          "Expected '=' after type annotation in let declaration",
        )
      }
      let (expr, rest) = parse_expr(rest)
      // `TopLetDecl` has labelled fields, and `type_` is optional.
      program.push(TopLetDecl(id~, type_=Some(annotation), expr~))
      continue rest
    }
    // let name = expr
    [Let, LowerIdentifier(id) | UpperIdentifier(id), Assign, .. rest] => {
      let (expr, rest) = parse_expr(rest)
      program.push(TopLetDecl(id~, type_=None, expr~))
      continue rest
    }
    // Not implemented yet.
    [Fn, LowerIdentifier("main"), ..] => ...
    [Fn, ..] => ...
    [Struct, ..] => ...
    [Enum, ..] => ...
    _ => raise ParseError("Unexpected token at top level")
  }
}
|
||||
56
src/parser/ast_wbtest.mbt
Normal file
56
src/parser/ast_wbtest.mbt
Normal file
@@ -0,0 +1,56 @@
|
||||
///|
/// Checks `parse_type` on every supported type form; each case also verifies
/// that the trailing `EOF` token is left unconsumed.
test "parse_type" {
  // Built-in type keywords.
  inspect(parse_type([Unit, EOF]), content="(Unit, [EOF])")
  inspect(parse_type([Bool, EOF]), content="(Bool, [EOF])")
  inspect(parse_type([Int, EOF]), content="(Int, [EOF])")
  inspect(parse_type([Double, EOF]), content="(Double, [EOF])")
  // Array type.
  inspect(
    parse_type([Array, LBracket, Int, RBracket, EOF]),
    content="(Array(Int), [EOF])",
  )
  // Tuple types, including a single-member tuple.
  inspect(
    parse_type([LParen, Int, Comma, Bool, RParen, EOF]),
    content="(Tuple([Int, Bool]), [EOF])",
  )
  inspect(
    parse_type([LParen, Int, RParen, EOF]),
    content="(Tuple([Int]), [EOF])",
  )
  // Function type.
  inspect(
    parse_type([LParen, Int, Comma, Bool, RParen, Arrow, Double, EOF]),
    content="(Function([Int, Bool], Double), [EOF])",
  )
  // User-defined type name.
  inspect(
    parse_type([UpperIdentifier("MyType"), EOF]),
    content=(
      #|(UserDefined("MyType"), [EOF])
    ),
  )
  // Generic type with one argument.
  inspect(
    parse_type([
      UpperIdentifier("A"),
      LBracket,
      UpperIdentifier("B"),
      RBracket,
      EOF,
    ]),
    content=(
      #|(Generic("A", UserDefined("B")), [EOF])
    ),
  )
  // Nested generic type.
  inspect(
    parse_type([
      UpperIdentifier("A"),
      LBracket,
      UpperIdentifier("B"),
      LBracket,
      UpperIdentifier("C"),
      RBracket,
      RBracket,
      EOF,
    ]),
    content=(
      #|(Generic("A", Generic("B", UserDefined("C"))), [EOF])
    ),
  )
}
|
||||
1
src/parser/moon.pkg.json
Normal file
1
src/parser/moon.pkg.json
Normal file
@@ -0,0 +1 @@
|
||||
{}
|
||||
244
src/parser/tokenize.mbt
Normal file
244
src/parser/tokenize.mbt
Normal file
@@ -0,0 +1,244 @@
|
||||
///|
/// Comparison operators recognised by `tokenize`.
pub(all) enum CompareOperator {
  // `==`
  Equal
  // `!=`
  NotEqual
  // `>=`
  GreaterEqual
  // `<=`
  LessEqual
  // `>`
  Greater
  // `<`
  Less
} derive(Show)
|
||||
|
||||
///|
/// Lexical tokens produced by `tokenize`.
pub(all) enum Token {
  // End of input; `tokenize` always emits it as the last token.
  EOF
  // `true` / `false`
  BoolLiteral(Bool)
  // Built-in type keywords: `Unit`, `Bool`, `Int`, `Double`, `Array`.
  Unit
  Bool
  Int
  Double
  Array
  // Lower-case keywords: `not`, `if`, `else`, `fn`, `let`, `struct`, `enum`.
  Not
  If
  Else
  Fn
  Let
  Struct
  Enum
  // A run of decimal digits.
  Number(Int)
  // An identifier starting with `A`-`Z` that is not a built-in type keyword.
  UpperIdentifier(String)
  // An identifier starting with `a`-`z` or `_` that is not a keyword.
  LowerIdentifier(String)
  // A lone `_`.
  Wildcard
  // `==`, `!=`, `>=`, `<=`, `>`, `<`
  CompareOperator(CompareOperator)
  // `&&`
  And
  // `||`
  Or
  // `.`
  Dot
  // `+`, `-`, `*`, `/`
  Add
  Sub
  Mul
  Div
  // `=`
  Assign
  // `(` and `)`
  LParen
  RParen
  // `[` and `]`
  LBracket
  RBracket
  // `{` and `}`
  LCurlyBracket
  RCurlyBracket
  // `->`
  Arrow
  // `:`
  Colon
  // `;`
  Semicolon
  // `,`
  Comma
} derive(Show)
|
||||
|
||||
///|
/// Splits `input` into a flat array of tokens that always ends with `EOF`.
///
/// Spaces, tabs, line breaks and `//` line comments are skipped. The function
/// aborts on a character that starts no known token, and on a digit run that
/// `@strconv.parse_int` rejects.
pub fn tokenize(input : String) -> Array[Token] {
  let tokens = []
  loop input[:] {
    // Whitespace carries no meaning.
    [' ' | '\n' | '\r' | '\t', .. rest] => continue rest
    // Line comment: drop everything up to and including the line break.
    // Must be tried before the single `/` arm further down.
    [.. "//", .. rest] => {
      let after_comment = loop rest {
        ['\n' | '\r', .. tail] => tail
        [_, .. tail] => continue tail
        [] => []
      }
      continue after_comment
    }
    // Integer literal: a maximal run of decimal digits.
    ['0'..='9', ..] as src => {
      let digits = StringBuilder::new()
      let after_number = loop src {
        ['0'..='9' as ch, .. tail] => {
          digits.write_char(ch)
          continue tail
        }
        tail => tail
      }
      let value = try! @strconv.parse_int(digits.to_string())
      tokens.push(Number(value))
      continue after_number
    }
    // Upper-case word: a built-in type keyword or an upper identifier.
    ['A'..='Z', ..] as src => {
      let word = StringBuilder::new()
      let after_word = loop src {
        ['a'..='z' | 'A'..='Z' | '0'..='9' | '_' as ch, .. tail] => {
          word.write_char(ch)
          continue tail
        }
        tail => tail
      }
      let token : Token = match word.to_string() {
        "Unit" => Unit
        "Bool" => Bool
        "Int" => Int
        "Double" => Double
        "Array" => Array
        name => UpperIdentifier(name)
      }
      tokens.push(token)
      continue after_word
    }
    // Lower-case word: wildcard, boolean literal, keyword or lower identifier.
    ['a'..='z', ..] | ['_', ..] as src => {
      let word = StringBuilder::new()
      let after_word = loop src {
        ['a'..='z' | 'A'..='Z' | '0'..='9' | '_' as ch, .. tail] => {
          word.write_char(ch)
          continue tail
        }
        tail => tail
      }
      let token : Token = match word.to_string() {
        "_" => Wildcard
        "true" => BoolLiteral(true)
        "false" => BoolLiteral(false)
        "not" => Not
        "if" => If
        "else" => Else
        "fn" => Fn
        "let" => Let
        "struct" => Struct
        "enum" => Enum
        name => LowerIdentifier(name)
      }
      tokens.push(token)
      continue after_word
    }
    // Two-character operators come before the one-character operators that
    // share their first character.
    [.. "->", .. rest] => {
      tokens.push(Arrow)
      continue rest
    }
    [.. "==", .. rest] => {
      tokens.push(CompareOperator(Equal))
      continue rest
    }
    [.. "!=", .. rest] => {
      tokens.push(CompareOperator(NotEqual))
      continue rest
    }
    [.. ">=", .. rest] => {
      tokens.push(CompareOperator(GreaterEqual))
      continue rest
    }
    [.. "<=", .. rest] => {
      tokens.push(CompareOperator(LessEqual))
      continue rest
    }
    ['>', .. rest] => {
      tokens.push(CompareOperator(Greater))
      continue rest
    }
    ['<', .. rest] => {
      tokens.push(CompareOperator(Less))
      continue rest
    }
    [.. "&&", .. rest] => {
      tokens.push(And)
      continue rest
    }
    [.. "||", .. rest] => {
      tokens.push(Or)
      continue rest
    }
    // Single-character operators and punctuation.
    ['.', .. rest] => {
      tokens.push(Dot)
      continue rest
    }
    ['+', .. rest] => {
      tokens.push(Add)
      continue rest
    }
    ['-', .. rest] => {
      tokens.push(Sub)
      continue rest
    }
    ['*', .. rest] => {
      tokens.push(Mul)
      continue rest
    }
    ['/', .. rest] => {
      tokens.push(Div)
      continue rest
    }
    ['=', .. rest] => {
      tokens.push(Assign)
      continue rest
    }
    ['(', .. rest] => {
      tokens.push(LParen)
      continue rest
    }
    [')', .. rest] => {
      tokens.push(RParen)
      continue rest
    }
    ['[', .. rest] => {
      tokens.push(LBracket)
      continue rest
    }
    [']', .. rest] => {
      tokens.push(RBracket)
      continue rest
    }
    ['{', .. rest] => {
      tokens.push(LCurlyBracket)
      continue rest
    }
    ['}', .. rest] => {
      tokens.push(RCurlyBracket)
      continue rest
    }
    [':', .. rest] => {
      tokens.push(Colon)
      continue rest
    }
    [';', .. rest] => {
      tokens.push(Semicolon)
      continue rest
    }
    [',', .. rest] => {
      tokens.push(Comma)
      continue rest
    }
    // End of input: emit the terminator and leave the loop.
    [] => tokens.push(EOF)
    [c, ..] => abort("Unexpected character: " + c.to_string())
  }
  tokens
}
|
||||
|
||||
///|
/// Tokenizes a small snippet that mixes keywords, identifiers, numbers,
/// operators, braces and a line comment, and checks the full token list.
test "tokenize" {
  let input =
    #| let x = 42 // A comment.
    #| if x > 0 {
    #| x = x - 1
    #| } else { x = 0 }
  // The comment must vanish and the list must end with `EOF`.
  inspect(
    tokenize(input),
    content=(
      #|[Let, LowerIdentifier("x"), Assign, Number(42), If, LowerIdentifier("x"), CompareOperator(Greater), Number(0), LCurlyBracket, LowerIdentifier("x"), Assign, LowerIdentifier("x"), Sub, Number(1), RCurlyBracket, Else, LCurlyBracket, LowerIdentifier("x"), Assign, Number(0), RCurlyBracket, EOF]
    ),
  )
}
|
||||
Reference in New Issue
Block a user