commit f86300f3ce2b8f78b827cfb10314fc9695b89671 Author: 0264408 Date: Wed Feb 4 16:42:51 2026 +0800 Initialize Aster project with basic structure, including Cargo configuration, lexer, parser, interpreter, and AST definitions. Add a sample script and README documentation. Implement basic error handling and environment management for variable storage. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..06c95fd --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aster" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..fadaf74 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "aster" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/README.md b/README.md new file mode 100644 index 0000000..044a57c --- /dev/null +++ b/README.md @@ -0,0 +1,90 @@ +# Aster + +用 Rust 实现的脚本语言解释器。 + +## 项目结构 + +``` +aster/ +├── src/ +│ ├── lexer/ # 词法分析器 +│ ├── ast/ # 抽象语法树 +│ ├── parser/ # 语法分析器 +│ ├── interpreter/# 解释器 +│ ├── error/ # 错误处理 +│ └── main.rs +├── script.ast # 示例脚本 +└── Cargo.toml +``` + +## 运行方式 + +```bash +# 运行脚本文件 +cargo run -- script.ast + +# 从标准输入读取 +cargo run +``` + +## 语言特性 + +### 数据类型 + +- 数字(`Number`) +- 字符串(`String`) +- 布尔值(`true` / `false`) +- 空值(`nil`) + +### 变量与赋值 + +```rust +let x = 42; +let name = "aster"; +``` + +### 控制流 + +- **条件分支**:`if` / `else` +- **循环**:`while` + +### 函数 + +```rust +fn fib(n) { + if (n <= 1) { + return n + } + return fib(n - 1) + fib(n - 2) +} + +print(fib(10)) +``` + +### 运算符 + +- 算术:`+` `-` `*` `/` +- 比较:`<` `<=` `>` `>=` `==` `!=` +- 逻辑:`&&` `||` +- 一元:`!` `-` + +### 内置函数 + +- `print(value)`:打印值 +- `clock()`:返回当前时间戳(秒) + +## 构建 + +```bash +cargo build --release +``` + +构建产物位于 `target/release/aster`,可直接执行: + +```bash +./target/release/aster script.ast +``` + +## 依赖 + +无外部依赖,仅使用 Rust 标准库。 diff --git a/script.ast b/script.ast new file mode 100644 index 0000000..5689684 --- /dev/null +++ b/script.ast @@ -0,0 +1,40 @@ +// 压测:循环、递归、闭包 + +fn fib(n) { + if (n <= 1) { + return n + } + return fib(n - 1) + fib(n - 2) +} + +fn make_adder(x) { + return fn(y) { return x + y } +} + +let t0 = clock() + +// 1. 循环压测:100万次算术 +let sum = 0 +let i = 0 +while (i < 1000000) { + sum = sum + i + i = i + 1 +} +print("loop 1M:", sum) + +// 2. 递归压测:fib(25) +let f = fib(25) +print("fib(25):", f) + +// 3. 闭包压测:100万次调用 +let add10 = make_adder(10) +let j = 0 +let total = 0 +while (j < 1000000) { + total = add10(j) + j = j + 1 +} +print("closure 1M calls:", total) + +let t1 = clock() +print("total time (s):", t1 - t0) diff --git a/src/ast/expr.rs b/src/ast/expr.rs new file mode 100644 index 0000000..836a405 --- /dev/null +++ b/src/ast/expr.rs @@ -0,0 +1,84 @@ +use crate::ast::stmt::Stmt; + +#[derive(Debug, Clone)] +pub enum Expr { + /// 字面量:number / string / bool / nil + Literal(Literal), + + /// 变量引用 + Variable(String), + + /// 赋值表达式:a = b + Assign { + name: String, + value: Box, + }, + + /// 一元运算:!expr / -expr + Unary { + op: UnaryOp, + right: Box, + }, + + /// 二元运算:a + b + Binary { + left: Box, + op: BinaryOp, + right: Box, + }, + + /// 逻辑运算:&& || + Logical { + left: Box, + op: LogicalOp, + right: Box, + }, + + /// 函数调用 + Call { + callee: Box, + arguments: Vec, + }, + + /// 匿名函数(为闭包预留) + Lambda { + params: Vec, + body: Vec, + }, +} + +#[derive(Debug, Clone)] +pub enum Literal { + Number(f64), + String(String), + Bool(bool), + Nil, +} + +#[derive(Debug, Clone, Copy)] +pub enum UnaryOp { + Negate, // - + Not, // ! +} + +#[derive(Debug, Clone, Copy)] +pub enum BinaryOp { + Add, + Sub, + Mul, + Div, + + Greater, + GreaterEqual, + Less, + LessEqual, + + Equal, + NotEqual, +} + +#[derive(Debug, Clone, Copy)] +pub enum LogicalOp { + And, + Or, +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs new file mode 100644 index 0000000..30c7d98 --- /dev/null +++ b/src/ast/mod.rs @@ -0,0 +1,5 @@ +pub mod expr; +pub mod stmt; + +pub use expr::*; +pub use stmt::*; diff --git a/src/ast/stmt.rs b/src/ast/stmt.rs new file mode 100644 index 0000000..46bd417 --- /dev/null +++ b/src/ast/stmt.rs @@ -0,0 +1,39 @@ +use crate::ast::expr::Expr; + +#[derive(Debug, Clone)] +pub enum Stmt { + /// let x = expr; + Let { + name: String, + initializer: Expr, + }, + + /// 表达式语句:expr; + ExprStmt(Expr), + + /// 代码块:{ ... } + Block(Vec), + + /// if 语句 + If { + condition: Expr, + then_branch: Box, + else_branch: Option>, + }, + + /// while 循环 + While { + condition: Expr, + body: Box, + }, + + /// 函数声明 + Function { + name: String, + params: Vec, + body: Vec, + }, + + /// return expr?; + Return(Option), +} diff --git a/src/error/error.rs b/src/error/error.rs new file mode 100644 index 0000000..8b22307 --- /dev/null +++ b/src/error/error.rs @@ -0,0 +1,26 @@ +use crate::lexer::Token; + +#[derive(Debug)] +pub enum RuntimeError { + ParseError { message: String, token: Token }, + RuntimeError { message: String, token: Option }, +} + +impl std::fmt::Display for RuntimeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RuntimeError::ParseError { message, token } => { + write!(f, "[Parse Error] {} at line {}, column {}", message, token.line, token.column) + } + RuntimeError::RuntimeError { message, token } => { + if let Some(tok) = token { + write!(f, "[Runtime Error] {} at line {}, column {}", message, tok.line, tok.column) + } else { + write!(f, "[Runtime Error] {}", message) + } + } + } + } +} + +impl std::error::Error for RuntimeError {} diff --git a/src/error/mod.rs b/src/error/mod.rs new file mode 100644 index 0000000..2aaff46 --- /dev/null +++ b/src/error/mod.rs @@ -0,0 +1,3 @@ +pub mod error; + +pub use error::*; \ No newline at end of file diff --git a/src/interpreter/env.rs b/src/interpreter/env.rs new file mode 100644 index 0000000..666c77d --- /dev/null +++ b/src/interpreter/env.rs @@ -0,0 +1,44 @@ +use super::Value; +use std::collections::HashMap; +use std::rc::Rc; +use std::cell::RefCell; + +#[derive(Debug)] +pub struct Env { + pub values: HashMap, + pub parent: Option>>, +} + +impl Env { + pub fn new(parent: Option>>) -> Self { + Self { + values: HashMap::new(), + parent, + } + } + + pub fn define(&mut self, name: String, val: Value) { + self.values.insert(name, val); + } + + pub fn assign(&mut self, name: &str, val: Value) -> bool { + if self.values.contains_key(name) { + self.values.insert(name.to_string(), val); + true + } else if let Some(parent) = &self.parent { + parent.borrow_mut().assign(name, val) + } else { + false + } + } + + pub fn get(&self, name: &str) -> Option { + if let Some(val) = self.values.get(name) { + Some(val.clone()) + } else if let Some(parent) = &self.parent { + parent.borrow().get(name) + } else { + None + } + } +} diff --git a/src/interpreter/interpreter.rs b/src/interpreter/interpreter.rs new file mode 100644 index 0000000..266c93e --- /dev/null +++ b/src/interpreter/interpreter.rs @@ -0,0 +1,289 @@ +use crate::ast::*; +use crate::error::RuntimeError; +use super::{Value, Env, Function}; +use std::rc::Rc; +use std::cell::RefCell; + +pub struct Interpreter { + pub env: Rc>, +} + +impl Interpreter { + pub fn new() -> Self { + let env = Rc::new(RefCell::new(Env::new(None))); + + // 注册内置函数 + env.borrow_mut().define("print".to_string(), Value::NativeFunction(native_print)); + env.borrow_mut().define("clock".to_string(), Value::NativeFunction(native_clock)); + + Self { env } + } + + pub fn interpret(&mut self, statements: Vec) -> Result<(), RuntimeError> { + for stmt in statements { + if let Some(val) = self.execute(stmt.clone())? { + // 如果是表达式语句且结果不是 Nil,打印结果 + if matches!(stmt, Stmt::ExprStmt(_)) && !matches!(val, Value::Nil) { + println!("{}", val); + } + } + } + Ok(()) + } + + fn execute(&mut self, stmt: Stmt) -> Result, RuntimeError> { + match stmt { + Stmt::Let { name, initializer } => { + let val = self.evaluate(initializer)?; + self.env.borrow_mut().define(name, val); + Ok(None) + } + Stmt::ExprStmt(expr) => { + Ok(Some(self.evaluate(expr)?)) + } + Stmt::Block(stmts) => { + let previous = Rc::clone(&self.env); + self.env = Rc::new(RefCell::new(Env::new(Some(previous)))); + let mut result = None; + for s in stmts { + result = self.execute(s)?; + } + let parent = self.env.borrow().parent.as_ref().unwrap().clone(); + self.env = parent; + Ok(result) + } + Stmt::If { condition, then_branch, else_branch } => { + let cond_val = self.evaluate(condition)?; + if self.is_truthy(&cond_val) { + self.execute(*then_branch) + } else if let Some(else_branch) = else_branch { + self.execute(*else_branch) + } else { + Ok(None) + } + } + Stmt::While { condition, body } => { + loop { + let cond_val = self.evaluate(condition.clone())?; + if !self.is_truthy(&cond_val) { + break; + } + self.execute(*body.clone())?; + } + Ok(None) + } + Stmt::Function { name, params, body } => { + let func = Value::Function(Rc::new(Function { + params, + body, + env: Rc::clone(&self.env), + name: Some(name.clone()), + })); + self.env.borrow_mut().define(name, func); + Ok(None) + } + Stmt::Return(expr_opt) => { + if let Some(expr) = expr_opt { + Ok(Some(self.evaluate(expr)?)) + } else { + Ok(Some(Value::Nil)) + } + } + } + } + + fn evaluate(&mut self, expr: Expr) -> Result { + match expr { + Expr::Literal(lit) => Ok(match lit { + crate::ast::expr::Literal::Number(n) => Value::Number(n), + crate::ast::expr::Literal::String(s) => Value::String(s), + crate::ast::expr::Literal::Bool(b) => Value::Bool(b), + crate::ast::expr::Literal::Nil => Value::Nil, + }), + Expr::Variable(name) => { + match self.env.borrow().get(&name) { + Some(val) => Ok(val), + None => Err(RuntimeError::RuntimeError { + message: format!("Undefined variable '{}'", name), + token: None, + }), + } + } + Expr::Assign { name, value } => { + let val = self.evaluate(*value)?; + if !self.env.borrow_mut().assign(&name, val.clone()) { + return Err(RuntimeError::RuntimeError { + message: format!("Undefined variable '{}'", name), + token: None, + }); + } + Ok(val) + } + Expr::Unary { op, right } => { + let val = self.evaluate(*right)?; + match op { + crate::ast::expr::UnaryOp::Negate => match val { + Value::Number(n) => Ok(Value::Number(-n)), + _ => Err(RuntimeError::RuntimeError { + message: "Unary '-' on non-number".to_string(), + token: None, + }), + }, + crate::ast::expr::UnaryOp::Not => Ok(Value::Bool(!self.is_truthy(&val))), + } + } + Expr::Binary { left, op, right } => { + let l = self.evaluate(*left)?; + let r = self.evaluate(*right)?; + match op { + crate::ast::expr::BinaryOp::Add => match (l, r) { + (Value::Number(a), Value::Number(b)) => Ok(Value::Number(a+b)), + (Value::String(a), Value::String(b)) => Ok(Value::String(a+&b)), + _ => Err(RuntimeError::RuntimeError { + message: "Invalid '+' operands".to_string(), + token: None, + }), + }, + crate::ast::expr::BinaryOp::Sub => match (l,r) { + (Value::Number(a), Value::Number(b)) => Ok(Value::Number(a-b)), + _ => Err(RuntimeError::RuntimeError { + message: "Invalid '-' operands".to_string(), + token: None, + }), + }, + crate::ast::expr::BinaryOp::Mul => match (l,r) { + (Value::Number(a), Value::Number(b)) => Ok(Value::Number(a*b)), + _ => Err(RuntimeError::RuntimeError { + message: "Invalid '*' operands".to_string(), + token: None, + }), + }, + crate::ast::expr::BinaryOp::Div => match (l,r) { + (Value::Number(a), Value::Number(b)) => Ok(Value::Number(a/b)), + _ => Err(RuntimeError::RuntimeError { + message: "Invalid '/' operands".to_string(), + token: None, + }), + }, + crate::ast::expr::BinaryOp::Greater => Ok(Value::Bool(self.as_number(&l)? > self.as_number(&r)?)), + crate::ast::expr::BinaryOp::GreaterEqual => Ok(Value::Bool(self.as_number(&l)? >= self.as_number(&r)?)), + crate::ast::expr::BinaryOp::Less => Ok(Value::Bool(self.as_number(&l)? < self.as_number(&r)?)), + crate::ast::expr::BinaryOp::LessEqual => Ok(Value::Bool(self.as_number(&l)? <= self.as_number(&r)?)), + crate::ast::expr::BinaryOp::Equal => Ok(Value::Bool(self.is_equal(&l,&r))), + crate::ast::expr::BinaryOp::NotEqual => Ok(Value::Bool(!self.is_equal(&l,&r))), + } + } + Expr::Logical { left, op, right } => { + let l = self.evaluate(*left)?; + match op { + crate::ast::expr::LogicalOp::And => { + Ok(if !self.is_truthy(&l) { l } else { self.evaluate(*right)? }) + } + crate::ast::expr::LogicalOp::Or => { + Ok(if self.is_truthy(&l) { l } else { self.evaluate(*right)? }) + } + } + } + Expr::Call { callee, arguments } => { + let func = self.evaluate(*callee)?; + let mut args = Vec::new(); + for e in arguments { + args.push(self.evaluate(e)?); + } + self.call_function(func, args) + } + Expr::Lambda { params, body } => { + Ok(Value::Function(Rc::new(Function { + params, + body, + env: Rc::clone(&self.env), + name: None, + }))) + } + } + } + + fn call_function(&mut self, func_val: Value, args: Vec) -> Result { + match func_val { + Value::NativeFunction(native_fn) => { + Ok(native_fn(args)) + } + Value::Function(f) => { + let env = Rc::new(RefCell::new(Env::new(Some(Rc::clone(&f.env))))); + + if let Some(name) = &f.name { + env.borrow_mut().define(name.clone(), Value::Function(Rc::clone(&f))); + } + + for (i,param) in f.params.iter().enumerate() { + let val = args.get(i).cloned().unwrap_or(Value::Nil); + env.borrow_mut().define(param.clone(), val); + } + + let previous = Rc::clone(&self.env); + self.env = env; + let mut ret = Value::Nil; + for stmt in &f.body { + if let Some(val) = self.execute(stmt.clone())? { + ret = val; + break; + } + } + self.env = previous; + Ok(ret) + } + _ => { + Err(RuntimeError::RuntimeError { + message: "Attempt to call non-function".to_string(), + token: None, + }) + } + } + } + + fn is_truthy(&self, val: &Value) -> bool { + match val { + Value::Nil => false, + Value::Bool(b) => *b, + _ => true, + } + } + + fn is_equal(&self, a: &Value, b: &Value) -> bool { + match (a,b) { + (Value::Nil, Value::Nil) => true, + (Value::Bool(x), Value::Bool(y)) => x==y, + (Value::Number(x), Value::Number(y)) => x==y, + (Value::String(x), Value::String(y)) => x==y, + _ => false, + } + } + + fn as_number(&self, val: &Value) -> Result { + if let Value::Number(n) = val { + Ok(*n) + } else { + Err(RuntimeError::RuntimeError { + message: "Expected number".to_string(), + token: None, + }) + } + } +} + +// 内置函数实现 +fn native_print(args: Vec) -> Value { + for (i, arg) in args.iter().enumerate() { + if i > 0 { + print!(" "); + } + print!("{}", arg); + } + println!(); + Value::Nil +} + +// 获取当前时间 +fn native_clock(_args: Vec) -> Value { + Value::Number(std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64()) +} diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs new file mode 100644 index 0000000..f30b5c1 --- /dev/null +++ b/src/interpreter/mod.rs @@ -0,0 +1,55 @@ +pub mod env; +pub mod interpreter; + +pub use env::Env; +pub use interpreter::Interpreter; + +use std::rc::Rc; +use std::cell::RefCell; +use std::fmt; + +pub type NativeFn = fn(Vec) -> Value; + +#[derive(Clone)] +pub enum Value { + Number(f64), + String(String), + Bool(bool), + Nil, + Function(Rc), + NativeFunction(NativeFn), +} + +impl std::fmt::Debug for Value { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Value::Number(n) => write!(f, "Number({:?})", n), + Value::String(s) => write!(f, "String({:?})", s), + Value::Bool(b) => write!(f, "Bool({:?})", b), + Value::Nil => write!(f, "Nil"), + Value::Function(_) => write!(f, "Function(...)"), + Value::NativeFunction(_) => write!(f, "NativeFunction(...)"), + } + } +} + +impl fmt::Display for Value { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Value::Number(n) => write!(f, "{}", n), + Value::String(s) => write!(f, "{}", s), + Value::Bool(b) => write!(f, "{}", b), + Value::Nil => write!(f, "nil"), + Value::Function(_) => write!(f, ""), + Value::NativeFunction(_) => write!(f, ""), + } + } +} + +#[derive(Debug, Clone)] +pub struct Function { + pub params: Vec, + pub body: Vec, + pub env: Rc>, // 闭包捕获环境 + pub name: Option, +} diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs new file mode 100644 index 0000000..c7c7fb8 --- /dev/null +++ b/src/lexer/lexer.rs @@ -0,0 +1,261 @@ +// src/lexer/lexer.rs + +use super::{Token, TokenKind}; + +pub struct Lexer { + src: Vec, + current: usize, + line: usize, + column: usize, +} + +impl Lexer { + pub fn new(input: &str) -> Self { + Self { + src: input.chars().collect(), + current: 0, + line: 1, + column: 1, + } + } + + pub fn tokenize(mut self) -> Vec { + let mut tokens = Vec::new(); + + while !self.is_at_end() { + if let Some(token) = self.next_token() { + tokens.push(token); + } + } + + tokens.push(Token { + kind: TokenKind::EOF, + line: self.line, + column: self.column, + }); + + tokens + } + + fn next_token(&mut self) -> Option { + self.skip_whitespace(); + + if self.is_at_end() { + return None; + } + + let line = self.line; + let column = self.column; + let c = self.advance(); + + let kind = match c { + '(' => TokenKind::LeftParen, + ')' => TokenKind::RightParen, + '{' => TokenKind::LeftBrace, + '}' => TokenKind::RightBrace, + ',' => TokenKind::Comma, + ';' => TokenKind::Semicolon, + + '+' => TokenKind::Plus, + '-' => TokenKind::Minus, + '*' => TokenKind::Star, + + '/' => { + if self.match_char('/') { + // 单行注释 + while !self.is_at_end() && self.peek() != '\n' { + self.advance(); + } + return None; + } else { + TokenKind::Slash + } + } + + '!' => { + if self.match_char('=') { + TokenKind::BangEqual + } else { + TokenKind::Bang + } + } + + '=' => { + if self.match_char('=') { + TokenKind::EqualEqual + } else { + TokenKind::Equal + } + } + + '>' => { + if self.match_char('=') { + TokenKind::GreaterEqual + } else { + TokenKind::Greater + } + } + + '<' => { + if self.match_char('=') { + TokenKind::LessEqual + } else { + TokenKind::Less + } + } + + '&' => { + if self.match_char('&') { + TokenKind::AndAnd + } else { + panic!("Unexpected '&' at {}:{}", line, column); + } + } + + '|' => { + if self.match_char('|') { + TokenKind::OrOr + } else { + panic!("Unexpected '|' at {}:{}", line, column); + } + } + + '"' => return Some(self.string_literal(line, column)), + + c if c.is_ascii_digit() => { + return Some(self.number_literal(c, line, column)); + } + + c if is_ident_start(c) => { + return Some(self.identifier(c, line, column)); + } + + _ => { + panic!("Unexpected character '{}' at {}:{}", c, line, column); + } + }; + + Some(Token { kind, line, column }) + } + + // ---------------- helpers ---------------- + + fn skip_whitespace(&mut self) { + loop { + if self.is_at_end() { + return; + } + + match self.peek() { + ' ' | '\t' | '\r' => { + self.advance(); + } + '\n' => { + self.advance(); + self.line += 1; + self.column = 1; + } + _ => return, + } + } + } + + fn advance(&mut self) -> char { + let c = self.src[self.current]; + self.current += 1; + self.column += 1; + c + } + + fn match_char(&mut self, expected: char) -> bool { + if self.is_at_end() || self.peek() != expected { + return false; + } + self.advance(); + true + } + + fn peek(&self) -> char { + self.src[self.current] + } + + fn is_at_end(&self) -> bool { + self.current >= self.src.len() + } + + fn string_literal(&mut self, line: usize, column: usize) -> Token { + let mut value = String::new(); + + while !self.is_at_end() && self.peek() != '"' { + value.push(self.advance()); + } + + if self.is_at_end() { + panic!("Unterminated string at {}:{}", line, column); + } + + self.advance(); // consume closing " + + Token { + kind: TokenKind::String(value), + line, + column, + } + } + + fn number_literal(&mut self, first: char, line: usize, column: usize) -> Token { + let mut s = String::new(); + s.push(first); + + while !self.is_at_end() && self.peek().is_ascii_digit() { + s.push(self.advance()); + } + + if !self.is_at_end() && self.peek() == '.' { + s.push(self.advance()); + while !self.is_at_end() && self.peek().is_ascii_digit() { + s.push(self.advance()); + } + } + + let value = s.parse::().unwrap(); + + Token { + kind: TokenKind::Number(value), + line, + column, + } + } + + fn identifier(&mut self, first: char, line: usize, column: usize) -> Token { + let mut s = String::new(); + s.push(first); + + while !self.is_at_end() && is_ident_part(self.peek()) { + s.push(self.advance()); + } + + let kind = match s.as_str() { + "let" => TokenKind::Let, + "fn" => TokenKind::Fn, + "if" => TokenKind::If, + "else" => TokenKind::Else, + "while" => TokenKind::While, + "return" => TokenKind::Return, + "true" => TokenKind::True, + "false" => TokenKind::False, + "nil" => TokenKind::Nil, + _ => TokenKind::Identifier(s), + }; + + Token { kind, line, column } + } +} + +fn is_ident_start(c: char) -> bool { + c.is_ascii_alphabetic() || c == '_' +} + +fn is_ident_part(c: char) -> bool { + is_ident_start(c) || c.is_ascii_digit() +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..4a290f2 --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,5 @@ +pub mod token; +pub mod lexer; + +pub use token::{Token, TokenKind}; +pub use lexer::Lexer; \ No newline at end of file diff --git a/src/lexer/token.rs b/src/lexer/token.rs new file mode 100644 index 0000000..b33e882 --- /dev/null +++ b/src/lexer/token.rs @@ -0,0 +1,42 @@ +// lexer/token.rs +#[derive(Debug, Clone, PartialEq)] +pub enum TokenKind { + // 单字符 + LeftParen, RightParen, + LeftBrace, RightBrace, + Comma, Semicolon, + + // 运算符 + Plus, Minus, Star, Slash, + Bang, + Equal, + Greater, GreaterEqual, + Less, LessEqual, + EqualEqual, BangEqual, + AndAnd, OrOr, + + // 字面量 + Identifier(String), + Number(f64), + String(String), + + // 关键字 + Let, + Fn, + If, + Else, + While, + Return, + True, + False, + Nil, + + EOF, +} + +#[derive(Debug, Clone)] +pub struct Token { + pub kind: TokenKind, + pub line: usize, + pub column: usize, +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..95fb4b7 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,53 @@ +mod lexer; +mod ast; +mod parser; +mod interpreter; +mod error; + +use lexer::Lexer; +use parser::Parser; +use interpreter::Interpreter; +use std::env; +use std::fs; +use std::io::{self, Read}; + +fn main() { + let src = match env::args().len() { + 1 => { + // 无参数:从 stdin 读取 + let mut buf = String::new(); + if let Err(e) = io::stdin().read_to_string(&mut buf) { + eprintln!("Failed to read stdin: {}", e); + std::process::exit(1); + } + buf + } + 2 => { + // 一个参数:作为文件路径 + let path = env::args().nth(1).unwrap(); + match fs::read_to_string(&path) { + Ok(s) => s, + Err(e) => { + eprintln!("Failed to read file '{}': {}", path, e); + std::process::exit(1); + } + } + } + _ => { + eprintln!("Usage: aster [script_file]"); + eprintln!(" aster - run from stdin"); + eprintln!(" aster - run script file"); + std::process::exit(1); + } + }; + + let tokens = Lexer::new(&src).tokenize(); + let mut parser = Parser::new(tokens); + let stmts = parser.parse(); + + let mut interpreter = Interpreter::new(); + if let Err(e) = interpreter.interpret(stmts) { + eprintln!("{}", e); + std::process::exit(1); + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..4c760ac --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,3 @@ +pub mod parser; + +pub use parser::Parser; diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..d02dae3 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,431 @@ +use crate::ast::*; +use crate::lexer::{Token, TokenKind}; +use crate::error::RuntimeError; + +pub struct Parser { + tokens: Vec, + current: usize, +} + +impl Parser { + pub fn new(tokens: Vec) -> Self { + Self { tokens, current: 0 } + } + + pub fn parse(&mut self) -> Vec { + let mut statements = Vec::new(); + + while !self.is_at_end() { + statements.push(self.declaration()); + } + + statements + } + + fn declaration(&mut self) -> Stmt { + if self.match_kind(&[TokenKind::Let]) { + self.let_declaration() + } else if self.match_kind(&[TokenKind::Fn]) { + self.fn_declaration() + } else { + self.statement() + } + } + + fn statement(&mut self) -> Stmt { + if self.match_kind(&[TokenKind::If]) { + self.if_statement() + } else if self.match_kind(&[TokenKind::While]) { + self.while_statement() + } else if self.match_kind(&[TokenKind::Return]) { + self.return_statement() + } else if self.match_kind(&[TokenKind::LeftBrace]) { + Stmt::Block(self.block()) + } else { + self.expression_statement() + } + } + + fn let_declaration(&mut self) -> Stmt { + let name = self.consume_ident("Expected variable name.").unwrap(); + self.consume(TokenKind::Equal, "Expected '=' after variable name.").unwrap(); + let initializer = self.expression(); + self.match_kind(&[TokenKind::Semicolon]); // 分号可选 + + Stmt::Let { name, initializer } + } + + fn fn_declaration(&mut self) -> Stmt { + let name = self.consume_ident("Expected function name.").unwrap(); + self.consume(TokenKind::LeftParen, "Expected '(' after function name.").unwrap(); + + let mut params = Vec::new(); + if !self.check(&TokenKind::RightParen) { + loop { + params.push(self.consume_ident("Expected parameter name.").unwrap()); + if !self.match_kind(&[TokenKind::Comma]) { + break; + } + } + } + + self.consume(TokenKind::RightParen, "Expected ')' after parameters.").unwrap(); + self.consume(TokenKind::LeftBrace, "Expected '{' before function body.").unwrap(); + let body = self.block(); + + Stmt::Function { name, params, body } + } + + fn lambda_expr(&mut self) -> Expr { + self.consume(TokenKind::LeftParen, "Expected '(' after 'fn'.").unwrap(); + let mut params = Vec::new(); + if !self.check(&TokenKind::RightParen) { + loop { + params.push(self.consume_ident("Expected parameter name.").unwrap()); + if !self.match_kind(&[TokenKind::Comma]) { + break; + } + } + } + self.consume(TokenKind::RightParen, "Expected ')' after parameters.").unwrap(); + self.consume(TokenKind::LeftBrace, "Expected '{' before function body.").unwrap(); + let body = self.block(); + Expr::Lambda { params, body } + } + + fn if_statement(&mut self) -> Stmt { + self.consume(TokenKind::LeftParen, "Expected '(' after 'if'.").unwrap(); + let condition = self.expression(); + self.consume(TokenKind::RightParen, "Expected ')' after condition.").unwrap(); + + let then_branch = Box::new(self.statement()); + let else_branch = if self.match_kind(&[TokenKind::Else]) { + Some(Box::new(self.statement())) + } else { + None + }; + + Stmt::If { condition, then_branch, else_branch } + } + + fn while_statement(&mut self) -> Stmt { + self.consume(TokenKind::LeftParen, "Expected '(' after 'while'.").unwrap(); + let condition = self.expression(); + self.consume(TokenKind::RightParen, "Expected ')' after condition.").unwrap(); + + let body = Box::new(self.statement()); + Stmt::While { condition, body } + } + + fn return_statement(&mut self) -> Stmt { + let value = if !self.check(&TokenKind::Semicolon) && !self.check(&TokenKind::RightBrace) { + Some(self.expression()) + } else { + None + }; + + self.match_kind(&[TokenKind::Semicolon]); // 分号可选 + Stmt::Return(value) + } + + fn block(&mut self) -> Vec { + let mut statements = Vec::new(); + + while !self.check(&TokenKind::RightBrace) && !self.is_at_end() { + statements.push(self.declaration()); + } + + self.consume(TokenKind::RightBrace, "Expected '}' after block.").unwrap(); + statements + } + + fn expression_statement(&mut self) -> Stmt { + let expr = self.expression(); + self.match_kind(&[TokenKind::Semicolon]); // 分号可选 + Stmt::ExprStmt(expr) + } + + fn expression(&mut self) -> Expr { + self.assignment() + } + + fn assignment(&mut self) -> Expr { + let expr = self.logical_or(); + + if self.match_kind(&[TokenKind::Equal]) { + let value = self.assignment(); + if let Expr::Variable(name) = expr { + return Expr::Assign { + name, + value: Box::new(value), + }; + } else { + panic!("Invalid assignment target."); + } + } + + expr + } + + fn logical_or(&mut self) -> Expr { + let mut expr = self.logical_and(); + + while self.match_kind(&[TokenKind::OrOr]) { + let right = self.logical_and(); + expr = Expr::Logical { + left: Box::new(expr), + op: LogicalOp::Or, + right: Box::new(right), + }; + } + + expr + } + + fn logical_and(&mut self) -> Expr { + let mut expr = self.equality(); + + while self.match_kind(&[TokenKind::AndAnd]) { + let right = self.equality(); + expr = Expr::Logical { + left: Box::new(expr), + op: LogicalOp::And, + right: Box::new(right), + }; + } + + expr + } + + fn equality(&mut self) -> Expr { + let mut expr = self.comparison(); + + while self.match_kind(&[TokenKind::EqualEqual, TokenKind::BangEqual]) { + let op = match self.previous().kind { + TokenKind::EqualEqual => BinaryOp::Equal, + TokenKind::BangEqual => BinaryOp::NotEqual, + _ => unreachable!(), + }; + let right = self.comparison(); + expr = Expr::Binary { + left: Box::new(expr), + op, + right: Box::new(right), + }; + } + + expr + } + + fn comparison(&mut self) -> Expr { + let mut expr = self.term(); + + while self.match_kind(&[TokenKind::Greater, TokenKind::GreaterEqual, TokenKind::Less, TokenKind::LessEqual]) { + let op = match self.previous().kind { + TokenKind::Greater => BinaryOp::Greater, + TokenKind::GreaterEqual => BinaryOp::GreaterEqual, + TokenKind::Less => BinaryOp::Less, + TokenKind::LessEqual => BinaryOp::LessEqual, + _ => unreachable!(), + }; + let right = self.term(); + expr = Expr::Binary { + left: Box::new(expr), + op, + right: Box::new(right), + }; + } + + expr + } + + fn term(&mut self) -> Expr { + let mut expr = self.factor(); + + while self.match_kind(&[TokenKind::Plus, TokenKind::Minus]) { + let op = match self.previous().kind { + TokenKind::Plus => BinaryOp::Add, + TokenKind::Minus => BinaryOp::Sub, + _ => unreachable!(), + }; + let right = self.factor(); + expr = Expr::Binary { + left: Box::new(expr), + op, + right: Box::new(right), + }; + } + + expr + } + + fn factor(&mut self) -> Expr { + let mut expr = self.unary(); + + while self.match_kind(&[TokenKind::Star, TokenKind::Slash]) { + let op = match self.previous().kind { + TokenKind::Star => BinaryOp::Mul, + TokenKind::Slash => BinaryOp::Div, + _ => unreachable!(), + }; + let right = self.unary(); + expr = Expr::Binary { + left: Box::new(expr), + op, + right: Box::new(right), + }; + } + + expr + } + + fn unary(&mut self) -> Expr { + if self.match_kind(&[TokenKind::Bang, TokenKind::Minus]) { + let op = match self.previous().kind { + TokenKind::Bang => UnaryOp::Not, + TokenKind::Minus => UnaryOp::Negate, + _ => unreachable!(), + }; + let right = self.unary(); + return Expr::Unary { + op, + right: Box::new(right), + }; + } + + self.call() + } + + fn call(&mut self) -> Expr { + let mut expr = self.primary(); + + while self.match_kind(&[TokenKind::LeftParen]) { + let mut arguments = Vec::new(); + if !self.check(&TokenKind::RightParen) { + loop { + arguments.push(self.expression()); + if !self.match_kind(&[TokenKind::Comma]) { + break; + } + } + } + self.consume(TokenKind::RightParen, "Expected ')' after arguments.").unwrap(); + expr = Expr::Call { + callee: Box::new(expr), + arguments, + }; + } + + expr + } + + fn primary(&mut self) -> Expr { + // 布尔值 + if self.match_kind(&[TokenKind::True]) { + return Expr::Literal(Literal::Bool(true)); + } + if self.match_kind(&[TokenKind::False]) { + return Expr::Literal(Literal::Bool(false)); + } + if self.match_kind(&[TokenKind::Nil]) { + return Expr::Literal(Literal::Nil); + } + + // 数字 + if let TokenKind::Number(n) = self.peek().kind { + self.advance(); + return Expr::Literal(Literal::Number(n)); + } + + // 字符串 + if let TokenKind::String(s) = &self.peek().kind { + let s = s.clone(); + self.advance(); + return Expr::Literal(Literal::String(s)); + } + + // 标识符(变量) + if let TokenKind::Identifier(name) = &self.peek().kind { + let name = name.clone(); + self.advance(); + return Expr::Variable(name); + } + + // 匿名函数(闭包): fn(params) { body } + if self.match_kind(&[TokenKind::Fn]) { + return self.lambda_expr(); + } + + // 括号表达式 + if self.match_kind(&[TokenKind::LeftParen]) { + let expr = self.expression(); + self.consume(TokenKind::RightParen, "Expected ')' after expression.").unwrap(); + return expr; + } + + panic!("Expected expression at line {}", self.peek().line); + } + + fn match_kind(&mut self, kinds: &[TokenKind]) -> bool { + for kind in kinds { + if self.check(kind) { + self.advance(); + return true; + } + } + false + } + + fn consume(&mut self, kind: TokenKind, msg: &str) -> Result<(), RuntimeError> { + if self.check(&kind) { + self.advance(); + Ok(()) + } else { + Err(RuntimeError::ParseError { + message: msg.to_string(), + token: self.peek().clone(), + }) + } + } + + fn consume_ident(&mut self, msg: &str) -> Result { + match &self.peek().kind { + TokenKind::Identifier(name) => { + let name = name.clone(); + self.advance(); + Ok(name) + } + _ => Err(RuntimeError::ParseError { + message: msg.to_string(), + token: self.peek().clone(), + }), + } + } + + fn check(&self, kind: &TokenKind) -> bool { + if self.is_at_end() { + return false; + } + std::mem::discriminant(&self.peek().kind) + == std::mem::discriminant(kind) + } + + fn advance(&mut self) -> &Token { + if !self.is_at_end() { + self.current += 1; + } + self.previous() + } + + fn is_at_end(&self) -> bool { + matches!(self.peek().kind, TokenKind::EOF) + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } +} \ No newline at end of file