// src/lexer/lexer.rs

use super::{Token, TokenKind};
/// Hand-written scanner that turns source text into a flat list of tokens.
///
/// Build one with `Lexer::new` and consume it with `Lexer::tokenize`.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The whole input, decoded into `char`s so it can be indexed per character.
    src: Vec<char>,
    /// Index into `src` of the next character to be read.
    current: usize,
    /// Line of the next character to be read (starts at 1).
    line: usize,
    /// Column of the next character to be read (starts at 1).
    column: usize,
}
impl Lexer {
|
|
pub fn new(input: &str) -> Self {
|
|
Self {
|
|
src: input.chars().collect(),
|
|
current: 0,
|
|
line: 1,
|
|
column: 1,
|
|
}
|
|
}
|
|
|
|
pub fn tokenize(mut self) -> Vec<Token> {
|
|
let mut tokens = Vec::new();
|
|
|
|
while !self.is_at_end() {
|
|
if let Some(token) = self.next_token() {
|
|
tokens.push(token);
|
|
}
|
|
}
|
|
|
|
tokens.push(Token {
|
|
kind: TokenKind::EOF,
|
|
line: self.line,
|
|
column: self.column,
|
|
});
|
|
|
|
tokens
|
|
}
|
|
|
|
fn next_token(&mut self) -> Option<Token> {
|
|
self.skip_whitespace();
|
|
|
|
if self.is_at_end() {
|
|
return None;
|
|
}
|
|
|
|
let line = self.line;
|
|
let column = self.column;
|
|
let c = self.advance();
|
|
|
|
let kind = match c {
|
|
'(' => TokenKind::LeftParen,
|
|
')' => TokenKind::RightParen,
|
|
'{' => TokenKind::LeftBrace,
|
|
'}' => TokenKind::RightBrace,
|
|
',' => TokenKind::Comma,
|
|
';' => TokenKind::Semicolon,
|
|
|
|
'+' => TokenKind::Plus,
|
|
'-' => TokenKind::Minus,
|
|
'*' => TokenKind::Star,
|
|
'.' => TokenKind::Dot,
|
|
':' => TokenKind::Colon,
|
|
|
|
'/' => {
|
|
if self.match_char('/') {
|
|
// 单行注释
|
|
while !self.is_at_end() && self.peek() != '\n' {
|
|
self.advance();
|
|
}
|
|
return None;
|
|
} else {
|
|
TokenKind::Slash
|
|
}
|
|
}
|
|
|
|
'!' => {
|
|
if self.match_char('=') {
|
|
TokenKind::BangEqual
|
|
} else {
|
|
TokenKind::Bang
|
|
}
|
|
}
|
|
|
|
'=' => {
|
|
if self.match_char('=') {
|
|
TokenKind::EqualEqual
|
|
} else {
|
|
TokenKind::Equal
|
|
}
|
|
}
|
|
|
|
'>' => {
|
|
if self.match_char('=') {
|
|
TokenKind::GreaterEqual
|
|
} else {
|
|
TokenKind::Greater
|
|
}
|
|
}
|
|
|
|
'<' => {
|
|
if self.match_char('=') {
|
|
TokenKind::LessEqual
|
|
} else {
|
|
TokenKind::Less
|
|
}
|
|
}
|
|
|
|
'&' => {
|
|
if self.match_char('&') {
|
|
TokenKind::AndAnd
|
|
} else {
|
|
panic!("Unexpected '&' at {}:{}", line, column);
|
|
}
|
|
}
|
|
|
|
'|' => {
|
|
if self.match_char('|') {
|
|
TokenKind::OrOr
|
|
} else {
|
|
panic!("Unexpected '|' at {}:{}", line, column);
|
|
}
|
|
}
|
|
|
|
'"' => return Some(self.string_literal(line, column)),
|
|
|
|
c if c.is_ascii_digit() => {
|
|
return Some(self.number_literal(c, line, column));
|
|
}
|
|
|
|
c if is_ident_start(c) => {
|
|
return Some(self.identifier(c, line, column));
|
|
}
|
|
|
|
_ => {
|
|
panic!("Unexpected character '{}' at {}:{}", c, line, column);
|
|
}
|
|
};
|
|
|
|
Some(Token { kind, line, column })
|
|
}
|
|
|
|
// ---------------- helpers ----------------
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
loop {
|
|
if self.is_at_end() {
|
|
return;
|
|
}
|
|
|
|
match self.peek() {
|
|
' ' | '\t' | '\r' => {
|
|
self.advance();
|
|
}
|
|
'\n' => {
|
|
self.advance();
|
|
self.line += 1;
|
|
self.column = 1;
|
|
}
|
|
_ => return,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn advance(&mut self) -> char {
|
|
let c = self.src[self.current];
|
|
self.current += 1;
|
|
self.column += 1;
|
|
c
|
|
}
|
|
|
|
fn match_char(&mut self, expected: char) -> bool {
|
|
if self.is_at_end() || self.peek() != expected {
|
|
return false;
|
|
}
|
|
self.advance();
|
|
true
|
|
}
|
|
|
|
fn peek(&self) -> char {
|
|
self.src[self.current]
|
|
}
|
|
|
|
fn is_at_end(&self) -> bool {
|
|
self.current >= self.src.len()
|
|
}
|
|
|
|
fn string_literal(&mut self, line: usize, column: usize) -> Token {
|
|
let mut value = String::new();
|
|
|
|
while !self.is_at_end() && self.peek() != '"' {
|
|
value.push(self.advance());
|
|
}
|
|
|
|
if self.is_at_end() {
|
|
panic!("Unterminated string at {}:{}", line, column);
|
|
}
|
|
|
|
self.advance(); // consume closing "
|
|
|
|
Token {
|
|
kind: TokenKind::String(value),
|
|
line,
|
|
column,
|
|
}
|
|
}
|
|
|
|
fn number_literal(&mut self, first: char, line: usize, column: usize) -> Token {
|
|
let mut s = String::new();
|
|
s.push(first);
|
|
|
|
while !self.is_at_end() && self.peek().is_ascii_digit() {
|
|
s.push(self.advance());
|
|
}
|
|
|
|
if !self.is_at_end() && self.peek() == '.' {
|
|
s.push(self.advance());
|
|
while !self.is_at_end() && self.peek().is_ascii_digit() {
|
|
s.push(self.advance());
|
|
}
|
|
}
|
|
|
|
let value = s.parse::<f64>().unwrap();
|
|
|
|
Token {
|
|
kind: TokenKind::Number(value),
|
|
line,
|
|
column,
|
|
}
|
|
}
|
|
|
|
fn identifier(&mut self, first: char, line: usize, column: usize) -> Token {
|
|
let mut s = String::new();
|
|
s.push(first);
|
|
|
|
while !self.is_at_end() && is_ident_part(self.peek()) {
|
|
s.push(self.advance());
|
|
}
|
|
|
|
let kind = match s.as_str() {
|
|
"let" => TokenKind::Let,
|
|
"fn" => TokenKind::Fn,
|
|
"if" => TokenKind::If,
|
|
"else" => TokenKind::Else,
|
|
"while" => TokenKind::While,
|
|
"return" => TokenKind::Return,
|
|
"true" => TokenKind::True,
|
|
"false" => TokenKind::False,
|
|
"nil" => TokenKind::Nil,
|
|
_ => TokenKind::Identifier(s),
|
|
};
|
|
|
|
Token { kind, line, column }
|
|
}
|
|
}
/// Returns `true` if `c` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
fn is_ident_part(c: char) -> bool {
|
|
is_ident_start(c) || c.is_ascii_digit()
|
|
}