// src/lexer/lexer.rs use super::{Token, TokenKind}; pub struct Lexer { src: Vec, current: usize, line: usize, column: usize, } impl Lexer { pub fn new(input: &str) -> Self { Self { src: input.chars().collect(), current: 0, line: 1, column: 1, } } pub fn tokenize(mut self) -> Vec { let mut tokens = Vec::new(); while !self.is_at_end() { if let Some(token) = self.next_token() { tokens.push(token); } } tokens.push(Token { kind: TokenKind::EOF, line: self.line, column: self.column, }); tokens } fn next_token(&mut self) -> Option { self.skip_whitespace(); if self.is_at_end() { return None; } let line = self.line; let column = self.column; let c = self.advance(); let kind = match c { '(' => TokenKind::LeftParen, ')' => TokenKind::RightParen, '{' => TokenKind::LeftBrace, '}' => TokenKind::RightBrace, ',' => TokenKind::Comma, ';' => TokenKind::Semicolon, '+' => TokenKind::Plus, '-' => TokenKind::Minus, '*' => TokenKind::Star, '.' => TokenKind::Dot, ':' => TokenKind::Colon, '/' => { if self.match_char('/') { // 单行注释 while !self.is_at_end() && self.peek() != '\n' { self.advance(); } return None; } else { TokenKind::Slash } } '!' => { if self.match_char('=') { TokenKind::BangEqual } else { TokenKind::Bang } } '=' => { if self.match_char('=') { TokenKind::EqualEqual } else { TokenKind::Equal } } '>' => { if self.match_char('=') { TokenKind::GreaterEqual } else { TokenKind::Greater } } '<' => { if self.match_char('=') { TokenKind::LessEqual } else { TokenKind::Less } } '&' => { if self.match_char('&') { TokenKind::AndAnd } else { panic!("Unexpected '&' at {}:{}", line, column); } } '|' => { if self.match_char('|') { TokenKind::OrOr } else { panic!("Unexpected '|' at {}:{}", line, column); } } '"' => return Some(self.string_literal(line, column)), c if c.is_ascii_digit() => { return Some(self.number_literal(c, line, column)); } c if is_ident_start(c) => { return Some(self.identifier(c, line, column)); } _ => { panic!("Unexpected character '{}' at {}:{}", c, line, column); } }; Some(Token { kind, line, column }) } // ---------------- helpers ---------------- fn skip_whitespace(&mut self) { loop { if self.is_at_end() { return; } match self.peek() { ' ' | '\t' | '\r' => { self.advance(); } '\n' => { self.advance(); self.line += 1; self.column = 1; } _ => return, } } } fn advance(&mut self) -> char { let c = self.src[self.current]; self.current += 1; self.column += 1; c } fn match_char(&mut self, expected: char) -> bool { if self.is_at_end() || self.peek() != expected { return false; } self.advance(); true } fn peek(&self) -> char { self.src[self.current] } fn is_at_end(&self) -> bool { self.current >= self.src.len() } fn string_literal(&mut self, line: usize, column: usize) -> Token { let mut value = String::new(); while !self.is_at_end() && self.peek() != '"' { value.push(self.advance()); } if self.is_at_end() { panic!("Unterminated string at {}:{}", line, column); } self.advance(); // consume closing " Token { kind: TokenKind::String(value), line, column, } } fn number_literal(&mut self, first: char, line: usize, column: usize) -> Token { let mut s = String::new(); s.push(first); while !self.is_at_end() && self.peek().is_ascii_digit() { s.push(self.advance()); } if !self.is_at_end() && self.peek() == '.' { s.push(self.advance()); while !self.is_at_end() && self.peek().is_ascii_digit() { s.push(self.advance()); } } let value = s.parse::().unwrap(); Token { kind: TokenKind::Number(value), line, column, } } fn identifier(&mut self, first: char, line: usize, column: usize) -> Token { let mut s = String::new(); s.push(first); while !self.is_at_end() && is_ident_part(self.peek()) { s.push(self.advance()); } let kind = match s.as_str() { "let" => TokenKind::Let, "fn" => TokenKind::Fn, "if" => TokenKind::If, "else" => TokenKind::Else, "while" => TokenKind::While, "return" => TokenKind::Return, "true" => TokenKind::True, "false" => TokenKind::False, "nil" => TokenKind::Nil, _ => TokenKind::Identifier(s), }; Token { kind, line, column } } } fn is_ident_start(c: char) -> bool { c.is_ascii_alphabetic() || c == '_' } fn is_ident_part(c: char) -> bool { is_ident_start(c) || c.is_ascii_digit() }