From a820d18e740cbb1df5bb97ac089fe6398a62bf85 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sun, 23 Oct 2022 11:14:49 +0300
Subject: refactor: simplify parsing of token stream

Sponsored-by: author
---
 src/parser.rs | 208 ++++++++++++++++++++++++++++++++++------------------------
 src/token.rs  |   2 +-
 2 files changed, 125 insertions(+), 85 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 973d0b2..5378f36 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -7,7 +7,8 @@ use std::collections::HashMap;
 
 #[derive(Debug)]
 pub struct WikitextParser {
-    tokens: Vec<(TokenKind, usize, usize)>,
+    tokens: Vec<TokenKind>,
+    positions: Vec<(usize, usize)>,
 }
 
 impl WikitextParser {
@@ -15,6 +16,7 @@
         let linecol = LineColLookup::new(input);
         let mut p = TokenParser::new(input, patterns);
         let mut tokens = vec![];
+        let mut positions = vec![];
         loop {
             let token = p.parse();
             debug!("token {:?}", token);
@@ -22,78 +24,100 @@
                 break;
             }
             let (line, col) = linecol.get(token.pos);
-            tokens.push((token.token, line, col));
+            tokens.push(token.token);
+            positions.push((line, col));
         }
-        Self { tokens }
+        Self { tokens, positions }
+    }
+
+    fn drain(&mut self, n: usize) {
+        self.tokens.drain(..n);
+        self.positions.drain(..n);
+    }
+
+    fn position(&self) -> (usize, usize) {
+        self.positions[0]
+    }
+
+    fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
     }
 
     pub fn parse(&mut self) -> Result<Option<Snippet>, SiteError> {
-        if self.tokens.is_empty() {
+        if self.is_empty() {
             return Ok(None);
         }
-        let (_, line, col) = self.tokens[0];
+        let (line, col) = self.position();
         debug!("token at {}:{}", line, col);
         let snippet = match &self.tokens[..] {
-            [(TokenKind::OpenBrackets, _, _), (TokenKind::Word(target), _, _), (TokenKind::CloseBrackets, _, _), ..] => {
+            [TokenKind::OpenBrackets, TokenKind::Word(target), TokenKind::CloseBrackets, ..] =>
+            {
                 let wikilink = WikiLink::new(target, target);
                 let snippet = Snippet::WikiLink(wikilink);
-                self.tokens.drain(..3);
+                self.drain(3);
                 snippet
             }
-            [(TokenKind::OpenBrackets, _, _), (TokenKind::Word(word), _, _), ..] => {
+            [TokenKind::OpenBrackets, TokenKind::Word(word), ..] => {
                 trace!("match [[{:?}", word);
                 let mut link_text = word.to_string();
                 let mut target = None;
-                self.tokens.drain(..2);
+                self.drain(2);
                 loop {
-                    let (_, line, col) = self.tokens[0];
+                    let (line, col) = self.position();
                     match &self.tokens[..] {
-                        [(TokenKind::Spaces(_), _, _), ..] => {
+                        [TokenKind::Spaces(_), ..] => {
                             trace!("match space");
-                            self.tokens.drain(..1);
+                            self.drain(1);
                             link_text.push(' ');
                         }
-                        [(TokenKind::Markdown(s), _, _), ..] => {
+                        [TokenKind::Markdown(s), ..] => {
                             trace!("match markdown {:?}", s);
                             link_text.push_str(s);
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        [(TokenKind::OpenParens, _, _), (TokenKind::Word(word), _, _), ..] => {
+                        [TokenKind::OpenParens, TokenKind::Word(word), ..] => {
                             trace!("match ({:?}", word);
                             link_text.push('(');
                             link_text.push_str(word);
-                            self.tokens.drain(..2);
+                            self.drain(2);
                         }
-                        [(TokenKind::Word(word), _, _), ..] => {
+                        [TokenKind::Word(word), ..] => {
                             trace!("match {:?}", word);
                             link_text.push_str(word);
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        [(TokenKind::ClosedParens, _, _), ..] => {
+                        [TokenKind::ClosedParens, ..] => {
                             trace!("match )");
                             link_text.push(')');
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        [(TokenKind::CloseBrackets, _, _), ..] => {
+                        [TokenKind::CloseBrackets, ..] => {
                             trace!("match ]]");
-                            self.tokens.drain(..1);
+                            self.drain(1);
                             break;
                         }
-                        [(TokenKind::Pipe, _, _), (TokenKind::Word(word), _, _), (TokenKind::CloseBrackets, _, _), ..] => {
+                        [TokenKind::Pipe, TokenKind::Word(word), TokenKind::CloseBrackets, ..] =>
+                        {
                             trace!("match |{:?}]]", word);
                             target = Some(word.to_string());
-                            self.tokens.drain(..3);
+                            self.drain(3);
                             break;
                         }
-                        [(TokenKind::Pipe, _, _), (TokenKind::Spaces(_), _, _), (TokenKind::Word(word), _, _), (TokenKind::CloseBrackets, _, _), ..] => {
+                        [TokenKind::Pipe, TokenKind::Spaces(_), TokenKind::Word(word), TokenKind::CloseBrackets, ..] =>
+                        {
                             trace!("match |{:?}]]", word);
                             target = Some(word.to_string());
-                            self.tokens.drain(..3);
+                            self.drain(3);
                             break;
                         }
-                        _ => panic!("a can't parse line {} column {}: {:?}", line, col, &self.tokens[..5]),
+                        _ => panic!(
+                            "a can't parse line {} column {}: {:?}",
+                            line,
+                            col,
+                            &self.tokens[..5]
+                        ),
                     }
                 }
                 if target.is_none() {
@@ -102,143 +126,159 @@ impl WikitextParser {
                 let wikilink = WikiLink::new(&link_text, &target.unwrap());
                 Snippet::WikiLink(wikilink)
             }
-            [(TokenKind::OpenBrackets, _, _), (TokenKind::Bang, _, _), (TokenKind::Word(name), _, _), ..] => {
+            [TokenKind::OpenBrackets, TokenKind::Bang, TokenKind::Word(name), ..] =>
+            {
                 trace!("match [[!{:?}", name);
                 let name = name.to_string();
                 let mut args = HashMap::new();
-                self.tokens.drain(..3);
+                self.drain(3);
                 loop {
-                    let (_, line, col) = self.tokens[0];
+                    let (line, col) = self.position();
                     match &self.tokens[..] {
-                        [(TokenKind::Spaces(_), _, _), ..] => {
+                        [TokenKind::Spaces(_), ..] => {
                             trace!("match spaces");
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        [(TokenKind::CloseBrackets, _, _), ..] => {
+                        [TokenKind::CloseBrackets, ..] => {
                             trace!("match ]]");
-                            self.tokens.drain(..1);
+                            self.drain(1);
                             break;
                         }
-                        [(TokenKind::Word(word), _, _), (TokenKind::Spaces(_), _, _), ..] => {
+                        [TokenKind::Word(word), TokenKind::Spaces(_), ..] => {
                             trace!("match {:?} spaces", word);
                             args.insert(word.to_string(), "".to_string());
-                            self.tokens.drain(..2);
+                            self.drain(2);
                         }
-                        [(TokenKind::Word(word), _, _), (TokenKind::CloseBrackets, _, _), ..] => {
+                        [TokenKind::Word(word), TokenKind::CloseBrackets, ..] => {
                             trace!("match {:?}]]", word);
                             args.insert(word.to_string(), "".to_string());
-                            self.tokens.drain(..2);
+                            self.drain(2);
                             break;
                         }
-                        [(TokenKind::Word(name), _, _), (TokenKind::Equals, _, _), (TokenKind::Word(value), _, _), ..] => {
+                        [TokenKind::Word(name), TokenKind::Equals, TokenKind::Word(value), ..] =>
+                        {
                             trace!("match {:?}={:?}", name, value);
                             args.insert(name.to_string(), value.to_string());
-                            self.tokens.drain(..3);
+                            self.drain(3);
                         }
-                        [(TokenKind::Word(name), _, _), (TokenKind::Equals, _, _), (TokenKind::QuotedValue(value), _, _), ..] => {
+                        [TokenKind::Word(name), TokenKind::Equals, TokenKind::QuotedValue(value), ..] =>
+                        {
                             trace!("match {:?}={:?}", name, value);
                             args.insert(name.to_string(), value.to_string());
-                            self.tokens.drain(..3);
+                            self.drain(3);
                         }
-                        [(TokenKind::QuotedValue(value), _, _), ..] => {
+                        [TokenKind::QuotedValue(value), ..] => {
                             trace!("match {:?}", value);
                             args.insert(value.to_string(), "".to_string());
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        _ => panic!("b can't parse line {} column {}: {:?}", line, col, &self.tokens[..5]),
+                        _ => panic!(
+                            "b can't parse line {} column {}: {:?}",
+                            line,
+                            col,
+                            &self.tokens[..5]
+                        ),
                     }
                 }
                 Snippet::Directive(ParsedDirective::new(&name, args)?)
             }
-            [(TokenKind::Bang, _, _), (TokenKind::OpenBracket, _, _), ..] => {
+            [TokenKind::Bang, TokenKind::OpenBracket, ..] => {
                 let mut link_text = String::new();
                 #[allow(unused_assignments)]
                 let mut target = None;
-                self.tokens.drain(..2);
+                self.drain(2);
                 loop {
-                    let (_, line, col) = self.tokens[0];
+                    let (line, col) = self.position();
                     match &self.tokens[..] {
-                        [(TokenKind::Word(word), _, _), ..] => {
+                        [TokenKind::Word(word), ..] => {
                             link_text.push_str(word);
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        [(TokenKind::Spaces(_), _, _), ..] => {
+                        [TokenKind::Spaces(_), ..] => {
                             link_text.push(' ');
-                            self.tokens.drain(..1);
+                            self.drain(1);
                         }
-                        [(TokenKind::ClosedBracket, _, _), (TokenKind::OpenParens, _, _), (TokenKind::Word(word), _, _), (TokenKind::ClosedParens, _, _), ..] =>
+                        [TokenKind::ClosedBracket, TokenKind::OpenParens, TokenKind::Word(word), TokenKind::ClosedParens, ..] =>
                         {
                             target = Some(word.to_string());
-                            self.tokens.drain(..4);
+                            self.drain(4);
                             break;
                         }
-                        _ => panic!("c can't parse line {} column {}: {:?}", line, col, &self.tokens[..5]),
+                        _ => panic!(
+                            "c can't parse line {} column {}: {:?}",
+                            line,
+                            col,
+                            &self.tokens[..5]
+                        ),
                     }
                 }
                 Snippet::Markdown(format!("![{}]({})", link_text, target.unwrap()))
             }
-            [(TokenKind::Markdown(text), _, _), ..] => {
+            [TokenKind::Markdown(text), ..] => {
                 let snippet = Snippet::Markdown(text.to_string());
-                self.tokens.drain(..1);
+                self.drain(1);
                 snippet
             }
-            [(TokenKind::Spaces(s), _, _), ..] => {
+            [TokenKind::Spaces(s), ..] => {
                 let snippet = Snippet::Markdown(s.to_string());
-                self.tokens.drain(..1);
+                self.drain(1);
                 snippet
             }
-            [(TokenKind::Word(text), _, _), ..] => {
+            [TokenKind::Word(text), ..] => {
                 let snippet = Snippet::Markdown(text.to_string());
-                self.tokens.drain(..1);
+                self.drain(1);
                 snippet
             }
-            [(TokenKind::Equals, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::Equals, ..] => {
+                self.drain(1);
                 Snippet::Markdown("=".into())
             }
-            [(TokenKind::Bang, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::Bang, ..] => {
+                self.drain(1);
                 Snippet::Markdown("!".into())
             }
-            [(TokenKind::Pipe, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::Pipe, ..] => {
+                self.drain(1);
                 Snippet::Markdown("|".into())
             }
-            [(TokenKind::PageName(s), _, _), ..] => {
+            [TokenKind::PageName(s), ..] => {
                 let snippet = Snippet::Markdown(s.to_string());
-                self.tokens.drain(..1);
+                self.drain(1);
                 snippet
             }
-            [(TokenKind::QuotedValue(s), _, _), ..] => {
+            [TokenKind::QuotedValue(s), ..] => {
                 let snippet = Snippet::Markdown(format!("\"{}\"", s));
-                self.tokens.drain(..1);
+                self.drain(1);
                 snippet
             }
-            [(TokenKind::OpenParens, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::OpenParens, ..] => {
+                self.drain(1);
                 Snippet::Markdown("(".into())
             }
-            [(TokenKind::ClosedParens, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::ClosedParens, ..] => {
+                self.drain(1);
                 Snippet::Markdown(")".into())
             }
-            [(TokenKind::OpenBracket, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::OpenBracket, ..] => {
+                self.drain(1);
                 Snippet::Markdown("[".into())
            }
-            [(TokenKind::ClosedBracket, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::ClosedBracket, ..] => {
+                self.drain(1);
                 Snippet::Markdown("]".into())
             }
-            [(TokenKind::OpenBrackets, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::OpenBrackets, ..] => {
+                self.drain(1);
                 Snippet::Markdown("[[".into())
             }
-            [(TokenKind::CloseBrackets, _, _), ..] => {
-                self.tokens.drain(..1);
+            [TokenKind::CloseBrackets, ..] => {
+                self.drain(1);
                 Snippet::Markdown("]]".into())
             }
-            _ => panic!("d can't parse line {} column {}: {:?}", line, col, self.tokens),
+            _ => panic!(
+                "d can't parse line {} column {}: {:?}",
+                line, col, self.tokens
+            ),
         };
         Ok(Some(snippet))
     }
diff --git a/src/token.rs b/src/token.rs
index 7af018a..0f2daaa 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -153,7 +153,7 @@ impl<'a> TokenParser<'a> {
 
 #[cfg(test)]
 mod test {
-    use super::{Token, TokenKind, TokenParser, TokenPatterns};
+    use super::{TokenKind, TokenParser, TokenPatterns};
 
     fn parser<'a>(input: &'a str, patterns: &'a TokenPatterns) -> TokenParser<'a> {
         TokenParser::new(input, patterns)
-- 
cgit v1.2.1
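
For illustration only (not code from this repository; Tok, Parser, and parse_one are made-up names): a minimal, self-contained sketch of the representation the patch switches to, with token kinds and their (line, column) positions kept in two parallel vectors so the parser can match slice patterns over plain token kinds and drain both vectors in lockstep.

// Illustrative sketch only; Tok and Parser are stand-ins, not the
// repository's TokenKind and WikitextParser.
#[derive(Debug)]
enum Tok {
    OpenBrackets,
    Word(String),
    CloseBrackets,
}

struct Parser {
    tokens: Vec<Tok>,
    // (line, column) for each token, kept parallel to `tokens`.
    positions: Vec<(usize, usize)>,
}

impl Parser {
    // Drop the first `n` tokens and their positions together so the
    // two vectors stay in sync.
    fn drain(&mut self, n: usize) {
        self.tokens.drain(..n);
        self.positions.drain(..n);
    }

    // With the kinds in their own vector, a slice pattern can describe
    // "[[word]]" directly, without (kind, line, col) tuples in the way.
    fn parse_one(&mut self) -> Option<String> {
        match &self.tokens[..] {
            [Tok::OpenBrackets, Tok::Word(target), Tok::CloseBrackets, ..] => {
                let link = format!("wikilink to {}", target);
                self.drain(3);
                Some(link)
            }
            _ => None,
        }
    }
}

fn main() {
    let mut p = Parser {
        tokens: vec![
            Tok::OpenBrackets,
            Tok::Word("welcome".to_string()),
            Tok::CloseBrackets,
        ],
        positions: vec![(1, 1), (1, 3), (1, 10)],
    };
    // Prints: Some("wikilink to welcome")
    println!("{:?}", p.parse_one());
}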