From a820d18e740cbb1df5bb97ac089fe6398a62bf85 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 23 Oct 2022 11:14:49 +0300 Subject: refactor: simplify parsing of token stream Sponsored-by: author --- src/parser.rs | 208 ++++++++++++++++++++++++++++++++++------------------------ src/token.rs | 2 +- 2 files changed, 125 insertions(+), 85 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 973d0b2..5378f36 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,7 +7,8 @@ use std::collections::HashMap; #[derive(Debug)] pub struct WikitextParser { - tokens: Vec<(TokenKind, usize, usize)>, + tokens: Vec, + positions: Vec<(usize, usize)>, } impl WikitextParser { @@ -15,6 +16,7 @@ impl WikitextParser { let linecol = LineColLookup::new(input); let mut p = TokenParser::new(input, patterns); let mut tokens = vec![]; + let mut positions = vec![]; loop { let token = p.parse(); debug!("token {:?}", token); @@ -22,78 +24,100 @@ impl WikitextParser { break; } let (line, col) = linecol.get(token.pos); - tokens.push((token.token, line, col)); + tokens.push(token.token); + positions.push((line, col)); } - Self { tokens } + Self { tokens, positions } + } + + fn drain(&mut self, n: usize) { + self.tokens.drain(..n); + self.positions.drain(..n); + } + + fn position(&self) -> (usize, usize) { + self.positions[0] + } + + fn is_empty(&self) -> bool { + self.tokens.is_empty() } pub fn parse(&mut self) -> Result, SiteError> { - if self.tokens.is_empty() { + if self.is_empty() { return Ok(None); } - let (_, line, col) = self.tokens[0]; + let (line, col) = self.position(); debug!("token at {}:{}", line, col); let snippet = match &self.tokens[..] { - [(TokenKind::OpenBrackets, _, _), (TokenKind::Word(target), _, _), (TokenKind::CloseBrackets, _, _), ..] => { + [TokenKind::OpenBrackets, TokenKind::Word(target), TokenKind::CloseBrackets, ..] => + { let wikilink = WikiLink::new(target, target); let snippet = Snippet::WikiLink(wikilink); - self.tokens.drain(..3); + self.drain(3); snippet } - [(TokenKind::OpenBrackets, _, _), (TokenKind::Word(word), _, _), ..] => { + [TokenKind::OpenBrackets, TokenKind::Word(word), ..] => { trace!("match [[{:?}", word); let mut link_text = word.to_string(); let mut target = None; - self.tokens.drain(..2); + self.drain(2); loop { - let (_, line, col) = self.tokens[0]; + let (line, col) = self.position(); match &self.tokens[..] { - [(TokenKind::Spaces(_), _, _), ..] => { + [TokenKind::Spaces(_), ..] => { trace!("match space"); - self.tokens.drain(..1); + self.drain(1); link_text.push(' '); } - [(TokenKind::Markdown(s), _, _), ..] => { + [TokenKind::Markdown(s), ..] => { trace!("match markdown {:?}", s); link_text.push_str(s); - self.tokens.drain(..1); + self.drain(1); } - [(TokenKind::OpenParens, _, _), (TokenKind::Word(word), _, _), ..] => { + [TokenKind::OpenParens, TokenKind::Word(word), ..] => { trace!("match ({:?}", word); link_text.push('('); link_text.push_str(word); - self.tokens.drain(..2); + self.drain(2); } - [(TokenKind::Word(word), _, _), ..] => { + [TokenKind::Word(word), ..] => { trace!("match {:?}", word); link_text.push_str(word); - self.tokens.drain(..1); + self.drain(1); } - [(TokenKind::ClosedParens, _, _), ..] => { + [TokenKind::ClosedParens, ..] => { trace!("match )"); link_text.push(')'); - self.tokens.drain(..1); + self.drain(1); } - [(TokenKind::CloseBrackets, _, _), ..] => { + [TokenKind::CloseBrackets, ..] => { trace!("match ]]"); - self.tokens.drain(..1); + self.drain(1); break; } - [(TokenKind::Pipe, _, _), (TokenKind::Word(word), _, _), (TokenKind::CloseBrackets, _, _), ..] => { + [TokenKind::Pipe, TokenKind::Word(word), TokenKind::CloseBrackets, ..] => + { trace!("match |{:?}]]", word); target = Some(word.to_string()); - self.tokens.drain(..3); + self.drain(3); break; } - [(TokenKind::Pipe, _, _), (TokenKind::Spaces(_), _, _), (TokenKind::Word(word), _, _), (TokenKind::CloseBrackets, _, _), ..] => { + [TokenKind::Pipe, TokenKind::Spaces(_), TokenKind::Word(word), TokenKind::CloseBrackets, ..] => + { trace!("match |{:?}]]", word); target = Some(word.to_string()); - self.tokens.drain(..3); + self.drain(3); break; } - _ => panic!("a can't parse line {} column {}: {:?}", line, col, &self.tokens[..5]), + _ => panic!( + "a can't parse line {} column {}: {:?}", + line, + col, + &self.tokens[..5] + ), } } if target.is_none() { @@ -102,143 +126,159 @@ impl WikitextParser { let wikilink = WikiLink::new(&link_text, &target.unwrap()); Snippet::WikiLink(wikilink) } - [(TokenKind::OpenBrackets, _, _), (TokenKind::Bang, _, _), (TokenKind::Word(name), _, _), ..] => { + [TokenKind::OpenBrackets, TokenKind::Bang, TokenKind::Word(name), ..] => + { trace!("match [[!{:?}", name); let name = name.to_string(); let mut args = HashMap::new(); - self.tokens.drain(..3); + self.drain(3); loop { - let (_, line, col) = self.tokens[0]; + let (line, col) = self.position(); match &self.tokens[..] { - [(TokenKind::Spaces(_), _, _), ..] => { + [TokenKind::Spaces(_), ..] => { trace!("match spaces"); - self.tokens.drain(..1); + self.drain(1); } - [(TokenKind::CloseBrackets, _, _), ..] => { + [TokenKind::CloseBrackets, ..] => { trace!("match ]]"); - self.tokens.drain(..1); + self.drain(1); break; } - [(TokenKind::Word(word), _, _), (TokenKind::Spaces(_), _, _), ..] => { + [TokenKind::Word(word), TokenKind::Spaces(_), ..] => { trace!("match {:?} spaces", word); args.insert(word.to_string(), "".to_string()); - self.tokens.drain(..2); + self.drain(2); } - [(TokenKind::Word(word), _, _), (TokenKind::CloseBrackets, _, _), ..] => { + [TokenKind::Word(word), TokenKind::CloseBrackets, ..] => { trace!("match {:?}]]", word); args.insert(word.to_string(), "".to_string()); - self.tokens.drain(..2); + self.drain(2); break; } - [(TokenKind::Word(name), _, _), (TokenKind::Equals, _, _), (TokenKind::Word(value), _, _), ..] => { + [TokenKind::Word(name), TokenKind::Equals, TokenKind::Word(value), ..] => + { trace!("match {:?}={:?}", name, value); args.insert(name.to_string(), value.to_string()); - self.tokens.drain(..3); + self.drain(3); } - [(TokenKind::Word(name), _, _), (TokenKind::Equals, _, _), (TokenKind::QuotedValue(value), _, _), ..] => { + [TokenKind::Word(name), TokenKind::Equals, TokenKind::QuotedValue(value), ..] => + { trace!("match {:?}={:?}", name, value); args.insert(name.to_string(), value.to_string()); - self.tokens.drain(..3); + self.drain(3); } - [(TokenKind::QuotedValue(value), _, _), ..] => { + [TokenKind::QuotedValue(value), ..] => { trace!("match {:?}", value); args.insert(value.to_string(), "".to_string()); - self.tokens.drain(..1); + self.drain(1); } - _ => panic!("b can't parse line {} column {}: {:?}", line, col, &self.tokens[..5]), + _ => panic!( + "b can't parse line {} column {}: {:?}", + line, + col, + &self.tokens[..5] + ), } } Snippet::Directive(ParsedDirective::new(&name, args)?) } - [(TokenKind::Bang, _, _), (TokenKind::OpenBracket, _, _), ..] => { + [TokenKind::Bang, TokenKind::OpenBracket, ..] => { let mut link_text = String::new(); #[allow(unused_assignments)] let mut target = None; - self.tokens.drain(..2); + self.drain(2); loop { - let (_, line, col) = self.tokens[0]; + let (line, col) = self.position(); match &self.tokens[..] { - [(TokenKind::Word(word), _, _), ..] => { + [TokenKind::Word(word), ..] => { link_text.push_str(word); - self.tokens.drain(..1); + self.drain(1); } - [(TokenKind::Spaces(_), _, _), ..] => { + [TokenKind::Spaces(_), ..] => { link_text.push(' '); - self.tokens.drain(..1); + self.drain(1); } - [(TokenKind::ClosedBracket, _, _), (TokenKind::OpenParens, _, _), (TokenKind::Word(word), _, _), (TokenKind::ClosedParens, _, _), ..] => + [TokenKind::ClosedBracket, TokenKind::OpenParens, TokenKind::Word(word), TokenKind::ClosedParens, ..] => { target = Some(word.to_string()); - self.tokens.drain(..4); + self.drain(4); break; } - _ => panic!("c can't parse line {} column {}: {:?}", line, col, &self.tokens[..5]), + _ => panic!( + "c can't parse line {} column {}: {:?}", + line, + col, + &self.tokens[..5] + ), } } Snippet::Markdown(format!("![{}]({})", link_text, target.unwrap())) } - [(TokenKind::Markdown(text), _, _), ..] => { + [TokenKind::Markdown(text), ..] => { let snippet = Snippet::Markdown(text.to_string()); - self.tokens.drain(..1); + self.drain(1); snippet } - [(TokenKind::Spaces(s), _, _), ..] => { + [TokenKind::Spaces(s), ..] => { let snippet = Snippet::Markdown(s.to_string()); - self.tokens.drain(..1); + self.drain(1); snippet } - [(TokenKind::Word(text), _, _), ..] => { + [TokenKind::Word(text), ..] => { let snippet = Snippet::Markdown(text.to_string()); - self.tokens.drain(..1); + self.drain(1); snippet } - [(TokenKind::Equals, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::Equals, ..] => { + self.drain(1); Snippet::Markdown("=".into()) } - [(TokenKind::Bang, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::Bang, ..] => { + self.drain(1); Snippet::Markdown("!".into()) } - [(TokenKind::Pipe, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::Pipe, ..] => { + self.drain(1); Snippet::Markdown("|".into()) } - [(TokenKind::PageName(s), _, _), ..] => { + [TokenKind::PageName(s), ..] => { let snippet = Snippet::Markdown(s.to_string()); - self.tokens.drain(..1); + self.drain(1); snippet } - [(TokenKind::QuotedValue(s), _, _), ..] => { + [TokenKind::QuotedValue(s), ..] => { let snippet = Snippet::Markdown(format!("\"{}\"", s)); - self.tokens.drain(..1); + self.drain(1); snippet } - [(TokenKind::OpenParens, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::OpenParens, ..] => { + self.drain(1); Snippet::Markdown("(".into()) } - [(TokenKind::ClosedParens, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::ClosedParens, ..] => { + self.drain(1); Snippet::Markdown(")".into()) } - [(TokenKind::OpenBracket, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::OpenBracket, ..] => { + self.drain(1); Snippet::Markdown("[".into()) } - [(TokenKind::ClosedBracket, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::ClosedBracket, ..] => { + self.drain(1); Snippet::Markdown("]".into()) } - [(TokenKind::OpenBrackets, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::OpenBrackets, ..] => { + self.drain(1); Snippet::Markdown("[[".into()) } - [(TokenKind::CloseBrackets, _, _), ..] => { - self.tokens.drain(..1); + [TokenKind::CloseBrackets, ..] => { + self.drain(1); Snippet::Markdown("]]".into()) } - _ => panic!("d can't parse line {} column {}: {:?}", line, col, self.tokens), + _ => panic!( + "d can't parse line {} column {}: {:?}", + line, col, self.tokens + ), }; Ok(Some(snippet)) } diff --git a/src/token.rs b/src/token.rs index 7af018a..0f2daaa 100644 --- a/src/token.rs +++ b/src/token.rs @@ -153,7 +153,7 @@ impl<'a> TokenParser<'a> { #[cfg(test)] mod test { - use super::{Token, TokenKind, TokenParser, TokenPatterns}; + use super::{TokenKind, TokenParser, TokenPatterns}; fn parser<'a>(input: &'a str, patterns: &'a TokenPatterns) -> TokenParser<'a> { TokenParser::new(input, patterns) -- cgit v1.2.1 From 9440a3d5f464696615419d9f15459c928538b869 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 23 Oct 2022 11:16:53 +0300 Subject: chore: simplify code based on clippy suggestions Sponsored-by: author --- src/pagespec.rs | 12 ++---------- src/site.rs | 6 +----- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/pagespec.rs b/src/pagespec.rs index 1ebb1b1..55c83fb 100644 --- a/src/pagespec.rs +++ b/src/pagespec.rs @@ -93,11 +93,7 @@ pub enum OpCode { fn glob_matches(glob: &str, path: &str) -> bool { let glob: Vec = glob.chars().collect(); let path: Vec = path.chars().collect(); - if glob_matches_helper(&glob, &path) { - true - } else { - false - } + glob_matches_helper(&glob, &path) } fn glob_matches_helper(mut glob: &[char], mut path: &[char]) -> bool { @@ -141,11 +137,7 @@ fn glob_matches_helper(mut glob: &[char], mut path: &[char]) -> bool { fn page_matches(site: &Site, container: &Path, glob: &str, path: &str) -> bool { if glob_matches(glob, path) { let full_path = container.join(path); - if site.is_page(&full_path) { - true - } else { - false - } + site.is_page(&full_path) } else { false } diff --git a/src/site.rs b/src/site.rs index 10fa923..6fa4625 100644 --- a/src/site.rs +++ b/src/site.rs @@ -118,11 +118,7 @@ impl Site { } pub fn is_page(&self, path: &Path) -> bool { - if self.pages_that_will_exist.get(path).is_some() { - true - } else { - false - } + self.pages_that_will_exist.get(path).is_some() } fn all_files(&self) -> Result, SiteError> { -- cgit v1.2.1