use log::trace; use regex::Regex; #[derive(Debug, Clone, Eq, PartialEq)] pub enum Token { End, Markdown(String), OpenParens, ClosedParens, OpenBracket, ClosedBracket, OpenBrackets, CloseBrackets, Bang, Pipe, Word(String), PageName(String), Spaces(String), Equals, QuotedValue(String), } #[derive(Debug, Clone)] pub struct TokenPatterns { plain: Regex, word: Regex, spaces: Regex, single_quoted: Regex, triple_quoted: Regex, } impl Default for TokenPatterns { fn default() -> Self { Self { plain: Regex::new(r#"(?P[^\[]+)"#).unwrap(), word: Regex::new(r#"[-._/[:alpha:][:digit:]]+"#).unwrap(), spaces: Regex::new(r#"([[:space:]]|\n)+"#).unwrap(), single_quoted: Regex::new(r#""(?P.*?)""#).unwrap(), triple_quoted: Regex::new(r#""""(?P(.|\n)*?)""""#).unwrap(), } } } #[derive(Debug, Clone)] pub struct TokenParser<'a> { input: &'a str, patterns: &'a TokenPatterns, } impl<'a> TokenParser<'a> { pub fn new(input: &'a str, patterns: &'a TokenPatterns) -> Self { Self { input, patterns } } pub fn parse(&mut self) -> Token { if self.input.is_empty() { Token::End } else if self.literal("(") { Token::OpenParens } else if self.literal(")") { Token::ClosedParens } else if self.literal("[[") { Token::OpenBrackets } else if self.literal("]]") { Token::CloseBrackets } else if self.literal("[") { Token::OpenBracket } else if self.literal("]") { Token::ClosedBracket } else if self.literal("!") { Token::Bang } else if self.literal("|") { Token::Pipe } else if self.literal("=") { Token::Equals } else if let Some(m) = self.regex(&self.patterns.spaces.clone()) { Token::Spaces(m.as_str().into()) } else if let Some(m) = self.regex(&self.patterns.triple_quoted.clone()) { Token::QuotedValue(m.as_str().into()) } else if let Some(m) = self.regex(&self.patterns.single_quoted.clone()) { Token::QuotedValue(m.as_str().into()) } else if let Some(m) = self.regex(&self.patterns.word.clone()) { Token::Word(m.as_str().into()) } else if let Some(m) = self.regex(&self.patterns.plain.clone()) { Token::Markdown(m.as_str().into()) } else { panic!("can't handle input: {:?}", self.input); } } fn literal(&mut self, pattern: &str) -> bool { if let Some(rest) = self.input.strip_prefix(pattern) { self.input = rest; true } else { false } } fn regex(&mut self, pattern: &Regex) -> Option { trace!("matching regex {}", pattern.as_str()); if let Some(m) = pattern.find(self.input) { if m.start() == 0 { trace!("match at beginning"); let captures = pattern.captures(self.input).unwrap(); let m = if let Some(value) = captures.name("value") { self.input = &self.input[m.end()..]; value } else { self.input = &self.input[m.end()..]; captures.get(0).unwrap() }; return Some(m.as_str().to_string()); } } trace!("no match at beginning"); None } } #[cfg(test)] mod test { use super::{Token, TokenParser, TokenPatterns}; fn parser<'a>(input: &'a str, patterns: &'a TokenPatterns) -> TokenParser<'a> { TokenParser::new(input, patterns) } #[test] fn empty_string() { let patterns = TokenPatterns::default(); let mut p = parser("", &patterns); assert_eq!(p.parse(), Token::End); } #[test] fn plain_markdown() { let patterns = TokenPatterns::default(); let mut p = parser("** hello, world", &patterns); assert_eq!(p.parse(), Token::Markdown("** hello, world".into())); assert_eq!(p.parse(), Token::End); } #[test] fn single_open_parens() { let patterns = TokenPatterns::default(); let mut p = parser("(", &patterns); assert_eq!(p.parse(), Token::OpenParens); assert_eq!(p.parse(), Token::End); } #[test] fn single_close_parens() { let patterns = TokenPatterns::default(); let mut p = parser(")", &patterns); assert_eq!(p.parse(), Token::ClosedParens); assert_eq!(p.parse(), Token::End); } #[test] fn single_open_bracket() { let patterns = TokenPatterns::default(); let mut p = parser("[", &patterns); assert_eq!(p.parse(), Token::OpenBracket); assert_eq!(p.parse(), Token::End); } #[test] fn single_close_bracket() { let patterns = TokenPatterns::default(); let mut p = parser("]", &patterns); assert_eq!(p.parse(), Token::ClosedBracket); assert_eq!(p.parse(), Token::End); } #[test] fn double_open_bracket() { let patterns = TokenPatterns::default(); let mut p = parser("[[", &patterns); assert_eq!(p.parse(), Token::OpenBrackets); assert_eq!(p.parse(), Token::End); } #[test] fn double_close_bracket() { let patterns = TokenPatterns::default(); let mut p = parser("]]", &patterns); assert_eq!(p.parse(), Token::CloseBrackets); assert_eq!(p.parse(), Token::End); } #[test] fn bang() { let patterns = TokenPatterns::default(); let mut p = parser("!", &patterns); assert_eq!(p.parse(), Token::Bang); assert_eq!(p.parse(), Token::End); } #[test] fn pipe() { let patterns = TokenPatterns::default(); let mut p = parser("|", &patterns); assert_eq!(p.parse(), Token::Pipe); assert_eq!(p.parse(), Token::End); } #[test] fn equals() { let patterns = TokenPatterns::default(); let mut p = parser("=", &patterns); assert_eq!(p.parse(), Token::Equals); assert_eq!(p.parse(), Token::End); } #[test] fn simple_word() { let patterns = TokenPatterns::default(); let mut p = parser("foo bar", &patterns); assert_eq!(p.parse(), Token::Word("foo".into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("bar".into())); assert_eq!(p.parse(), Token::End); } #[test] fn complex_word() { let patterns = TokenPatterns::default(); let mut p = parser("foo-1.2_3[[bar/subpage]]", &patterns); assert_eq!(p.parse(), Token::Word("foo-1.2_3".into())); assert_eq!(p.parse(), Token::OpenBrackets); assert_eq!(p.parse(), Token::Word("bar/subpage".into())); assert_eq!(p.parse(), Token::CloseBrackets); assert_eq!(p.parse(), Token::End); } #[test] fn spaces() { let patterns = TokenPatterns::default(); let mut p = parser("\n", &patterns); assert_eq!(p.parse(), Token::Spaces("\n".into())); assert_eq!(p.parse(), Token::End); } #[test] fn single_quoted() { let patterns = TokenPatterns::default(); let mut p = parser(r#""hello there""#, &patterns); assert_eq!(p.parse(), Token::QuotedValue(r#"hello there"#.into())); assert_eq!(p.parse(), Token::End); } #[test] fn triple_quoted() { let patterns = TokenPatterns::default(); let mut p = parser(r#""""hello\nthere""""#, &patterns); assert_eq!(p.parse(), Token::QuotedValue(r#"hello\nthere"#.into())); assert_eq!(p.parse(), Token::End); } #[test] fn simple_directive() { let patterns = TokenPatterns::default(); let mut p = parser(r#"[[!if test="enabled(sidebar)"]]"#, &patterns); assert_eq!(p.parse(), Token::OpenBrackets); assert_eq!(p.parse(), Token::Bang); assert_eq!(p.parse(), Token::Word("if".into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("test".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!(p.parse(), Token::QuotedValue(r#"enabled(sidebar)"#.into())); assert_eq!(p.parse(), Token::CloseBrackets); assert_eq!(p.parse(), Token::End); } #[test] fn complex_directive() { let patterns = TokenPatterns::default(); let mut p = parser( r#"[[!if test="enabled(sidebar)" then=""" [[!sidebar]] """ else=""" [[!inline pages=sidebar raw=yes]] """]]"#, &patterns, ); assert_eq!(p.parse(), Token::OpenBrackets); assert_eq!(p.parse(), Token::Bang); assert_eq!(p.parse(), Token::Word("if".into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("test".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!(p.parse(), Token::QuotedValue(r#"enabled(sidebar)"#.into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("then".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!(p.parse(), Token::QuotedValue("\n[[!sidebar]]\n".into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("else".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!( p.parse(), Token::QuotedValue("\n[[!inline pages=sidebar raw=yes]]\n".into()) ); assert_eq!(p.parse(), Token::CloseBrackets); assert_eq!(p.parse(), Token::End); } #[test] fn complex_directive_with_lookahead() { let patterns = TokenPatterns::default(); let orig = parser( r#"[[!if test="enabled(sidebar)" then=""" [[!sidebar]] """ else=""" [[!inline pages=sidebar raw=yes]] """]]"#, &patterns, ); let mut p = orig.clone(); assert_eq!(p.parse(), Token::OpenBrackets); assert_eq!(p.parse(), Token::Bang); assert_eq!(p.parse(), Token::Word("if".into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("test".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!(p.parse(), Token::QuotedValue(r#"enabled(sidebar)"#.into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("then".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!(p.parse(), Token::QuotedValue("\n[[!sidebar]]\n".into())); assert_eq!(p.parse(), Token::Spaces(" ".into())); assert_eq!(p.parse(), Token::Word("else".into())); assert_eq!(p.parse(), Token::Equals); assert_eq!( p.parse(), Token::QuotedValue("\n[[!inline pages=sidebar raw=yes]]\n".into()) ); assert_eq!(p.parse(), Token::CloseBrackets); assert_eq!(p.parse(), Token::End); } }