use crate::token::{TokenKind, TokenParser, TokenPatterns}; use crate::wikitext::{ParsedDirective, Snippet, WikiLink, WikitextError}; use line_col::LineColLookup; use log::trace; use std::collections::HashMap; #[derive(Debug, thiserror::Error)] pub enum ParserError { #[error("failed to parse wikitext, line {0}, column {1}: {2:?}")] WikitextSyntax(usize, usize, Vec), #[error(transparent)] Wikitext(#[from] WikitextError), } impl ParserError { pub fn wikitext_syntax(line: usize, col: usize, tokens: &[crate::token::TokenKind]) -> Self { let tokens = tokens.to_vec(); crate::parser::ParserError::WikitextSyntax(line, col, tokens) } } #[derive(Debug)] pub struct WikitextParser { tokens: Vec, positions: Vec<(usize, usize)>, } impl WikitextParser { pub fn new(input: &str, patterns: &TokenPatterns) -> Self { let linecol = LineColLookup::new(input); let mut p = TokenParser::new(input, patterns); let mut tokens = vec![]; let mut positions = vec![]; loop { let token = p.parse(); trace!("token {:?}", token); if token.token == TokenKind::End { break; } let (line, col) = linecol.get(token.pos); tokens.push(token.token); positions.push((line, col)); } Self { tokens, positions } } fn drain(&mut self, n: usize) { self.tokens.drain(..n); self.positions.drain(..n); } fn position(&self) -> (usize, usize) { self.positions[0] } fn is_empty(&self) -> bool { self.tokens.is_empty() } pub fn parse(&mut self) -> Result, ParserError> { if self.is_empty() { return Ok(None); } let (line, col) = self.position(); trace!("token at {}:{}", line, col); let snippet = match &self.tokens[..] { [TokenKind::OpenBrackets, TokenKind::Word(target), TokenKind::CloseBrackets, ..] => { let wikilink = WikiLink::new(target, target); let snippet = Snippet::WikiLink(wikilink); self.drain(3); snippet } [TokenKind::OpenBrackets, TokenKind::Word(word), ..] => { trace!("match [[{:?}", word); let mut link_text = word.to_string(); let mut target = None; self.drain(2); loop { let (line, col) = self.position(); match &self.tokens[..] { [TokenKind::Spaces(_), ..] => { trace!("match space"); self.drain(1); link_text.push(' '); } [TokenKind::Markdown(s), ..] => { trace!("match markdown {:?}", s); link_text.push_str(s); self.drain(1); } [TokenKind::OpenParens, TokenKind::Word(word), ..] => { trace!("match ({:?}", word); link_text.push('('); link_text.push_str(word); self.drain(2); } [TokenKind::Word(word), ..] => { trace!("match {:?}", word); link_text.push_str(word); self.drain(1); } [TokenKind::ClosedParens, ..] => { trace!("match )"); link_text.push(')'); self.drain(1); } [TokenKind::CloseBrackets, ..] => { trace!("match ]]"); self.drain(1); break; } [TokenKind::Pipe, TokenKind::Word(word), TokenKind::CloseBrackets, ..] => { trace!("match |{:?}]]", word); target = Some(word.to_string()); self.drain(3); break; } [TokenKind::Pipe, TokenKind::Spaces(_), TokenKind::Word(word), TokenKind::CloseBrackets, ..] => { trace!("match |{:?}]]", word); target = Some(word.to_string()); self.drain(3); break; } _ => { return Err(ParserError::wikitext_syntax(line, col, &self.tokens[..5])) } } } if target.is_none() { target = Some(link_text.clone()); } let wikilink = WikiLink::new(&link_text, &target.unwrap()); Snippet::WikiLink(wikilink) } [TokenKind::OpenBrackets, TokenKind::Bang, TokenKind::Word(name), ..] => { trace!("match [[!{:?}", name); let name = name.to_string(); let mut args = HashMap::new(); self.drain(3); loop { let (line, col) = self.position(); match &self.tokens[..] { [TokenKind::Spaces(_), ..] => { trace!("match spaces"); self.drain(1); } [TokenKind::CloseBrackets, ..] => { trace!("match ]]"); self.drain(1); break; } [TokenKind::Word(word), TokenKind::Spaces(_), ..] => { trace!("match {:?} spaces", word); args.insert(word.to_string(), "".to_string()); self.drain(2); } [TokenKind::Word(word), TokenKind::CloseBrackets, ..] => { trace!("match {:?}]]", word); args.insert(word.to_string(), "".to_string()); self.drain(2); break; } [TokenKind::Word(name), TokenKind::Equals, TokenKind::Word(value), ..] => { trace!("match {:?}={:?}", name, value); args.insert(name.to_string(), value.to_string()); self.drain(3); } [TokenKind::Word(name), TokenKind::Equals, TokenKind::QuotedValue(value), ..] => { trace!("match {:?}={:?}", name, value); args.insert(name.to_string(), value.to_string()); self.drain(3); } [TokenKind::QuotedValue(value), ..] => { trace!("match {:?}", value); args.insert(value.to_string(), "".to_string()); self.drain(1); } _ => { return Err(ParserError::wikitext_syntax(line, col, &self.tokens[..5])) } } } Snippet::Directive(ParsedDirective::new(&name, args)?) } [TokenKind::Bang, TokenKind::OpenBracket, ..] => { let mut link_text = String::new(); #[allow(unused_assignments)] let mut target = None; let mut title = None; self.drain(2); loop { let (line, col) = self.position(); match &self.tokens[..] { [TokenKind::Word(word), ..] => { link_text.push_str(word); self.drain(1); } [TokenKind::Spaces(_), ..] => { link_text.push(' '); self.drain(1); } [TokenKind::ClosedBracket, TokenKind::OpenParens, TokenKind::Word(word), TokenKind::ClosedParens, ..] => { target = Some(word.to_string()); self.drain(4); break; } [TokenKind::ClosedBracket, TokenKind::OpenParens, TokenKind::Word(word), TokenKind::Spaces(_), TokenKind::QuotedValue(t), TokenKind::ClosedParens, ..] => { target = Some(word.to_string()); title = Some(t.to_string()); self.drain(6); break; } _ => { return Err(ParserError::wikitext_syntax( line, col, &self.tokens[..std::cmp::min(5, self.tokens.len())], )) } } } if let Some(title) = title { Snippet::Markdown(format!( "![{}]({} \"{}\")", link_text, target.unwrap(), title )) } else { Snippet::Markdown(format!("![{}]({})", link_text, target.unwrap())) } } [TokenKind::Markdown(text), ..] => { let snippet = Snippet::Markdown(text.to_string()); self.drain(1); snippet } [TokenKind::Spaces(s), ..] => { let snippet = Snippet::Markdown(s.to_string()); self.drain(1); snippet } [TokenKind::Word(text), ..] => { let snippet = Snippet::Markdown(text.to_string()); self.drain(1); snippet } [TokenKind::Equals, ..] => { self.drain(1); Snippet::Markdown("=".into()) } [TokenKind::Bang, ..] => { self.drain(1); Snippet::Markdown("!".into()) } [TokenKind::Pipe, ..] => { self.drain(1); Snippet::Markdown("|".into()) } [TokenKind::PageName(s), ..] => { let snippet = Snippet::Markdown(s.to_string()); self.drain(1); snippet } [TokenKind::QuotedValue(s), ..] => { let snippet = Snippet::Markdown(format!("\"{}\"", s)); self.drain(1); snippet } [TokenKind::OpenParens, ..] => { self.drain(1); Snippet::Markdown("(".into()) } [TokenKind::ClosedParens, ..] => { self.drain(1); Snippet::Markdown(")".into()) } [TokenKind::OpenBracket, ..] => { self.drain(1); Snippet::Markdown("[".into()) } [TokenKind::ClosedBracket, ..] => { self.drain(1); Snippet::Markdown("]".into()) } [TokenKind::OpenBrackets, ..] => { self.drain(1); Snippet::Markdown("[[".into()) } [TokenKind::CloseBrackets, ..] => { self.drain(1); Snippet::Markdown("]]".into()) } _ => return Err(ParserError::wikitext_syntax(line, col, &self.tokens[..5])), }; Ok(Some(snippet)) } } #[cfg(test)] mod test { use super::{ParsedDirective, Snippet, TokenPatterns, WikiLink, WikitextParser}; use std::collections::HashMap; fn parsed_directive(name: &str, kv: &[(&str, &str)]) -> ParsedDirective { ParsedDirective::new( name, HashMap::from_iter(kv.iter().map(|(k, v)| (k.to_string(), v.to_string()))), ) .unwrap() } #[test] fn empty_input() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("", &patterns); assert_eq!(p.parse().unwrap(), None); } #[test] fn plain_markdown() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("hello, world", &patterns); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown("hello".into()))); assert_eq!( p.parse().unwrap(), Some(Snippet::Markdown(", world".into())) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn simple_wikilink() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("hello, [[planet-earth]]", &patterns); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown("hello".into()))); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown(", ".into()))); assert_eq!( p.parse().unwrap(), Some(Snippet::WikiLink(WikiLink::new( "planet-earth", "planet-earth" ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn simple_wikilink_to_subpage() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("hello, [[planets/earth]]", &patterns); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown("hello".into()))); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown(", ".into()))); assert_eq!( p.parse().unwrap(), Some(Snippet::WikiLink(WikiLink::new( "planets/earth", "planets/earth" ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn complex_wikilink() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("hello, [[whomever we greet|planets/earth]]", &patterns); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown("hello".into()))); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown(", ".into()))); assert_eq!( p.parse().unwrap(), Some(Snippet::WikiLink(WikiLink::new( "whomever we greet", "planets/earth" ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn bracket() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("[world", &patterns); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown("[".into()))); assert_eq!(p.parse().unwrap(), Some(Snippet::Markdown("world".into()))); assert_eq!(p.parse().unwrap(), None); } #[test] fn simple_directive() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("[[!simple]]", &patterns); assert_eq!( p.parse().unwrap(), Some(Snippet::Directive(parsed_directive("simple", &[]))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn directive_unnamed_arg() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("[[!unnamedarg foo.jpg]]", &patterns); assert_eq!( p.parse().unwrap(), Some(Snippet::Directive(parsed_directive( "unnamedarg", &[("foo.jpg", "")] ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn directive_simple_arg() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new("[[!simplearg foo=bar]]", &patterns); assert_eq!( p.parse().unwrap(), Some(Snippet::Directive(parsed_directive( "simplearg", &[("foo", "bar")] ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn directive_quoted_arg() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new(r#"[[!quotedarg bar="foobar"]]"#, &patterns); assert_eq!( p.parse().unwrap(), Some(Snippet::Directive(parsed_directive( "quotedarg", &[("bar", "foobar")] ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn directive_multiline_arg() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new( r#"[[!multilinearg yo="""foo bar"""]]"#, &patterns, ); assert_eq!( p.parse().unwrap(), Some(Snippet::Directive(parsed_directive( "multilinearg", &[("yo", "foo\nbar")] ))) ); assert_eq!(p.parse().unwrap(), None); } #[test] fn directive_multiple_args() { let patterns = TokenPatterns::default(); let mut p = WikitextParser::new(r#"[[!img foo.jpg class=image]]"#, &patterns); assert_eq!( p.parse().unwrap(), Some(Snippet::Directive(parsed_directive( "img", &[("foo.jpg", ""), ("class", "image")] ))) ); assert_eq!(p.parse().unwrap(), None); } }