From 0f421241efad0e8163b90acbd85a7324003da248 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 30 Jul 2022 10:07:45 +0300 Subject: chore: drop unused old WikitextParser Sponsored-by: author --- src/wikitext.rs | 288 -------------------------------------------------------- 1 file changed, 288 deletions(-) diff --git a/src/wikitext.rs b/src/wikitext.rs index 39fbd32..54570fa 100644 --- a/src/wikitext.rs +++ b/src/wikitext.rs @@ -3,154 +3,8 @@ use crate::error::SiteError; use crate::page::PageMeta; use crate::site::Site; use log::trace; -use regex::Regex; use std::collections::HashMap; -use std::convert::TryFrom; use std::path::Path; - -#[derive(Debug)] -pub struct WikitextParser { - // Text without open bracket: can't be a link or a directive. - no_bracket: Regex, - - // An open bracket. - bracket: Regex, - - // A wikilink that is just a bare page name. - wikilink_bare: Regex, - - // A wikilink that has link text separately from target. - wikilink_complex: Regex, - - // A directive without arguments. - directive_no_args: Regex, - - // A directive with arguments. - directive_args: Regex, - - // A plain argument without a value. - plain: Regex, - // An argument with an unquoted value. - simple: Regex, - // An argument with a quoted value. - quoted: Regex, - // An argument with a triple-quoted value. - multiline: Regex, -} - -impl WikitextParser { - pub fn parse<'a>(&self, text: &'a str) -> Result<(Snippet, &'a str), SiteError> { - let patterns = vec![ - &self.directive_args, - &self.directive_no_args, - &self.wikilink_bare, - &self.wikilink_complex, - &self.no_bracket, - &self.bracket, - ]; - for pat in patterns { - if let Some(m) = pat.find(text) { - if m.start() > 0 { - continue; - } - trace!("WikitextParser: m={:?}", m); - let t = m.as_str(); - let c = pat.captures(t).unwrap(); - let token = if pat.as_str() == self.directive_args.as_str() { - let name = c.name("name").unwrap().as_str(); - let args = c.name("args").unwrap().as_str(); - let args = self.parse_args(args); - let d = ParsedDirective::new(name, args)?; - trace!("WikitextParser: directive={:?}", d); - let d = Directive::try_from(d)?; - Snippet::Directive(d) - } else if pat.as_str() == self.directive_no_args.as_str() { - let name = c.name("name").unwrap().as_str(); - let args = self.parse_args(""); - let d = ParsedDirective::new(name, args)?; - trace!("WikitextParser: directive={:?}", d); - let d = Directive::try_from(d)?; - Snippet::Directive(d) - } else if pat.as_str() == self.wikilink_bare.as_str() { - let s = c.name("linktext").unwrap().as_str(); - let link = WikiLink::new(s, s); - Snippet::WikiLink(link) - } else if pat.as_str() == self.wikilink_complex.as_str() { - let link_text = c.name("linktext").unwrap().as_str(); - let target = c.name("target").unwrap().as_str(); - let link = WikiLink::new(link_text, target); - Snippet::WikiLink(link) - } else if pat.as_str() == self.no_bracket.as_str() - || pat.as_str() == self.bracket.as_str() - { - Snippet::Markdown(m.as_str().into()) - } else { - unreachable!("need to handle pattern: {}", pat.as_str()); - }; - let rest = text.get(m.end()..).unwrap(); - trace!("WikitextParser: token={:?}", token); - return Ok((token, rest)); - } - } - Ok((Snippet::Markdown(text.into()), "")) - } - - fn parse_args(&self, mut args: &str) -> HashMap { - let mut map = HashMap::new(); - let patterns = vec![&self.multiline, &self.quoted, &self.simple, &self.plain]; - loop { - args = args.trim_start(); - if args.is_empty() { - break; - } - let mut matched = false; - for pat in patterns.iter() { - if let Some(m) = pat.find(args) { - if m.start() > 0 { - continue; - } - matched = true; - let t = m.as_str(); - let c = pat.captures(t).unwrap(); - if pat.as_str() == self.plain.as_str() { - let key = c.name("key").unwrap().as_str(); - map.insert(key.into(), "".into()); - } else { - let key = c.name("key").unwrap().as_str(); - let value = c.name("value").unwrap().as_str(); - map.insert(key.into(), value.into()); - } - args = args.get(m.end()..).unwrap(); - } - } - if !matched { - break; - } - } - map - } -} - -impl Default for WikitextParser { - fn default() -> Self { - Self { - no_bracket: Regex::new(r"^[^\[]+").unwrap(), - bracket: Regex::new(r"^\[").unwrap(), - wikilink_bare: Regex::new(r"^\[\[\s*(?P(\w|[-/_])+)\s*\]\]").unwrap(), - wikilink_complex: Regex::new( - r"\[\[\s*(?P.*)\|(?P(\w|[-/_])+)\s*\]\]", - ) - .unwrap(), - directive_no_args: Regex::new(r"^\[\[!(?P\w+)\s*\]\]").unwrap(), - directive_args: Regex::new(r#"\[\[!(?P\w+)\s+(?P[^]]*?)\s*\]\]"#).unwrap(), - plain: Regex::new(r"(?P[a-z0-9._-]+)").unwrap(), - simple: Regex::new(r"(?P\w+)=(?P\S+)").unwrap(), - quoted: Regex::new(r#"(?P\w+)="(?P[^"]*)""#).unwrap(), - multiline: Regex::new(r#"(?P\w+)=("""(?P(.|\n)*?)""")"#).unwrap(), - } - } -} - #[derive(Debug, Eq, PartialEq)] pub enum Snippet { Markdown(String), @@ -236,145 +90,3 @@ impl ParsedDirective { .collect() } } - -#[derive(Debug, thiserror::Error)] -pub enum WikiError {} - -#[cfg(test)] -mod test { - use super::{Directive, Snippet, WikiLink, WikitextParser}; - - #[test] - fn plain_markdown() { - let p = WikitextParser::default(); - let (snippet, rest) = p.parse("hello, world").unwrap(); - assert_eq!(snippet, Snippet::Markdown("hello, world".into())); - assert_eq!(rest, ""); - } - - #[test] - fn simple_wikilink() { - let p = WikitextParser::default(); - - let (snippet, rest) = p.parse("hello, [[planet-earth]]").unwrap(); - assert_eq!(snippet, Snippet::Markdown("hello, ".into())); - assert_eq!(rest, "[[planet-earth]]"); - - let (snippet, rest) = p.parse(rest).unwrap(); - assert_eq!( - snippet, - Snippet::WikiLink(WikiLink::new("planet-earth", "planet-earth")) - ); - assert_eq!(rest, ""); - } - - #[test] - fn simple_wikilink_to_subpage() { - let p = WikitextParser::default(); - - let (snippet, rest) = p.parse("hello, [[planets/earth]]").unwrap(); - assert_eq!(snippet, Snippet::Markdown("hello, ".into())); - assert_eq!(rest, "[[planets/earth]]"); - - let (snippet, rest) = p.parse(rest).unwrap(); - assert_eq!( - snippet, - Snippet::WikiLink(WikiLink::new("planets/earth", "planets/earth")) - ); - assert_eq!(rest, ""); - } - - #[test] - fn complex_wikilink() { - let p = WikitextParser::default(); - - let (snippet, rest) = p - .parse("hello, [[whomever we greet|planet-earth]]") - .unwrap(); - assert_eq!(snippet, Snippet::Markdown("hello, ".into())); - assert_eq!(rest, "[[whomever we greet|planet-earth]]"); - - let (snippet, rest) = p.parse(rest).unwrap(); - assert_eq!( - snippet, - Snippet::WikiLink(WikiLink::new("whomever we greet", "planet-earth")) - ); - assert_eq!(rest, ""); - } - - #[test] - fn complex_wikilink_to_subpage() { - let p = WikitextParser::default(); - - let (snippet, rest) = p - .parse("hello, [[whomever we greet|planets/earth]]") - .unwrap(); - assert_eq!(snippet, Snippet::Markdown("hello, ".into())); - assert_eq!(rest, "[[whomever we greet|planets/earth]]"); - - let (snippet, rest) = p.parse(rest).unwrap(); - assert_eq!( - snippet, - Snippet::WikiLink(WikiLink::new("whomever we greet", "planets/earth")) - ); - assert_eq!(rest, ""); - } - - #[test] - fn bracket() { - let p = WikitextParser::default(); - - let (snippet, rest) = p.parse("[world").unwrap(); - assert_eq!(snippet, Snippet::Markdown("[".into())); - assert_eq!(rest, "world"); - - let (snippet, rest) = p.parse(rest).unwrap(); - assert_eq!(snippet, Snippet::Markdown("world".into())); - assert_eq!(rest, ""); - } - - #[test] - fn simple_directive() { - let p = WikitextParser::default(); - let (snippet, rest) = p.parse("[[!simple]]").unwrap(); - assert_eq!(snippet, Snippet::Directive(Directive::Simple)); - assert_eq!(rest, ""); - } - - #[test] - fn directive_unnamed_arg() { - let p = WikitextParser::default(); - let (snippet, rest) = p.parse("[[!unnamedarg foo.jpg]]").unwrap(); - assert_eq!(snippet, Snippet::Directive(Directive::UnnamedArg)); - assert_eq!(rest, ""); - } - - #[test] - fn directive_simple_arg() { - let p = WikitextParser::default(); - let (snippet, rest) = p.parse("[[!simplearg foo=bar]]").unwrap(); - assert_eq!(snippet, Snippet::Directive(Directive::SimpleArg)); - assert_eq!(rest, ""); - } - - #[test] - fn directive_quoted_arg() { - let p = WikitextParser::default(); - let (snippet, rest) = p.parse(r#"[[!quotedarg bar="foobar"]]"#).unwrap(); - assert_eq!(snippet, Snippet::Directive(Directive::QuotedArg)); - assert_eq!(rest, ""); - } - - #[test] - fn directive_multiline_arg() { - let p = WikitextParser::default(); - let (snippet, rest) = p - .parse( - r#"[[!multilinearg yo="""foo -bar"""]]"#, - ) - .unwrap(); - assert_eq!(snippet, Snippet::Directive(Directive::MultilineArg)); - assert_eq!(rest, ""); - } -} -- cgit v1.2.1