summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2022-07-30 07:08:18 +0000
committer Lars Wirzenius <liw@liw.fi> 2022-07-30 07:08:18 +0000
commit bc9493725346d909c305fcaacd06c448e303cd05 (patch)
tree 40d8a37976a39b961588abd0ae8640be9f696cbd
parent b4fedfa3c05aebb10e183a744f9203583982db2c (diff)
parent 0f421241efad0e8163b90acbd85a7324003da248 (diff)
download riki-bc9493725346d909c305fcaacd06c448e303cd05.tar.gz
Merge branch 'liw/drop-old-parser' into 'main'
chore: drop unused old WikitextParser

See merge request larswirzenius/riki!28
-rw-r--r-- src/wikitext.rs 288
1 file changed, 0 insertions, 288 deletions
diff --git a/src/wikitext.rs b/src/wikitext.rs
index 39fbd32..54570fa 100644
--- a/src/wikitext.rs
+++ b/src/wikitext.rs
@@ -3,154 +3,8 @@ use crate::error::SiteError;
use crate::page::PageMeta;
use crate::site::Site;
use log::trace;
-use regex::Regex;
use std::collections::HashMap;
-use std::convert::TryFrom;
use std::path::Path;
-
-#[derive(Debug)]
-pub struct WikitextParser {
- // Text without open bracket: can't be a link or a directive.
- no_bracket: Regex,
-
- // An open bracket.
- bracket: Regex,
-
- // A wikilink that is just a bare page name.
- wikilink_bare: Regex,
-
- // A wikilink that has link text separately from target.
- wikilink_complex: Regex,
-
- // A directive without arguments.
- directive_no_args: Regex,
-
- // A directive with arguments.
- directive_args: Regex,
-
- // A plain argument without a value.
- plain: Regex,
- // An argument with an unquoted value.
- simple: Regex,
- // An argument with a quoted value.
- quoted: Regex,
- // An argument with a triple-quoted value.
- multiline: Regex,
-}
-
-impl WikitextParser {
- pub fn parse<'a>(&self, text: &'a str) -> Result<(Snippet, &'a str), SiteError> {
- let patterns = vec![
- &self.directive_args,
- &self.directive_no_args,
- &self.wikilink_bare,
- &self.wikilink_complex,
- &self.no_bracket,
- &self.bracket,
- ];
- for pat in patterns {
- if let Some(m) = pat.find(text) {
- if m.start() > 0 {
- continue;
- }
- trace!("WikitextParser: m={:?}", m);
- let t = m.as_str();
- let c = pat.captures(t).unwrap();
- let token = if pat.as_str() == self.directive_args.as_str() {
- let name = c.name("name").unwrap().as_str();
- let args = c.name("args").unwrap().as_str();
- let args = self.parse_args(args);
- let d = ParsedDirective::new(name, args)?;
- trace!("WikitextParser: directive={:?}", d);
- let d = Directive::try_from(d)?;
- Snippet::Directive(d)
- } else if pat.as_str() == self.directive_no_args.as_str() {
- let name = c.name("name").unwrap().as_str();
- let args = self.parse_args("");
- let d = ParsedDirective::new(name, args)?;
- trace!("WikitextParser: directive={:?}", d);
- let d = Directive::try_from(d)?;
- Snippet::Directive(d)
- } else if pat.as_str() == self.wikilink_bare.as_str() {
- let s = c.name("linktext").unwrap().as_str();
- let link = WikiLink::new(s, s);
- Snippet::WikiLink(link)
- } else if pat.as_str() == self.wikilink_complex.as_str() {
- let link_text = c.name("linktext").unwrap().as_str();
- let target = c.name("target").unwrap().as_str();
- let link = WikiLink::new(link_text, target);
- Snippet::WikiLink(link)
- } else if pat.as_str() == self.no_bracket.as_str()
- || pat.as_str() == self.bracket.as_str()
- {
- Snippet::Markdown(m.as_str().into())
- } else {
- unreachable!("need to handle pattern: {}", pat.as_str());
- };
- let rest = text.get(m.end()..).unwrap();
- trace!("WikitextParser: token={:?}", token);
- return Ok((token, rest));
- }
- }
- Ok((Snippet::Markdown(text.into()), ""))
- }
-
- fn parse_args(&self, mut args: &str) -> HashMap<String, String> {
- let mut map = HashMap::new();
- let patterns = vec![&self.multiline, &self.quoted, &self.simple, &self.plain];
- loop {
- args = args.trim_start();
- if args.is_empty() {
- break;
- }
- let mut matched = false;
- for pat in patterns.iter() {
- if let Some(m) = pat.find(args) {
- if m.start() > 0 {
- continue;
- }
- matched = true;
- let t = m.as_str();
- let c = pat.captures(t).unwrap();
- if pat.as_str() == self.plain.as_str() {
- let key = c.name("key").unwrap().as_str();
- map.insert(key.into(), "".into());
- } else {
- let key = c.name("key").unwrap().as_str();
- let value = c.name("value").unwrap().as_str();
- map.insert(key.into(), value.into());
- }
- args = args.get(m.end()..).unwrap();
- }
- }
- if !matched {
- break;
- }
- }
- map
- }
-}
-
-impl Default for WikitextParser {
- fn default() -> Self {
- Self {
- no_bracket: Regex::new(r"^[^\[]+").unwrap(),
- bracket: Regex::new(r"^\[").unwrap(),
- wikilink_bare: Regex::new(r"^\[\[\s*(?P<linktext>(\w|[-/_])+)\s*\]\]").unwrap(),
- wikilink_complex: Regex::new(
- r"\[\[\s*(?P<linktext>.*)\|(?P<target>(\w|[-/_])+)\s*\]\]",
- )
- .unwrap(),
- directive_no_args: Regex::new(r"^\[\[!(?P<name>\w+)\s*\]\]").unwrap(),
- directive_args: Regex::new(r#"\[\[!(?P<name>\w+)\s+(?P<args>[^]]*?)\s*\]\]"#).unwrap(),
- plain: Regex::new(r"(?P<key>[a-z0-9._-]+)").unwrap(),
- simple: Regex::new(r"(?P<key>\w+)=(?P<value>\S+)").unwrap(),
- quoted: Regex::new(r#"(?P<key>\w+)="(?P<value>[^"]*)""#).unwrap(),
- multiline: Regex::new(r#"(?P<key>\w+)=("""(?P<value>(.|\n)*?)""")"#).unwrap(),
- }
- }
-}
-
#[derive(Debug, Eq, PartialEq)]
pub enum Snippet {
Markdown(String),
@@ -236,145 +90,3 @@ impl ParsedDirective {
.collect()
}
}
-
-#[derive(Debug, thiserror::Error)]
-pub enum WikiError {}
-
-#[cfg(test)]
-mod test {
- use super::{Directive, Snippet, WikiLink, WikitextParser};
-
- #[test]
- fn plain_markdown() {
- let p = WikitextParser::default();
- let (snippet, rest) = p.parse("hello, world").unwrap();
- assert_eq!(snippet, Snippet::Markdown("hello, world".into()));
- assert_eq!(rest, "");
- }
-
- #[test]
- fn simple_wikilink() {
- let p = WikitextParser::default();
-
- let (snippet, rest) = p.parse("hello, [[planet-earth]]").unwrap();
- assert_eq!(snippet, Snippet::Markdown("hello, ".into()));
- assert_eq!(rest, "[[planet-earth]]");
-
- let (snippet, rest) = p.parse(rest).unwrap();
- assert_eq!(
- snippet,
- Snippet::WikiLink(WikiLink::new("planet-earth", "planet-earth"))
- );
- assert_eq!(rest, "");
- }
-
- #[test]
- fn simple_wikilink_to_subpage() {
- let p = WikitextParser::default();
-
- let (snippet, rest) = p.parse("hello, [[planets/earth]]").unwrap();
- assert_eq!(snippet, Snippet::Markdown("hello, ".into()));
- assert_eq!(rest, "[[planets/earth]]");
-
- let (snippet, rest) = p.parse(rest).unwrap();
- assert_eq!(
- snippet,
- Snippet::WikiLink(WikiLink::new("planets/earth", "planets/earth"))
- );
- assert_eq!(rest, "");
- }
-
- #[test]
- fn complex_wikilink() {
- let p = WikitextParser::default();
-
- let (snippet, rest) = p
- .parse("hello, [[whomever we greet|planet-earth]]")
- .unwrap();
- assert_eq!(snippet, Snippet::Markdown("hello, ".into()));
- assert_eq!(rest, "[[whomever we greet|planet-earth]]");
-
- let (snippet, rest) = p.parse(rest).unwrap();
- assert_eq!(
- snippet,
- Snippet::WikiLink(WikiLink::new("whomever we greet", "planet-earth"))
- );
- assert_eq!(rest, "");
- }
-
- #[test]
- fn complex_wikilink_to_subpage() {
- let p = WikitextParser::default();
-
- let (snippet, rest) = p
- .parse("hello, [[whomever we greet|planets/earth]]")
- .unwrap();
- assert_eq!(snippet, Snippet::Markdown("hello, ".into()));
- assert_eq!(rest, "[[whomever we greet|planets/earth]]");
-
- let (snippet, rest) = p.parse(rest).unwrap();
- assert_eq!(
- snippet,
- Snippet::WikiLink(WikiLink::new("whomever we greet", "planets/earth"))
- );
- assert_eq!(rest, "");
- }
-
- #[test]
- fn bracket() {
- let p = WikitextParser::default();
-
- let (snippet, rest) = p.parse("[world").unwrap();
- assert_eq!(snippet, Snippet::Markdown("[".into()));
- assert_eq!(rest, "world");
-
- let (snippet, rest) = p.parse(rest).unwrap();
- assert_eq!(snippet, Snippet::Markdown("world".into()));
- assert_eq!(rest, "");
- }
-
- #[test]
- fn simple_directive() {
- let p = WikitextParser::default();
- let (snippet, rest) = p.parse("[[!simple]]").unwrap();
- assert_eq!(snippet, Snippet::Directive(Directive::Simple));
- assert_eq!(rest, "");
- }
-
- #[test]
- fn directive_unnamed_arg() {
- let p = WikitextParser::default();
- let (snippet, rest) = p.parse("[[!unnamedarg foo.jpg]]").unwrap();
- assert_eq!(snippet, Snippet::Directive(Directive::UnnamedArg));
- assert_eq!(rest, "");
- }
-
- #[test]
- fn directive_simple_arg() {
- let p = WikitextParser::default();
- let (snippet, rest) = p.parse("[[!simplearg foo=bar]]").unwrap();
- assert_eq!(snippet, Snippet::Directive(Directive::SimpleArg));
- assert_eq!(rest, "");
- }
-
- #[test]
- fn directive_quoted_arg() {
- let p = WikitextParser::default();
- let (snippet, rest) = p.parse(r#"[[!quotedarg bar="foobar"]]"#).unwrap();
- assert_eq!(snippet, Snippet::Directive(Directive::QuotedArg));
- assert_eq!(rest, "");
- }
-
- #[test]
- fn directive_multiline_arg() {
- let p = WikitextParser::default();
- let (snippet, rest) = p
- .parse(
- r#"[[!multilinearg yo="""foo
-bar"""]]"#,
- )
- .unwrap();
- assert_eq!(snippet, Snippet::Directive(Directive::MultilineArg));
- assert_eq!(rest, "");
- }
-}