From 36cf5c88cfe5a3141eaa784905f3a3bcc0f7f70c Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sat, 15 Jun 2019 19:15:48 +0100 Subject: Proof of concept extraction state machine --- fable-cat-poc/.gitignore | 2 + fable-cat-poc/Cargo.lock | 64 +++++++++++++++++++ fable-cat-poc/Cargo.toml | 12 ++++ fable-cat-poc/src/main.rs | 152 ++++++++++++++++++++++++++++++++++++++++++++++ fable-cat-poc/test.md | 57 +++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 fable-cat-poc/.gitignore create mode 100644 fable-cat-poc/Cargo.lock create mode 100644 fable-cat-poc/Cargo.toml create mode 100644 fable-cat-poc/src/main.rs create mode 100644 fable-cat-poc/test.md (limited to 'fable-cat-poc') diff --git a/fable-cat-poc/.gitignore b/fable-cat-poc/.gitignore new file mode 100644 index 0000000..7aef303 --- /dev/null +++ b/fable-cat-poc/.gitignore @@ -0,0 +1,2 @@ +target +*~ diff --git a/fable-cat-poc/Cargo.lock b/fable-cat-poc/Cargo.lock new file mode 100644 index 0000000..eb08ed7 --- /dev/null +++ b/fable-cat-poc/Cargo.lock @@ -0,0 +1,64 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "bitflags" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "fable-cat-poc" +version = "0.1.0" +dependencies = [ + "pulldown-cmark 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "getopts" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memchr" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "pulldown-cmark" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getopts 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicase 2.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicase" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" +"checksum getopts 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)" = "72327b15c228bfe31f1390f93dd5e9279587f0463836393c9df719ce62a3e450" +"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" +"checksum pulldown-cmark 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "051e60ace841b3bfecd402fe5051c06cb3bec4a6e6fdd060a37aa8eb829a1db3" +"checksum unicase 2.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a84e5511b2a947f3ae965dcb29b13b7b1691b6e7332cf5dbc1744138d5acb7f6" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" diff --git a/fable-cat-poc/Cargo.toml b/fable-cat-poc/Cargo.toml new file mode 100644 index 0000000..86bc636 --- /dev/null +++ b/fable-cat-poc/Cargo.toml @@ -0,0 +1,12 @@ +[package] +authors = ["Daniel Silverstone "] +edition = "2018" +name = "fable-cat-poc" +version = "0.1.0" + +[features] +default = [] +print_headers = [] + +[dependencies] +pulldown-cmark = "0.5.2" diff --git a/fable-cat-poc/src/main.rs b/fable-cat-poc/src/main.rs new file mode 100644 index 0000000..fb82b8a --- /dev/null +++ b/fable-cat-poc/src/main.rs @@ -0,0 +1,152 @@ +use std::io::{self, Read}; +use std::ops::{Range, RangeBounds}; + +use pulldown_cmark::{Event, Options, Parser, Tag}; + +#[derive(Default)] +struct Span { + first_line: usize, + first_column: usize, + last_line: usize, + last_column: usize, +} + +fn location_to_span(input: &str, loc: &Range) -> Span { + let mut ret = Span::default(); + + let mut line = 1; + let mut col = 1; + for (pos, ch) in input.chars().enumerate() { + if ch == '\n' { + line += 1; + col = 1; + } else { + col += 1; + } + if pos == loc.start { + ret.first_line = line; + ret.first_column = col; + } + if pos == loc.end { + ret.last_line = line; + ret.last_column = col; + } + } + + ret +} + +fn print_thing(input: &str, loc: &Range) { + let content = &input[loc.clone()]; + let span = location_to_span(input, loc); + println!( + "> From line {} column {} to line {} column {}", + span.first_line, span.first_column, span.last_line, span.last_column + ); + for l in content.lines() { + println!("> `{}`", l); + } + if span.first_line == span.last_line { + print!("> `"); + for _ in 0..span.first_column { + print!(" "); + } + for _ in span.first_column..=span.last_column { + print!("^"); + } + println!("`"); + } +} + +fn main() -> io::Result<()> { + let mut input = String::new(); + let inputlen = io::stdin().read_to_string(&mut input)?; + eprintln!("Read {} bytes of input", inputlen); + let options = Options::all(); + let parser = Parser::new_ext(&input, options); + + let mut depth = 0; + let mut in_scenario = false; + let mut in_scenario_block = false; + let mut in_header = false; + let mut last_header_depth = 1000; + let mut last_header_content: Option = None; + for (event, span) in parser.into_offset_iter() { + eprintln!("{:?}", event); + match event { + Event::Start(tag) => { + depth += 1; + eprintln!("Depth now: {}", depth); + if depth == 1 { + match tag { + Tag::Header(level) => { + // Two ways to handle a header, + // If we're in a scenario, we care only if this header + // is at the same or a higher level, otherwise we + // always read the header in. + if in_scenario { + if level <= last_header_depth { + println!("```"); + in_scenario = false; + in_header = true; + last_header_depth = level; + } + } else { + in_header = true; + last_header_depth = level; + } + } + Tag::CodeBlock(name) => { + if name.as_ref() == "fable" { + // Fable code block, only valid if we were + // after a header + if last_header_content.is_some() { + in_scenario_block = true; + } + } + } + _ => {} + } + } + } + Event::End(tag) => { + depth -= 1; + eprintln!("Depth now: {}", depth); + if depth == 0 { + match tag { + Tag::Header(_) => { + // We're ending a header now + in_header = false; + } + Tag::CodeBlock(_) => { + in_scenario_block = false; + } + _ => {} + } + } + } + Event::Text(s) => { + if in_header { + // We're parsing a header + last_header_content = Some(s.to_string()); + in_header = false; + if cfg!(feature = "print_headers") { + print_thing(&input, &span); + } + } else if in_scenario_block { + if !in_scenario { + for _ in 0..last_header_depth { + print!("#"); + } + println!(" {}", last_header_content.as_ref().unwrap()); + in_scenario = true; + println!("```fable"); + } + print!("{}", s); + } + } + _ => {} + } + } + Ok(()) +} diff --git a/fable-cat-poc/test.md b/fable-cat-poc/test.md new file mode 100644 index 0000000..384ee94 --- /dev/null +++ b/fable-cat-poc/test.md @@ -0,0 +1,57 @@ +# Testing for fable-cat-poc + +Essentially we're after checking the following properties: + +1. Only fenced code blocks labelled `fable` will be consumed +2. Those code blocks must be at the top level +3. Only headers at the top level will be consumed +4. A header which is at the same or a higher level than the last scenario + will be used to indicate the end of a scenario +5. Only the header for the scenario will be output +6. The code blocks for a scenario will be merged by the test tool + +# First example + +```fable +This content doesn't matter for now +``` + +# Second example + +## Second layer + +```fable +This content also doesn't matter for now +``` + +## Third example + +```fable +This is a third unimportant block of text +``` + +### This header is ignored + +#### This header is also ignored + +### Despite being higher than the above, this header doesn't stop the scenario + +- # This is a top level header yet is ignored by fable. + +> # Block quoted header ignored + +```fable +This block is a part of the third example +``` + +# This header stopped the above fable + +## This header is used as the fourth fable scenario name + +```fable +A fourth irrelevant block of text +``` + +- ```fable + Despite being a fenced block, this is ignored + ``` -- cgit v1.2.1