summary | refs | log | tree | commit | diff
path: root/fable-cat-poc
diff options
context:
space:
mode:
author: Daniel Silverstone <dsilvers@digital-scurf.org> 2019-06-15 19:15:48 +0100
committer: Daniel Silverstone <dsilvers@digital-scurf.org> 2019-06-15 19:15:52 +0100
commit: 36cf5c88cfe5a3141eaa784905f3a3bcc0f7f70c (patch)
tree: 2bcb1ca4b3bde910d5ac5bcad602535688045b47 /fable-cat-poc
parent: 6e610f2e840ab2f09b82bc0742037511c03819a7 (diff)
download: fable-poc-36cf5c88cfe5a3141eaa784905f3a3bcc0f7f70c.tar.gz
Proof of concept extraction state machine
Diffstat (limited to 'fable-cat-poc')
-rw-r--r-- fable-cat-poc/.gitignore 2
-rw-r--r-- fable-cat-poc/Cargo.lock 64
-rw-r--r-- fable-cat-poc/Cargo.toml 12
-rw-r--r-- fable-cat-poc/src/main.rs 152
-rw-r--r-- fable-cat-poc/test.md 57
5 files changed, 287 insertions, 0 deletions
diff --git a/fable-cat-poc/.gitignore b/fable-cat-poc/.gitignore
new file mode 100644
index 0000000..7aef303
--- /dev/null
+++ b/fable-cat-poc/.gitignore
@@ -0,0 +1,2 @@
+target
+*~
diff --git a/fable-cat-poc/Cargo.lock b/fable-cat-poc/Cargo.lock
new file mode 100644
index 0000000..eb08ed7
--- /dev/null
+++ b/fable-cat-poc/Cargo.lock
@@ -0,0 +1,64 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "bitflags"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "fable-cat-poc"
+version = "0.1.0"
+dependencies = [
+ "pulldown-cmark 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "getopts"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "memchr"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "pulldown-cmark"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "getopts 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "unicase"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "unicode-width"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "version_check"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd"
+"checksum getopts 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)" = "72327b15c228bfe31f1390f93dd5e9279587f0463836393c9df719ce62a3e450"
+"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
+"checksum pulldown-cmark 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "051e60ace841b3bfecd402fe5051c06cb3bec4a6e6fdd060a37aa8eb829a1db3"
+"checksum unicase 2.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a84e5511b2a947f3ae965dcb29b13b7b1691b6e7332cf5dbc1744138d5acb7f6"
+"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
+"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
diff --git a/fable-cat-poc/Cargo.toml b/fable-cat-poc/Cargo.toml
new file mode 100644
index 0000000..86bc636
--- /dev/null
+++ b/fable-cat-poc/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+authors = ["Daniel Silverstone <dsilvers@digital-scurf.org>"]
+edition = "2018"
+name = "fable-cat-poc"
+version = "0.1.0"
+
+[features]
+default = []
+print_headers = []
+
+[dependencies]
+pulldown-cmark = "0.5.2"
diff --git a/fable-cat-poc/src/main.rs b/fable-cat-poc/src/main.rs
new file mode 100644
index 0000000..fb82b8a
--- /dev/null
+++ b/fable-cat-poc/src/main.rs
@@ -0,0 +1,152 @@
+use std::io::{self, Read};
+use std::ops::{Range, RangeBounds};
+
+use pulldown_cmark::{Event, Options, Parser, Tag};
+
+/// Line/column extent of a region of the input text.
+///
+/// The `Default` value has every field set to zero. `location_to_span`
+/// starts from that value and only overwrites an endpoint when it finds the
+/// corresponding offset, so a zero field means "not located".
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+struct Span {
+    /// Line on which the region starts.
+    first_line: usize,
+    /// Column on which the region starts.
+    first_column: usize,
+    /// Line on which the region ends.
+    last_line: usize,
+    /// Column on which the region ends.
+    last_column: usize,
+}
+
+/// Converts a byte-offset range within `input` into line/column positions.
+///
+/// Lines and columns are 1-based, and columns count characters rather than
+/// bytes. `loc.start` and `loc.end` are byte offsets into `input`. Because
+/// `loc.end` is exclusive, `last_line`/`last_column` describe the position
+/// just past the final character of the range.
+///
+/// An offset equal to `input.len()` resolves to the position immediately
+/// after the last character. An offset that lies beyond the input, or that
+/// does not fall on a character boundary, leaves its fields at zero.
+fn location_to_span(input: &str, loc: &Range<usize>) -> Span {
+    let mut ret = Span::default();
+
+    let mut line = 1;
+    let mut col = 1;
+    // A sentinel entry at `input.len()` lets offsets that point at the very
+    // end of the input (common for an exclusive range end) be resolved by the
+    // same comparisons as every other offset.
+    let end_of_input = std::iter::once((input.len(), '\0'));
+    // `char_indices` yields byte offsets, matching the units of `loc`.
+    for (pos, ch) in input.char_indices().chain(end_of_input) {
+        // Compare before advancing so the recorded position is that of the
+        // character *at* the offset, not of the one following it.
+        if pos == loc.start {
+            ret.first_line = line;
+            ret.first_column = col;
+        }
+        if pos == loc.end {
+            ret.last_line = line;
+            ret.last_column = col;
+        }
+        if ch == '\n' {
+            line += 1;
+            col = 1;
+        } else {
+            col += 1;
+        }
+    }
+
+    ret
+}
+
+/// Prints a quoted description of the region `loc` of `input` to stdout.
+///
+/// The output is a line giving the region's start and end line/column, then
+/// each line of the region's text wrapped in backticks. When the region
+/// starts and ends on the same line, a final line of spaces and `^` markers
+/// derived from the start and end columns is printed as well.
+///
+/// Slicing `input` with `loc` panics if the range is out of bounds or does
+/// not fall on character boundaries.
+fn print_thing(input: &str, loc: &Range<usize>) {
+    // Slice first so an invalid range panics before anything is printed.
+    let excerpt = &input[loc.clone()];
+    let Span {
+        first_line,
+        first_column,
+        last_line,
+        last_column,
+    } = location_to_span(input, loc);
+
+    println!(
+        "> From line {} column {} to line {} column {}",
+        first_line, first_column, last_line, last_column
+    );
+    excerpt.lines().for_each(|text| println!("> `{}`", text));
+
+    // The marker line is only printed for single-line regions.
+    if first_line != last_line {
+        return;
+    }
+    let padding = " ".repeat(first_column);
+    let carets: String = (first_column..=last_column).map(|_| '^').collect();
+    println!("> `{}{}`", padding, carets);
+}
+
+/// Reads Markdown from stdin and prints the `fable` scenarios found in it.
+///
+/// Walks the pulldown-cmark event stream while tracking nesting depth, so
+/// that only top-level headers and top-level code blocks labelled `fable`
+/// are considered. The first `fable` block after a header opens a scenario:
+/// the header is re-emitted, followed by an opening `fable` code fence. The
+/// text of every subsequent `fable` block is appended until a top-level
+/// header of the same or a shallower level closes the fence. A scenario
+/// still open at the end of the input is closed as well. Every parser event
+/// and depth change is logged to stderr.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while reading stdin.
+fn main() -> io::Result<()> {
+    let mut input = String::new();
+    let inputlen = io::stdin().read_to_string(&mut input)?;
+    eprintln!("Read {} bytes of input", inputlen);
+    let options = Options::all();
+    let parser = Parser::new_ext(&input, options);
+
+    // Nesting depth of the element currently being parsed; 0 is document level.
+    let mut depth = 0;
+    // True while an output scenario (header + open fence) is being written.
+    let mut in_scenario = false;
+    // True while inside a top-level `fable` code block that follows a header.
+    let mut in_scenario_block = false;
+    // True between the start of a relevant header and its first text event.
+    let mut in_header = false;
+    // Sentinel larger than any real header level, so the first header is
+    // always accepted.
+    let mut last_header_depth = 1000;
+    let mut last_header_content: Option<String> = None;
+    for (event, span) in parser.into_offset_iter() {
+        eprintln!("{:?}", event);
+        match event {
+            Event::Start(tag) => {
+                depth += 1;
+                eprintln!("Depth now: {}", depth);
+                // Only elements opened directly at document level matter.
+                if depth == 1 {
+                    match tag {
+                        Tag::Header(level) => {
+                            // Two ways to handle a header,
+                            // If we're in a scenario, we care only if this header
+                            // is at the same or a higher level, otherwise we
+                            // always read the header in.
+                            if in_scenario {
+                                if level <= last_header_depth {
+                                    println!("```");
+                                    in_scenario = false;
+                                    in_header = true;
+                                    last_header_depth = level;
+                                }
+                            } else {
+                                in_header = true;
+                                last_header_depth = level;
+                            }
+                        }
+                        Tag::CodeBlock(name) => {
+                            if name.as_ref() == "fable" {
+                                // Fable code block, only valid if we were
+                                // after a header
+                                if last_header_content.is_some() {
+                                    in_scenario_block = true;
+                                }
+                            }
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            Event::End(tag) => {
+                depth -= 1;
+                eprintln!("Depth now: {}", depth);
+                if depth == 0 {
+                    match tag {
+                        Tag::Header(_) => {
+                            // We're ending a header now
+                            in_header = false;
+                        }
+                        Tag::CodeBlock(_) => {
+                            in_scenario_block = false;
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            Event::Text(s) => {
+                if in_header {
+                    // We're parsing a header; only its first text event is
+                    // kept as the scenario name.
+                    last_header_content = Some(s.to_string());
+                    in_header = false;
+                    if cfg!(feature = "print_headers") {
+                        print_thing(&input, &span);
+                    }
+                } else if in_scenario_block {
+                    if !in_scenario {
+                        // First fable block under this header: emit the
+                        // header line and open the output fence.
+                        for _ in 0..last_header_depth {
+                            print!("#");
+                        }
+                        println!(" {}", last_header_content.as_ref().unwrap());
+                        in_scenario = true;
+                        println!("```fable");
+                    }
+                    print!("{}", s);
+                }
+            }
+            _ => {}
+        }
+    }
+    // The last scenario has no following header to close it, so terminate
+    // its fence here rather than leaving the output with an open code block.
+    if in_scenario {
+        println!("```");
+    }
+    Ok(())
+}
diff --git a/fable-cat-poc/test.md b/fable-cat-poc/test.md
new file mode 100644
index 0000000..384ee94
--- /dev/null
+++ b/fable-cat-poc/test.md
@@ -0,0 +1,57 @@
+# Testing for fable-cat-poc
+
+Essentially we're after checking the following properties:
+
+1. Only fenced code blocks labelled `fable` will be consumed
+2. Those code blocks must be at the top level
+3. Only headers at the top level will be consumed
+4. A header which is at the same or a higher level than the last scenario
+ will be used to indicate the end of a scenario
+5. Only the header for the scenario will be output
+6. The code blocks for a scenario will be merged by the test tool
+
+# First example
+
+```fable
+This content doesn't matter for now
+```
+
+# Second example
+
+## Second layer
+
+```fable
+This content also doesn't matter for now
+```
+
+## Third example
+
+```fable
+This is a third unimportant block of text
+```
+
+### This header is ignored
+
+#### This header is also ignored
+
+### Despite being higher than the above, this header doesn't stop the scenario
+
+- # This is a top level header yet is ignored by fable.
+
+> # Block quoted header ignored
+
+```fable
+This block is a part of the third example
+```
+
+# This header stopped the above fable
+
+## This header is used as the fourth fable scenario name
+
+```fable
+A fourth irrelevant block of text
+```
+
+- ```fable
+ Despite being a fenced block, this is ignored
+ ```