From 8a5ec6f143972670d0382ca94a8195d54fef75b3 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Tue, 16 Feb 2021 16:11:36 +0200 Subject: update why-rust.md for today's talk --- why-rust.md | 535 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 371 insertions(+), 164 deletions(-) diff --git a/why-rust.md b/why-rust.md index b166f03..4ff84d9 100644 --- a/why-rust.md +++ b/why-rust.md @@ -1,102 +1,147 @@ ---- -title: "Why Rust?" -author: Lars Wirzenius -... - # About me -* First learnt programming in April, 1984, so 36 years ago -* Been programming more or less daily ever since -* BASIC, Pascal, C, a little C++, Unix shell, AWK, a lot of Python, Lisp, a - little Haskell, a little Go, and now Rust +* Been programming since April, 1984 + +* BASIC, Pascal, a lot of C, a little C++, various Unix shells, AWK, a + taste of FORTRAN77, a smidgen of Perl, a lot of Python, a bit of + Lisp, a little Haskell, a little Go, and now Rust + * I really like Rust -* Rust is like Haskell and C had a love child with unicorn fairy godmothers ---- +----------------------------------------------------------------------------- -# Why I like Rust: type system +# About Rust -* strong, static, versatile typing +* systems programming - - helps avoid many bugs, reduces need to test every statement and - branch with unit tests - - not quite true that if a program compiles, it works, but Rust is a - lot closer to that than C, C++, Pascal, etc - - avoids entire classes of bugs: NULL pointer errors, unchecked - error returns, implicit but wrong value conversions +* emphasis on safety, performance -* type inference: mostly only need to specify function signatures, - structs, and similar parts and the compiler deduces the rest +* a bit like C and Haskell had a baby ---- +* Cargo build tool and dependency manager + +* long-term stability: editions + +* a lively ecosystem of libraries ("crates") + + + 
+----------------------------------------------------------------------------- + +# Why I like Rust: strong, static, versatile typing + +* prevents entire classes of bugs: NULL pointer errors, unchecked + error returns, implicit but wrong value conversions, data races + +* "if a program compiles, it usually works" (not really) + +----------------------------------------------------------------------------- + +# Why I like Rust: type inference + +* less typing, more power + +* mostly only need to specify function signatures, structs, and + similar parts and the compiler deduces the rest + +* also infers lifetimes when it's unambiguous + +----------------------------------------------------------------------------- # Why I like Rust: memory management * manual, a la C: malloc, free - error prone: easy to leak (no free), easy to break (more than one - free), hard to debug + free, or use after free), hard to debug * automatic, with garbage collection - - Lisp, Python, Go + - Lisp, Python, Go, Java, ... - requires a runtime - despite decades of research, inevitably there is overhead and short pauses -* Rust: automatic with borrow checker, lifetimes +* Rust: automatic, with borrow checker, lifetimes - all heap values heap are tied to values on the stack - when on-stack value is freed, the heap value is dropped - requires making sure stack values have the right lifetimes - - mutability must be declared explicitly - compiler can prove lack of data races + - bonus: mutability must be declared explicitly ---- +----------------------------------------------------------------------------- -# Why I like Rust: misc +# Why I like Rust: random bits -* not controlled by a megacorporation -* the ? 
operator and the Result type -* the Option type -* enums are powerful +* not controlled by a mega-corporation +* the `?` operator and the `Result` type +* the `Option` type +* enums are powerful, not just names for integers * pattern matching is powerful and exhaustive * no OOP, but traits * zero cost abstractions actually work * binaries execute quickly * excellent concurrency support ---- +----------------------------------------------------------------------------- -# Why I like Rust: tooling and ecosystem +# Why I like Rust: tooling, ecosystem, community -* cargo, crates.io work well and avoid most pitfalls other languages - have -* good doctests -* things get better, the community and ecosystem seem lively -* avoids the 20/80 principle: problems are solved well, "pragmatic - shortcuts" to allow implementation to be simpler are avoided, if the - shortcut would be likely to bit users -* the community is friendly and constructive +* `cargo` works well and avoids most pitfalls other language package + managers have ---- +* culture of good documentation, with doctests + +* things keep getting better, without breaking things + +* avoids the 20/80 principle: problems are solved thoroughly + - culture avoids "pragmatic shortcuts" to allow implementation to be + simpler, if the shortcut would be likely to bite users + +* the community is friendly and constructive and lively + +----------------------------------------------------------------------------- + +# Not everything is perfect: Rust is not finished + +* language and libraries change + + - rustup install/update preferred over distro packages + - new language release every six weeks + +* fairly young -# Not everything is perfect + - Rust: 2010 (1.0 in 2015) + - Go: 2009 + - C#: 2000 + - PHP, Java, JavaScript: 1995 + - Python: 1991 + - Haskell: 1990 + - Perl: 1987 + - C: 1972 + - Lisp: 1958 + - Fortran: 1957 + - Plankalkül: 1948 + + +----------------------------------------------------------------------------- + +# 
Not everything is perfect: portability, etc + +* supports a fraction of the targets of C + + - but all the big mainstream ones are supported -* things in the language and important libraries change - - but almost always with backwards compatibility) -* supports a lot fewer targets than C - - but the big mainstream ones are supported -* not well packaged for Linux distros; rustup is needed - - bug as the language stabilizes, this will improve * de facto static linking only - dynamic linking works, but Rust ABI is not yet stable, so static is the default -* compilation is somewhat slow ---- +* compilation is slow + +----------------------------------------------------------------------------- -# Example: hello +# Example: hello.rs (self-standing) ```{.rust .numberLines} fn main() { @@ -104,133 +149,295 @@ fn main() { } ``` ---- +Compiled with the Rust compiler directly: -# Example: Subplot docgen main program +~~~{.sh .numberLines} +$ rustc hello.rs +$ ./hello +Hello, world! +$ +~~~ -```{.rust .numberLines} -use chrono::{Local, TimeZone}; -use pandoc; -use std::fs; -use std::path::Path; -use std::path::PathBuf; -use std::time::UNIX_EPOCH; -use structopt::StructOpt; - -use subplot; -``` ---- +----------------------------------------------------------------------------- -```{.rust .numberLines} -// Define the command line arguments. -#[derive(Debug, StructOpt)] -#[structopt(name = "docgen", - about = "Subplot document generator.")] -struct Opt { - // One or more input filename. - #[structopt(parse(from_os_str))] - filenames: Vec, +# Example: hello with Cargo + +* `cargo` is the Rust build tool and dependency manager + +* `cargo init` + +* minimum files - // Set output file name. - #[structopt(name = "FILE", long = "--output", - short = "-o", parse(from_os_str))] - output: PathBuf, + * `Cargo.toml` + * `src/main.rs` - // Set date. 
- #[structopt(name = "DATE", long = "--date")] - date: Option, +----------------------------------------------------------------------------- + +# Example: hello with Cargo: `Cargo.toml` + +~~~{.toml .numberLines} +[package] +name = "hello" +version = "0.1.0" +authors = ["Lars Wirzenius "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +~~~ + +----------------------------------------------------------------------------- + +# Example: hello with Cargo: `src/main.rs` + +~~~{.rust .numberLines} +fn main() { + println!("Hello, world!"); } -``` ---- +~~~ -```{.rust .numberLines} -fn main() -> subplot::Result<()> { - let opt = Opt::from_args(); - let mut pandoc = pandoc::new(); - - // Tell Pandoc what the input files are. - let first_file = &opt.filenames[0]; - for filename in opt.filenames.iter() { - pandoc.add_input(&filename); - } -``` ---- +----------------------------------------------------------------------------- -```{.rust .numberLines} - // Tell Pandoc what the input file format is. - pandoc.set_input_format( - pandoc::InputFormat::Markdown, - vec![pandoc::MarkdownExtension::Citations], - ); - - // Add external Pandoc filters. - let citeproc = std::path::Path::new("pandoc-citeproc"); - pandoc.add_option( - pandoc::PandocOption::Filter( - citeproc.to_path_buf())); - - // This would be nicer if pandoc took a Pathbuf for output, - // instead of a String. Reported as - // - let output = format!("{}", opt.output.display()); - pandoc.set_output(pandoc::OutputKind::File(output)); -``` +# Example: hello with Cargo: commands + +~~~{.sh .numberLines} +$ cargo init --bin hello +$ cd hello +$ cargo run + Compiling hello v0.1.0 (/home/liw/wmf/talks/x/hello) + Finished dev [unoptimized + debuginfo] target(s) in 0.37s + Running `target/debug/hello` +Hello, world! +$ cargo run -q +Hello, world! 
+$ +~~~ + +----------------------------------------------------------------------------- + +# Long example: summain + +A real program. Outputs a _manifest_ with metadata about files named +on the command line. Testing tool for backup software. Uses +concurrency (threads) for speed. + +\ + +Actual source code: [`https://gitlab.com/larswirzenius/summain`](https://gitlab.com/larswirzenius/summain) + +----------------------------------------------------------------------------- + +# Summain output, sample + +~~~{.yaml .numberLines} --- +path: "./COPYING" +mode: "-rw-rw-r--" +mtime: 1613482516 +mtime_nsec: 369325267 +nlink: 1 +size: 35147 +sha256: 8ceb4b9ee5adedde47b31e975c1d90c73ad27b6b165a1dcd80c7c545eb65b903 +target: ~ -```{.rust .numberLines} - // Set options for Pandoc. - pandoc.add_option(pandoc::PandocOption::TableOfContents); - pandoc.add_option(pandoc::PandocOption::Standalone); - pandoc.add_option(pandoc::PandocOption::NumberSections); -``` --- +path: src +mode: drwxrwxr-x +mtime: 1613482516 +mtime_nsec: 369325267 +nlink: 3 +size: ~ +sha256: ~ +target: ~ + +~~~ + +----------------------------------------------------------------------------- + +# Example: summain: main program, part 1 + +~~~{.rust .numberLines} +fn main() -> anyhow::Result<()> { + let mut opt = Opt::from_args(); + opt.pathnames[..].sort(); + let v: Vec<anyhow::Result<ManifestEntry>> = + opt.pathnames + .par_iter().map(|p| manifest(&p)) + .collect(); + for m in v { + let m = m?; + println!("{}", serde_yaml::to_string(&m)?); + } + Ok(()) +} +~~~ -```{.rust .numberLines} - // Metadata date from command line or file mtime. However, we - // can't set it directly, since we don't want to override the date - // in the actual document, if given, so we only set - // user-provided-date. Our parsing code will use that if date is - // not document metadata. - let date = if let Some(date) = opt.date { - date - } else { - // We have to parse the document to get the date from the - // metadata. 
We can't set the metadata for typesetting until - // we do. So we parse it twice. - let doc = subplot::Document::from_file(&first_file)?; - if let Some(date) = doc.meta().date() { - date.to_string() +----------------------------------------------------------------------------- + +# Example: summain: main program, part 2 + +~~~{.rust .numberLines} +#[derive(StructOpt, Debug)] +struct Opt { + #[structopt(parse(from_os_str))] + pathnames: Vec<PathBuf>, +} + +fn manifest( + path: &Path +) -> anyhow::Result<ManifestEntry> { + ManifestEntry::new(path) + .with_context(|| format!("{}", path.display())) +} +~~~ + +----------------------------------------------------------------------------- + +# Example: summain: `struct ManifestEntry` + +~~~{.rust .numberLines} +#[derive(Serialize, Debug)] +pub struct ManifestEntry { + path: String, + #[serde(with = "mode")] + mode: u32, + mtime: i64, + mtime_nsec: i64, + nlink: u64, + size: Option<u64>, + sha256: Option<String>, + target: Option<PathBuf>, +} +~~~ + +----------------------------------------------------------------------------- + +# Example: summain: `impl ManifestEntry`, part 1 + +~~~{.rust .numberLines} +impl ManifestEntry { + pub fn new(path: &Path) -> std::io::Result<Self> { + let m = symlink_metadata(path)?; + let hash = if m.is_file() { + Some(file_checksum(path)?) + } else { + None + }; + let target = if m.file_type().is_symlink() { + Some(read_link(path)?) } else { - mtime(first_file)? 
+ None + }; +~~~ + +----------------------------------------------------------------------------- + +# Example: summain: `impl ManifestEntry`, part 2 + +~~~{.rust .numberLines} + Ok(Self { + path: path.to_string_lossy().into_owned(), + mode: m.st_mode(), + mtime: m.st_mtime(), + mtime_nsec: m.st_mtime_nsec(), + nlink: m.st_nlink(), + size: if m.is_dir() { + None + } else { + Some(m.st_size()) + }, + sha256: hash, + target, + }) + } +} +~~~ + +----------------------------------------------------------------------------- + +# Example: summain: `file_checksum` + +~~~{.rust .numberLines} +fn file_checksum(path: &Path) -> std::io::Result<String> { + let mut hasher = Sha256::new(); + + let file = File::open(path)?; + let mut reader = BufReader::new(file); + let mut buf = vec![0; BUF_SIZE]; + loop { + let n = reader.read(&mut buf)?; + if n == 0 { + break; } - }; - pandoc.add_option(pandoc::PandocOption::Meta("date".to_string(), Some(date))); -``` ---- + hasher.update(&buf[..n]); + } + let hash = hasher.finalize(); + Ok(format!("{:x}", hash)) +} +~~~ -```{.rust .numberLines} - // Run Pandoc to parse the inputs into an abstract syntax tree, - // then run our filter on that, then let Pandoc produce the output - // file from the AST. 
- pandoc.add_filter(|json| { - let mut doc = subplot::Document::from_json(&json).expect("error parsing JSON AST"); - doc.has_title().expect("document has no title"); - doc.typeset(); - doc.ast().expect("error serializing into JSON AST") - }); - pandoc.execute()?; +----------------------------------------------------------------------------- - Ok(()) +# Example: summain: `mod mode` + +~~~{.rust .numberLines} +mod mode { + use serde::{self, Serializer}; + + pub fn serialize<S>( + mode: &u32, + serializer: S + ) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let s = unix_mode::to_string(*mode); + serializer.serialize_str(&s) + } } -``` +~~~ + +----------------------------------------------------------------------------- + +![](htop-summain.png) + +----------------------------------------------------------------------------- + +# SEE ALSO + +**Rust** +[`https://www.rust-lang.org/`](https://www.rust-lang.org/) + +**crates.io** +[`https://crates.io/`](https://crates.io/) + +**Summain** +[`https://gitlab.com/larswirzenius/summain`](https://gitlab.com/larswirzenius/summain) + +**Subplot** +[`https://gitlab.com/larswirzenius/subplot/`](https://gitlab.com/larswirzenius/subplot/) + +**These slides:** +[`http://git.liw.fi/wmf-talks`](http://git.liw.fi/wmf-talks) + +----------------------------------------------------------------------------- + + +# Legalese + + +Copyright 2021 Wikimedia Foundation + +This content is licensed under the Creative Commons +Attribution-ShareAlike 4.0 International ([CC BY-SA 4.0][]) licence. + +[CC BY-SA 4.0]: https://creativecommons.org/licenses/by-sa/4.0/ + --- +title: "Why Rust?" +subtitle: "A brief introduction" +author: Lars Wirzenius +date: 2021-02-16 +... 
-```{.rust .numberLines} -fn mtime(filename: &Path) -> subplot::Result { - let mtime = fs::metadata(filename)?.modified()?; - let secs = mtime.duration_since(UNIX_EPOCH)?.as_secs(); - let secs: i64 = format!("{}", secs).parse()?; - let dt = Local.timestamp(secs, 0); - Ok(dt.format("%Y-%m-%d %H:%M").to_string()) -} -``` -- cgit v1.2.1