diff --git a/Cargo.lock b/Cargo.lock index 78ba77b..d074bf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "anstream" version = "0.6.18" @@ -61,6 +67,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.1.3" @@ -158,15 +173,6 @@ dependencies = [ "hex-conservative", ] -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "bumpalo" version = "3.16.0" @@ -181,12 +187,14 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cc" -version = "1.0.83" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ + "find-msvc-tools", "jobserver", "libc", + "shlex", ] [[package]] @@ -195,6 +203,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acc17a6284abccac6e50db35c1cee87f605474a72939b959a3a67d9371800efd" +dependencies = [ + "hashbrown", + "regex-automata", + 
"serde", + "stacker", + "unicode-ident", + "unicode-segmentation", +] + [[package]] name = "clap" version = "4.5.37" @@ -235,25 +257,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" -[[package]] -name = "cpufeatures" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" -dependencies = [ - "libc", -] - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "derive_arbitrary" version = "1.1.3" @@ -265,16 +268,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - [[package]] name = "either" version = "1.13.0" @@ -293,14 +286,22 @@ dependencies = [ ] [[package]] -name = "generic-array" -version = "0.14.7" +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "getrandom" @@ -321,6 +322,17 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8449d342b1c67f49169e92e71deb7b9b27f30062301a16dbc27a4cc8d2351b7" +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hex-conservative" version = "0.2.1" @@ -377,9 +389,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.147" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libfuzzer-sys" @@ -391,6 +403,16 @@ dependencies = [ "cc", ] +[[package]] +name = "line-index" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e27e0ed5a392a7f5ba0b3808a2afccff16c64933312c84b57618b49d1209bd2" +dependencies = [ + "nohash-hasher", + "text-size", +] + [[package]] name = "log" version = "0.4.22" @@ -414,55 +436,25 @@ dependencies = [ ] [[package]] -name = "once_cell" -version = "1.21.3" +name = "nohash-hasher" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" [[package]] -name = "pest" -version = "2.7.3" +name = "object" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a4d085fd991ac8d5b05a147b437791b4260b76326baf0fc60cf7c9c27ecd33" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ 
"memchr", - "thiserror", - "ucd-trie", ] [[package]] -name = "pest_derive" -version = "2.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bee7be22ce7918f641a33f08e3f43388c7656772244e2bbb2477f44cc9021a" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1511785c5e98d79a05e8a6bc34b4ac2168a0e3e92161862030ad84daa223141" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "pest_meta" -version = "2.7.3" +name = "once_cell" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42f0394d3123e33353ca5e1e89092e533d2cc490389f2bd6131c43c634ebc5f" -dependencies = [ - "once_cell", - "pest", - "sha2", -] +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ppv-lite86" @@ -479,6 +471,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "quote" version = "1.0.33" @@ -636,15 +638,10 @@ dependencies = [ ] [[package]] -name = "sha2" -version = "0.10.7" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simplicity-lang" @@ -680,13 +677,13 @@ version = "0.3.0" dependencies = [ "arbitrary", "base64 0.21.3", + "chumsky", "clap", "either", "getrandom", "itertools", + "line-index", "miniscript", - "pest", - "pest_derive", "serde", 
"serde_json", "simplicity-lang", @@ -704,6 +701,19 @@ dependencies = [ "simplicityhl", ] +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + [[package]] name = "strsim" version = "0.11.1" @@ -733,36 +743,10 @@ dependencies = [ ] [[package]] -name = "thiserror" -version = "1.0.48" +name = "text-size" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - -[[package]] -name = "ucd-trie" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" [[package]] name = "unicode-ident" @@ -771,16 +755,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] -name = "utf8parse" -version = "0.2.2" +name = "unicode-segmentation" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" 
[[package]] -name = "version_check" -version = "0.9.4" +name = "utf8parse" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "wasi" diff --git a/Cargo.toml b/Cargo.toml index f14ab53..c9aec20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,8 +23,6 @@ serde = ["dep:serde", "dep:serde_json"] [dependencies] base64 = "0.21.2" -pest = "2.1.3" -pest_derive = "2.7.1" serde = { version = "1.0.188", features = ["derive"], optional = true } serde_json = { version = "1.0.105", optional = true } simplicity-lang = { version = "0.6.0" } @@ -33,6 +31,8 @@ either = "1.12.0" itertools = "0.13.0" arbitrary = { version = "1", optional = true, features = ["derive"] } clap = "4.5.37" +chumsky = "0.11.2" +line-index = "0.1.2" [target.wasm32-unknown-unknown.dependencies] getrandom = { version = "0.2", features = ["js"] } diff --git a/src/error.rs b/src/error.rs index d6667d9..0a415c3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,155 +1,112 @@ use std::fmt; -use std::num::NonZeroUsize; +use std::ops::Range; use std::sync::Arc; +use line_index::{LineCol, LineIndex, TextSize}; use simplicity::hashes::{sha256, Hash, HashEngine}; use simplicity::{elements, Cmr}; -use crate::parse::{MatchPattern, Rule}; +use crate::parse::MatchPattern; use crate::str::{AliasName, FunctionName, Identifier, JetName, ModuleName, WitnessName}; use crate::types::{ResolvedType, UIntType}; -/// Position of an object inside a file. -/// -/// [`pest::Position<'i>`] forces us to track lifetimes, so we introduce our own struct. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -pub struct Position { - /// Line where the object is located. - /// - /// Starts at 1. - pub line: NonZeroUsize, - /// Column where the object is located. - /// - /// Starts at 1. 
- pub col: NonZeroUsize, -} - -impl Position { - /// A dummy position. - #[cfg(feature = "arbitrary")] - pub(crate) const DUMMY: Self = Self::new(1, 1); - - /// Create a new position. - /// - /// ## Panics - /// - /// Line or column are zero. - pub const fn new(line: usize, col: usize) -> Self { - // assert_ne not available in constfn - assert!(line != 0, "line must not be zero",); - // Safety: Checked above - let line = unsafe { NonZeroUsize::new_unchecked(line) }; - assert!(col != 0, "column must not be zero",); - // Safety: Checked above - let col = unsafe { NonZeroUsize::new_unchecked(col) }; - Self { line, col } - } -} +pub type Spanned = (T, Span); /// Area that an object spans inside a file. /// /// The area cannot be empty. -/// -/// [`pest::Span<'i>`] forces us to track lifetimes, so we introduce our own struct. #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub struct Span { /// Position where the object starts, inclusively. - pub start: Position, + pub start: usize, /// Position where the object ends, inclusively. - pub end: Position, + pub end: usize, } impl Span { /// A dummy span. #[cfg(feature = "arbitrary")] - pub(crate) const DUMMY: Self = Self::new(Position::DUMMY, Position::DUMMY); + pub(crate) const DUMMY: Self = Self::new(0, 0); /// Create a new span. /// /// ## Panics /// /// Start comes after end. - pub const fn new(start: Position, end: Position) -> Self { - // NonZeroUsize does not implement const comparisons (yet) - // So we call NonZeroUsize:get() to compare usize in const - assert!( - start.line.get() <= end.line.get(), - "Start cannot come after end" - ); - assert!( - start.line.get() < end.line.get() || start.col.get() <= end.col.get(), - "Start cannot come after end" - ); + pub const fn new(start: usize, end: usize) -> Self { + assert!(start <= end, "Start cannot come after end"); Self { start, end } } - /// Check if the span covers more than one line. 
-    pub const fn is_multiline(&self) -> bool {
-        self.start.line.get() < self.end.line.get()
-    }
-
     /// Return the CMR of the span.
     pub fn cmr(&self) -> Cmr {
         let mut hasher = sha256::HashEngine::default();
-        hasher.input(&self.start.line.get().to_be_bytes());
-        hasher.input(&self.start.col.get().to_be_bytes());
-        hasher.input(&self.end.line.get().to_be_bytes());
-        hasher.input(&self.end.col.get().to_be_bytes());
+        hasher.input(&self.start.to_be_bytes());
+        hasher.input(&self.end.to_be_bytes());
         let hash = sha256::Hash::from_engine(hasher);
         Cmr::from_byte_array(hash.to_byte_array())
     }
 
     /// Return a slice from the given `file` that corresponds to the span.
-    ///
-    /// Return `None` if the span runs out of bounds.
     pub fn to_slice<'a>(&self, file: &'a str) -> Option<&'a str> {
-        let mut current_line = 1;
-        let mut current_col = 1;
-        let mut start_index = None;
+        if file.is_empty() && self.start == 0 && self.end == 0 {
+            Some("")
+        } else if self.start > file.len() || self.end > file.len() {
+            None
+        } else {
+            Some(&file[self.start..self.end])
+        }
+    }
+}
 
-        for (i, c) in file.char_indices() {
-            if current_line == self.start.line.get() && current_col == self.start.col.get() {
-                start_index = Some(i);
-            }
-            if current_line == self.end.line.get() && current_col == self.end.col.get() {
-                let start_index = start_index.expect("start comes before end");
-                let end_index = i;
-                return Some(&file[start_index..end_index]);
-            }
-            if c == '\n' {
-                current_line += 1;
-                current_col = 1;
-            } else {
-                current_col += 1;
-            }
-        }
+impl chumsky::span::Span for Span {
+    type Context = ();
+
+    type Offset = usize;
+
+    fn new((): Self::Context, range: Range<usize>) -> Self {
+        Self {
+            start: range.start,
+            end: range.end,
+        }
+    }
+
+    fn context(&self) -> Self::Context {}
+
+    fn start(&self) -> Self::Offset {
+        self.start
+    }
+
+    fn end(&self) -> Self::Offset {
+        self.end
+    }
+}
 
-        None
+impl fmt::Display for Span {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}..{}",
self.start, self.end)?;
+        Ok(())
     }
 }
 
-impl<'a> From<&'a pest::iterators::Pair<'_, Rule>> for Span {
-    fn from(pair: &'a pest::iterators::Pair<'_, Rule>) -> Self {
-        let (line, col) = pair.line_col();
-        let start = Position::new(line, col);
-        // end_pos().line_col() is O(n) in file length
-        // https://github.com/pest-parser/pest/issues/560
-        // We should generate `Span`s only on error paths
-        let (line, col) = pair.as_span().end_pos().line_col();
-        let end = Position::new(line, col);
-        Self::new(start, end)
+impl From<chumsky::span::SimpleSpan> for Span {
+    fn from(span: chumsky::span::SimpleSpan) -> Self {
+        Self {
+            start: span.start,
+            end: span.end,
+        }
+    }
+}
+
+impl From<Range<usize>> for Span {
+    fn from(range: Range<usize>) -> Self {
+        Self::new(range.start, range.end)
     }
 }
 
 impl From<&str> for Span {
     fn from(s: &str) -> Self {
-        let start = Position::new(1, 1);
-        let end_line = std::cmp::max(1, s.lines().count());
-        let end_col = std::cmp::max(1, s.lines().next_back().unwrap_or("").len());
-        let end = Position::new(end_line, end_col);
-        debug_assert!(start.line <= end.line);
-        debug_assert!(start.line < end.line || start.col <= end.col);
-        Span::new(start, end)
+        Span::new(0, s.len())
     }
 }
 
@@ -235,9 +192,17 @@ impl fmt::Display for RichError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self.file {
             Some(ref file) if !file.is_empty() => {
-                let start_line_index = self.span.start.line.get() - 1;
-                let n_spanned_lines = self.span.end.line.get() - start_line_index;
-                let line_num_width = self.span.end.line.get().to_string().len();
+                let index = LineIndex::new(file);
+
+                let start_pos = index.line_col(TextSize::from(self.span.start as u32));
+                let end_pos = index.line_col(TextSize::from(self.span.end as u32));
+
+                let start_line_index = start_pos.line as usize;
+                let end_line_index = end_pos.line as usize;
+
+                let n_spanned_lines = end_line_index - start_line_index + 1;
+                let line_num_width = (end_line_index + 1).to_string().len();
+
                 writeln!(f, "{:width$} |", " ", width = line_num_width)?;
                 let
mut lines = file.lines().skip(start_line_index).peekable(); @@ -248,15 +213,25 @@ impl fmt::Display for RichError { writeln!(f, "{line_num:line_num_width$} | {line_str}")?; } - let (underline_start, underline_length) = match self.span.is_multiline() { + let line_start_byte = index + .offset(LineCol { + line: start_pos.line, + col: 0, + }) + .map_or(0, |ts| u32::from(ts) as usize); + + let start_col = file[line_start_byte..self.span.start].chars().count(); + + let (underline_start, underline_length) = match start_line_index != end_line_index { true => (0, start_line_len), - false => ( - self.span.start.col.get(), - self.span.end.col.get() - self.span.start.col.get(), - ), + false => { + let end_col = file[line_start_byte..self.span.end].chars().count(); + (start_col, end_col - start_col) + } }; + write!(f, "{:width$} |", " ", width = line_num_width)?; - write!(f, "{:width$}", " ", width = underline_start)?; + write!(f, "{:width$}", " ", width = underline_start + 1)?; write!(f, "{:^ for String { } } -impl From> for RichError { - fn from(error: pest::error::Error) -> Self { - let description = error.variant.message().to_string(); - let (start, end) = match error.line_col { - pest::error::LineColLocation::Pos((line, col)) => { - (Position::new(line, col), Position::new(line, col + 1)) - } - pest::error::LineColLocation::Span((line, col), (line_end, col_end)) => { - (Position::new(line, col), Position::new(line_end, col_end)) - } - }; - let span = Span::new(start, end); - Self::new(Error::Grammar(description), span) - } -} - /// An individual error. /// /// Records _what_ happened but not where. 
@@ -522,17 +481,18 @@ impl From for Error { mod tests { use super::*; - const FILE: &str = r#"let a1: List = None; + const FILE: &str = "let a1: List = None; let x: u32 = Left( Right(0) -);"#; +);"; const EMPTY_FILE: &str = ""; #[test] fn display_single_line() { let error = Error::ListBoundPow2(5) - .with_span(Span::new(Position::new(1, 14), Position::new(1, 20))) + .with_span(Span::new(13, 19)) .with_file(Arc::from(FILE)); + let expected = r#" | 1 | let a1: List = None; @@ -545,8 +505,9 @@ let x: u32 = Left( let error = Error::CannotParse( "Expected value of type `u32`, got `Either, _>`".to_string(), ) - .with_span(Span::new(Position::new(2, 21), Position::new(4, 2))) + .with_span(Span::new(41, FILE.len())) .with_file(Arc::from(FILE)); + let expected = r#" | 2 | let x: u32 = Left( @@ -559,8 +520,9 @@ let x: u32 = Left( #[test] fn display_entire_file() { let error = Error::CannotParse("This span covers the entire file".to_string()) - .with_span(Span::from(FILE)) + .with_span(Span::new(0, FILE.len())) .with_file(Arc::from(FILE)); + let expected = r#" | 1 | let a1: List = None; @@ -573,27 +535,22 @@ let x: u32 = Left( #[test] fn display_no_file() { - let error = Error::CannotParse("This error has no file".to_string()) - .with_span(Span::from(EMPTY_FILE)); + let error = + Error::CannotParse("This error has no file".to_string()).with_span(Span::new(0, 0)); let expected = "Cannot parse: This error has no file"; assert_eq!(&expected, &error.to_string()); - let error = Error::CannotParse("This error has no file".to_string()) - .with_span(Span::new(Position::new(1, 1), Position::new(2, 2))); + let error = + Error::CannotParse("This error has no file".to_string()).with_span(Span::new(5, 10)); assert_eq!(&expected, &error.to_string()); } #[test] fn display_empty_file() { let error = Error::CannotParse("This error has an empty file".to_string()) - .with_span(Span::from(EMPTY_FILE)) + .with_span(Span::new(0, 0)) .with_file(Arc::from(EMPTY_FILE)); let expected = "Cannot parse: 
This error has an empty file"; assert_eq!(&expected, &error.to_string()); - - let error = Error::CannotParse("This error has an empty file".to_string()) - .with_span(Span::new(Position::new(1, 1), Position::new(2, 2))) - .with_file(Arc::from(EMPTY_FILE)); - assert_eq!(&expected, &error.to_string()); } } diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..6a19c32 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,228 @@ +use chumsky::prelude::*; +use std::fmt; + +pub type Span = SimpleSpan; +pub type Spanned = (T, Span); + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Token<'src> { + // Keywords + Fn, + Let, + Type, + Mod, + Const, + Match, + + // Control symbols + Arrow, + Colon, + Semi, + Comma, + Eq, + FatArrow, + LParen, + RParen, + LBracket, + RBracket, + LBrace, + RBrace, + LAngle, + RAngle, + + // Number literals + DecLiteral(&'src str), + HexLiteral(&'src str), + BinLiteral(&'src str), + + // Boolean literal + Bool(bool), + + // Identifier + Ident(&'src str), + + // Jets, witnesses, and params + Jet(&'src str), + Witness(&'src str), + Param(&'src str), + + // Built-in types (List, Option, Either) + BuiltinType(&'src str), + + // Unsigned integer types + UnsignedType(&'src str), + + // Boolean type + BooleanType, + + // Built-in functions + BuiltinFn(&'src str), + + // Built-in aliases + BuiltinAlias(&'src str), + + // Comments and block comments + // + // We would discard them for the compiler, but they are needed, for example, for the formatter. 
+ Comment, + BlockComment, +} + +impl<'src> fmt::Display for Token<'src> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Token::Fn => write!(f, "fn"), + Token::Let => write!(f, "let"), + Token::Type => write!(f, "type"), + Token::Mod => write!(f, "mod"), + Token::Const => write!(f, "const"), + Token::Match => write!(f, "match"), + + Token::Arrow => write!(f, "->"), + Token::Colon => write!(f, ":"), + Token::Semi => write!(f, ";"), + Token::Comma => write!(f, ","), + Token::Eq => write!(f, "="), + Token::FatArrow => write!(f, "=>"), + Token::LParen => write!(f, "("), + Token::RParen => write!(f, ")"), + Token::LBracket => write!(f, "["), + Token::RBracket => write!(f, "]"), + Token::LBrace => write!(f, "{{"), + Token::RBrace => write!(f, "}}"), + Token::LAngle => write!(f, "<"), + Token::RAngle => write!(f, ">"), + + Token::DecLiteral(s) => write!(f, "{}", s), + Token::HexLiteral(s) => write!(f, "0x{}", s), + Token::BinLiteral(s) => write!(f, "0b{}", s), + + Token::Ident(s) => write!(f, "{}", s), + + Token::Jet(s) => write!(f, "jet::{}", s), + Token::Witness(s) => write!(f, "witness::{}", s), + Token::Param(s) => write!(f, "param::{}", s), + + Token::BuiltinType(s) => write!(f, "{}", s), + Token::UnsignedType(s) => write!(f, "{}", s), + Token::BuiltinFn(s) => write!(f, "{}", s), + Token::BuiltinAlias(s) => write!(f, "{}", s), + + Token::BooleanType => write!(f, "bool"), + Token::Bool(b) => write!(f, "{}", b), + + Token::Comment => write!(f, "comment"), + Token::BlockComment => write!(f, "block_comment"), + } + } +} + +pub fn lexer<'src>( +) -> impl Parser<'src, &'src str, Vec>>, extra::Err>> { + let num = text::digits(10).to_slice().map(Token::DecLiteral); + let hex = just("0x") + .ignore_then(text::digits(16).to_slice()) + .map(Token::HexLiteral); + let bin = just("0b") + .ignore_then(text::digits(2).to_slice()) + .map(Token::BinLiteral); + + let macros = choice((just("assert!"), just("panic!"), just("dbg!"), just("list!"))) + 
.map(Token::BuiltinFn); + + let keyword = text::ident().map(|s| match s { + "fn" => Token::Fn, + "let" => Token::Let, + "type" => Token::Type, + "mod" => Token::Mod, + "const" => Token::Const, + "match" => Token::Match, + "true" => Token::Bool(true), + "false" => Token::Bool(false), + "List" | "Either" | "Option" => Token::BuiltinType(s), + "u1" | "u2" | "u4" | "u8" | "u16" | "u32" | "u64" | "u128" | "u256" => { + Token::UnsignedType(s) + } + "bool" => Token::BooleanType, + "unwrap_left" | "unwrap_right" | "array_fold" | "for_while" | "is_none" | "unwrap" + | "into" | "fold" => Token::BuiltinFn(s), + "Ctx8" | "Pubkey" | "Message64" | "Message" | "Signature" | "Scalar" | "Fe" | "Gej" + | "Ge" | "Point" | "Height" | "Time" | "Distance" | "Duration" | "Lock" | "Outpoint" + | "Confidential1" | "ExplicitAsset" | "Asset1" | "ExplicitAmount" | "Amount1" + | "ExplicitNonce" | "Nonce" | "TokenAmount1" => Token::BuiltinAlias(s), + _ => Token::Ident(s), + }); + + let jet = just("jet::") + .ignore_then(text::ident()) + .map(Token::Jet) + .labelled("jet"); + let witness = just("witness::") + .labelled("witness") + .ignore_then(text::ident()) + .map(Token::Witness); + let param = just("param::") + .ignore_then(text::ident()) + .map(Token::Param) + .labelled("param"); + + let op = choice(( + just("->").to(Token::Arrow), + just("=>").to(Token::FatArrow), + just("=").to(Token::Eq), + just(":").to(Token::Colon), + just(";").to(Token::Semi), + just(",").to(Token::Comma), + just("(").to(Token::LParen), + just(")").to(Token::RParen), + just("[").to(Token::LBracket), + just("]").to(Token::RBracket), + just("{").to(Token::LBrace), + just("}").to(Token::RBrace), + just("<").to(Token::LAngle), + just(">").to(Token::RAngle), + )); + + let comment = just("//") + .ignore_then(any().and_is(just('\n').not()).repeated()) + .to(Token::Comment); + + let block_comment = just("/*") + .ignore_then(just("*/").not().then(any()).repeated()) + .then_ignore(just("*/")) + .to(Token::BlockComment); + + let 
token = choice(( + comment, + block_comment, + jet, + witness, + param, + macros, + keyword, + hex, + bin, + num, + op, + )); + + token + .map_with(|tok, e| (tok, e.span())) + .padded() + .recover_with(skip_then_retry_until(any().ignored(), end())) + .repeated() + .collect() +} + +#[test] +fn lexer_test() { + use chumsky::prelude::*; + + // Check if the lexer parses the example file without errors. + let src = include_str!("../examples/last_will.simf"); + + let (tokens, lex_errs) = lexer().parse(src).into_output_errors(); + let _ = tokens.unwrap(); + + assert!(lex_errs.is_empty()); +} diff --git a/src/lib.rs b/src/lib.rs index ad5aadf..d3665b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ pub mod debug; pub mod dummy_env; pub mod error; pub mod jet; +pub mod lexer; pub mod named; pub mod num; pub mod parse; @@ -631,7 +632,6 @@ fn main() { } #[test] - #[ignore] fn fuzz_slow_unit_1() { parse::Program::parse_from_str("fn fnnfn(MMet:(((sssss,((((((sssss,ssssss,ss,((((((sssss,ss,((((((sssss,ssssss,ss,((((((sssss,ssssss,((((((sssss,sssssssss,(((((((sssss,sssssssss,(((((ssss,((((((sssss,sssssssss,(((((((sssss,ssss,((((((sssss,ss,((((((sssss,ssssss,ss,((((((sssss,ssssss,((((((sssss,sssssssss,(((((((sssss,sssssssss,(((((ssss,((((((sssss,sssssssss,(((((((sssss,sssssssssssss,(((((((((((u|(").unwrap_err(); } diff --git a/src/parse.rs b/src/parse.rs index cd98faa..277942f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -6,25 +6,22 @@ use std::num::NonZeroUsize; use std::str::FromStr; use std::sync::Arc; +use chumsky::input::ValueInput; +use chumsky::prelude::*; use either::Either; -use itertools::Itertools; use miniscript::iter::{Tree, TreeLike}; -use pest::Parser; -use pest_derive::Parser; -use crate::error::{Error, RichError, Span, WithFile, WithSpan}; +use crate::error::RichError; +use crate::error::{Span, Spanned}; use crate::impl_eq_hash; +use crate::lexer::Token; use crate::num::NonZeroPow2Usize; use crate::pattern::Pattern; use crate::str::{ AliasName, Binary, 
Decimal, FunctionName, Hexadecimal, Identifier, JetName, ModuleName, WitnessName, }; -use crate::types::{AliasedType, BuiltinAlias, TypeConstructible, UIntType}; - -#[derive(Parser)] -#[grammar = "minimal.pest"] -struct IdentParser; +use crate::types::{AliasedType, BuiltinAlias, TypeConstructible}; /// A program is a sequence of items. #[derive(Clone, Debug)] @@ -57,7 +54,7 @@ pub enum Item { /// Definition of a function. #[derive(Clone, Debug)] pub struct Function { - name: FunctionName, + name: Spanned, params: Arc<[FunctionParam]>, ret: Option, body: Expression, @@ -67,6 +64,11 @@ pub struct Function { impl Function { /// Access the name of the function. pub fn name(&self) -> &FunctionName { + &self.name.0 + } + + /// Access the name of the function and it's span. + pub fn spanned_name(&self) -> &Spanned { &self.name } @@ -99,13 +101,17 @@ impl_eq_hash!(Function; name, params, ret, body); #[derive(Clone, Debug, Eq, PartialEq, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct FunctionParam { - identifier: Identifier, + identifier: Spanned, ty: AliasedType, } impl FunctionParam { /// Access the identifier of the parameter. pub fn identifier(&self) -> &Identifier { + &self.identifier.0 + } + + pub fn spanned_identifier(&self) -> &Spanned { &self.identifier } @@ -276,6 +282,16 @@ impl Expression { _ => self, } } + + pub fn empty(span: Span) -> Self { + Self { + inner: ExpressionInner::Single(SingleExpression { + inner: SingleExpressionInner::Tuple(Arc::new([])), + span, + }), + span, + } + } } impl_eq_hash!(Expression; inner); @@ -507,7 +523,7 @@ impl Module { #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct ModuleAssignment { - name: WitnessName, + name: Spanned, ty: AliasedType, expression: Expression, span: Span, @@ -516,7 +532,7 @@ pub struct ModuleAssignment { impl ModuleAssignment { /// Access the assigned witness name. 
pub fn name(&self) -> &WitnessName { - &self.name + &self.name.0 } /// Access the assigned witness type. @@ -830,27 +846,17 @@ impl fmt::Display for MatchPattern { } } -/// Trait for types that can be parsed from a PEST pair. -trait PestParse: Sized { - /// Expected rule for parsing the type. - const RULE: Rule; - - /// Parse a value of the type from a PEST pair. - /// - /// # Panics - /// - /// The rule of the pair is not the expected rule ([`Self::RULE`]). - fn parse(pair: pest::iterators::Pair) -> Result; -} - macro_rules! impl_parse_wrapped_string { ($wrapper: ident, $rule: ident) => { - impl PestParse for $wrapper { - const RULE: Rule = Rule::$rule; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - Ok(Self::from_str_unchecked(pair.as_str())) + impl ChumskyParse for $wrapper { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + select! { + Token::Ident(ident) => Self::from_str_unchecked(ident) + } } } }; @@ -868,756 +874,876 @@ pub trait ParseFromStr: Sized { fn parse_from_str(s: &str) -> Result; } -impl ParseFromStr for A { - fn parse_from_str(s: &str) -> Result { - let mut pairs = IdentParser::parse(A::RULE, s) - .map_err(RichError::from) - .with_file(s)?; - let pair = pairs.next().unwrap(); - A::parse(pair).with_file(s) - } -} - -impl PestParse for Program { - const RULE: Rule = Rule::program; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let items = pair - .into_inner() - .filter_map(|pair| match pair.as_rule() { - Rule::item => Some(Item::parse(pair)), - _ => None, - }) - .collect::, RichError>>()?; - Ok(Program { items, span }) - } +/// Trait for generating parsers of themselves. +/// +/// Replacement for previous `PestParse` trait. 
+trait ChumskyParse: Sized { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>; } -impl PestParse for Item { - const RULE: Rule = Rule::item; +// Error handling here is a mess, because [`ParseFromStr`] only returning one error +impl ParseFromStr for A { + fn parse_from_str(s: &str) -> Result { + let (tokens, lex_errs) = crate::lexer::lexer().parse(s).into_output_errors(); + + let tokens = if let Some(tok) = tokens { + tok.into_iter() + .map(|(tok, span)| (tok, Span::from(span))) + .filter(|(tok, _)| !matches!(tok, Token::Comment | Token::BlockComment)) + .collect::>() + } else { + return Err({ + let err = lex_errs + .first() + .map(|err| (err.reason().to_string(), err.span())) + .unwrap(); + RichError::new(crate::error::Error::CannotParse(err.0), (*err.1).into()) + }); + }; - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let pair = pair.into_inner().next().unwrap(); - match pair.as_rule() { - Rule::type_alias => TypeAlias::parse(pair).map(Item::TypeAlias), - Rule::function => Function::parse(pair).map(Item::Function), - _ => Ok(Self::Module), + let (ty, parse_errs) = A::parser() + .map_with(|ast, e| (ast, e.span())) + .parse( + tokens + .as_slice() + .map((s.len()..s.len()).into(), |(t, s)| (t, s)), + ) + .into_output_errors(); + + if parse_errs.is_empty() { + Ok(ty + .ok_or(RichError::new( + crate::error::Error::CannotParse(String::new()), + Span::new(0, 0), + ))? 
+ .0) + } else { + let err = parse_errs + .first() + .map(|err| (dbg!(err.reason().to_string()), err.span())) + .unwrap(); + Err(RichError::new( + crate::error::Error::CannotParse(err.0), + *err.1, + )) } } } -impl PestParse for Function { - const RULE: Rule = Rule::function; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let _fn_keyword = it.next().unwrap(); - let name = FunctionName::parse(it.next().unwrap())?; - let params = { - let pair = it.next().unwrap(); - debug_assert!(matches!(pair.as_rule(), Rule::function_params)); - pair.into_inner() - .map(FunctionParam::parse) - .collect::, RichError>>()? - }; - let ret = match it.peek().unwrap().as_rule() { - Rule::function_return => { - let pair = it.next().unwrap(); - debug_assert!(matches!(pair.as_rule(), Rule::function_return)); - let pair = pair.into_inner().next().unwrap(); - let ty = AliasedType::parse(pair)?; - Some(ty) - } - _ => None, +fn parse_token_with_recovery<'tokens, 'src: 'tokens, I>( + tok: Token<'src>, +) -> impl Parser<'tokens, I, Token<'src>, extra::Err, Span>>> + Clone +where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, +{ + just(tok.clone()).recover_with(via_parser(empty().to(tok))) +} + +impl ChumskyParse for AliasedType { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let atom = select! 
{ + Token::Ident(ident) => AliasedType::alias(AliasName::from_str_unchecked(ident)), + Token::BuiltinAlias(ident) => AliasedType::builtin(BuiltinAlias::from_str(ident).unwrap()), + Token::BooleanType => AliasedType::boolean(), + Token::UnsignedType(int) => { + match int + { + "u1" => AliasedType::u1(), + "u2" => AliasedType::u2(), + "u4" => AliasedType::u4(), + "u8" => AliasedType::u8(), + "u16" => AliasedType::u16(), + "u32" => AliasedType::u32(), + "u64" => AliasedType::u64(), + "u128" => AliasedType::u128(), + "u256" => AliasedType::u256(), + _ => unreachable!("Corrupt grammar") + } + }, }; - let body = Expression::parse(it.next().unwrap())?; - Ok(Self { - name, - params, - ret, - body, - span, + let angle_recovery = via_parser(nested_delimiters( + Token::LAngle, + Token::RAngle, + [ + (Token::LParen, Token::RParen), + (Token::LBracket, Token::RBracket), + ], + |_| AliasedType::alias(AliasName::from_str_unchecked("error")), + )); + + let bracket_recovery = via_parser(nested_delimiters( + Token::LBracket, + Token::RBracket, + [ + (Token::LParen, Token::RParen), + (Token::LAngle, Token::RAngle), + ], + |_| AliasedType::alias(AliasName::from_str_unchecked("error")), + )); + + let num = select! 
{ Token::DecLiteral(i) => i } + .labelled("decimal number") + .recover_with(via_parser( + none_of([Token::RAngle, Token::RBracket]) + .ignored() + .or(empty()) + .to("0"), + )); + + recursive(|ty| { + let args = ty + .clone() + .then_ignore(parse_token_with_recovery(Token::Comma)) + .then(ty.clone()) + .delimited_by(just(Token::LAngle), just(Token::RAngle)); + + let sum_type = just(Token::BuiltinType("Either")) + .ignore_then(args) + .map(|(left, right)| AliasedType::either(left, right)) + .recover_with(angle_recovery.clone()) + .labelled("Either"); + + let option_type = just(Token::BuiltinType("Option")) + .ignore_then( + ty.clone() + .delimited_by(just(Token::LAngle), just(Token::RAngle)) + .recover_with(angle_recovery), + ) + .map(AliasedType::option) + .labelled("Option"); + + let tuple = ty + .clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .recover_with(via_parser(nested_delimiters( + Token::LParen, + Token::RParen, + [ + (Token::LBracket, Token::RBracket), + (Token::LAngle, Token::RAngle), + ], + |_| vec![], + ))) + .map(|s: Vec| AliasedType::tuple(s)) + .labelled("tuple"); + + let array = ty + .clone() + .then_ignore(parse_token_with_recovery(Token::Semi)) + .then(num.clone()) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .map(|(ty, size)| AliasedType::array(ty, usize::from_str(size).unwrap_or_default())) + .recover_with(bracket_recovery) + .labelled("array"); + + let other_angle_recovery = via_parser( + any() + .filter(|t| !matches!(t, Token::RAngle | Token::RParen | Token::RBracket)) + .repeated() + .ignore_then(just(Token::RAngle).or_not()) + .to(( + AliasedType::alias(AliasName::from_str_unchecked("error")), + NonZeroPow2Usize::TWO, + )), + ); + + let list = just(Token::BuiltinType("List")) + .ignore_then( + ty.then_ignore(parse_token_with_recovery(Token::Comma)) + .then(num.clone().validate(|num, e, emit| { + match NonZeroPow2Usize::from_str(num) 
{ + Ok(number) => number, + Err(err) => { + emit.emit(Rich::custom( + e.span(), + format!("Failed to parse List size: {}", err), + )); + // fallback to default value + NonZeroPow2Usize::TWO + } + } + })) + .delimited_by(just(Token::LAngle), just(Token::RAngle)) + .recover_with(other_angle_recovery), + ) + .map(|(ty, size)| AliasedType::list(ty, size)) + .labelled("List"); + + choice((sum_type, option_type, tuple, array, list, atom)) + .map_with(|inner, _| inner) + .labelled("type") }) } } -impl PestParse for FunctionParam { - const RULE: Rule = Rule::typed_identifier; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let mut it = pair.into_inner(); - let identifier = Identifier::parse(it.next().unwrap())?; - let ty = AliasedType::parse(it.next().unwrap())?; - Ok(Self { identifier, ty }) +impl ChumskyParse for Program { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + Item::parser() + .repeated() + .collect::>() + .map_with(|items, e| Program { + items: Arc::from(items), + span: e.span(), + }) } } -impl PestParse for Statement { - const RULE: Rule = Rule::statement; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let inner_pair = pair.into_inner().next().unwrap(); - match inner_pair.as_rule() { - Rule::assignment => Assignment::parse(inner_pair).map(Statement::Assignment), - Rule::expression => Expression::parse(inner_pair).map(Statement::Expression), - _ => unreachable!("Corrupt grammar"), - } +impl ChumskyParse for Item { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let func_parser = Function::parser().map(Item::Function); + let type_parser = TypeAlias::parser().map(Item::TypeAlias); + let 
mod_parser = Module::parser().map(|_| Item::Module); + + choice((func_parser, type_parser, mod_parser)) + } +} + +impl ChumskyParse for Function { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let params = FunctionParam::parser() + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .recover_with(via_parser(nested_delimiters( + Token::LParen, + Token::RParen, + [ + (Token::LBracket, Token::RBracket), + (Token::LAngle, Token::RAngle), + ], + |_| Vec::new(), + ))) + .map(Arc::from); + + let ret = just(Token::Arrow) + .ignore_then(AliasedType::parser()) + .or_not(); + + let body = Expression::parser(); + + just(Token::Fn) + .ignore_then(FunctionName::parser().map_with(|name, e| (name, e.span()))) + .then(params) + .then(ret) + .then(body) + .map_with(|(((name, params), ret), body), e| Self { + name, + params, + ret, + body, + span: e.span(), + }) } } -impl PestParse for Pattern { - const RULE: Rule = Rule::pattern; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let pair = PatternPair(pair); - let mut output = vec![]; - - for data in pair.post_order_iter() { - match data.node.0.as_rule() { - Rule::pattern => {} - Rule::variable_pattern => { - let identifier = Identifier::parse(data.node.0.into_inner().next().unwrap())?; - output.push(Pattern::Identifier(identifier)); - } - Rule::ignore_pattern => { - output.push(Pattern::Ignore); - } - Rule::tuple_pattern => { - let size = data.node.n_children(); - let elements = output.split_off(output.len() - size); - debug_assert_eq!(elements.len(), size); - output.push(Pattern::tuple(elements)); - } - Rule::array_pattern => { - let size = data.node.n_children(); - let elements = output.split_off(output.len() - size); - debug_assert_eq!(elements.len(), size); - 
output.push(Pattern::array(elements)); - } - _ => unreachable!("Corrupt grammar"), - } +impl ChumskyParse for FunctionParam { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let identifier = select! { + Token::Ident(name) => Identifier::from_str_unchecked(name), } + .map_with(|ident, e| (ident, e.span())); - debug_assert!(output.len() == 1); - Ok(output.pop().unwrap()) - } -} - -impl PestParse for Assignment { - const RULE: Rule = Rule::assignment; + let ty = AliasedType::parser(); - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let _let_keyword = it.next().unwrap(); - let pattern = Pattern::parse(it.next().unwrap())?; - let ty = AliasedType::parse(it.next().unwrap())?; - let expression = Expression::parse(it.next().unwrap())?; - Ok(Assignment { - pattern, - ty, - expression, - span, - }) - } -} - -impl PestParse for Call { - const RULE: Rule = Rule::call_expr; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let name = CallName::parse(it.next().unwrap())?; - let args = { - let pair = it.next().unwrap(); - debug_assert!(matches!(pair.as_rule(), Rule::call_args)); - pair.into_inner() - .map(Expression::parse) - .collect::, RichError>>()? 
- }; - - Ok(Self { name, args, span }) - } -} - -impl PestParse for CallName { - const RULE: Rule = Rule::call_name; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let pair = pair.into_inner().next().unwrap(); - match pair.as_rule() { - Rule::jet => JetName::parse(pair).map(Self::Jet), - Rule::unwrap_left => { - let inner = pair.into_inner().next().unwrap(); - AliasedType::parse(inner).map(Self::UnwrapLeft) - } - Rule::unwrap_right => { - let inner = pair.into_inner().next().unwrap(); - AliasedType::parse(inner).map(Self::UnwrapRight) - } - Rule::is_none => { - let inner = pair.into_inner().next().unwrap(); - AliasedType::parse(inner).map(Self::IsNone) - } - Rule::unwrap => Ok(Self::Unwrap), - Rule::assert => Ok(Self::Assert), - Rule::panic => Ok(Self::Panic), - Rule::debug => Ok(Self::Debug), - Rule::type_cast => { - let inner = pair.into_inner().next().unwrap(); - AliasedType::parse(inner).map(Self::TypeCast) - } - Rule::fold => { - let mut it = pair.into_inner(); - let name = FunctionName::parse(it.next().unwrap())?; - let bound = NonZeroPow2Usize::parse(it.next().unwrap())?; - Ok(Self::Fold(name, bound)) - } - Rule::array_fold => { - let mut it = pair.into_inner(); - let name = FunctionName::parse(it.next().unwrap())?; - let non_zero_usize_parse = - |pair: pest::iterators::Pair| -> Result { - let size = pair.as_str().parse::().with_span(&pair)?; - NonZeroUsize::new(size) - .ok_or(Error::ArraySizeNonZero(size)) - .with_span(&pair) - }; - let size = non_zero_usize_parse(it.next().unwrap())?; - Ok(Self::ArrayFold(name, size)) - } - Rule::for_while => { - let mut it = pair.into_inner(); - let name = FunctionName::parse(it.next().unwrap())?; - Ok(Self::ForWhile(name)) - } - Rule::function_name => FunctionName::parse(pair).map(Self::Custom), - _ => panic!("Corrupt grammar"), - } + identifier + .then_ignore(just(Token::Colon)) + .then(ty) + .map(|(identifier, ty)| Self { identifier, ty }) } } -impl PestParse 
for JetName { - const RULE: Rule = Rule::jet; +impl Statement { + fn parser<'tokens, 'src: 'tokens, I, E>( + expr: E, + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + E: Parser<'tokens, I, Expression, extra::Err, Span>>> + + Clone + + 'tokens, + { + let assignment = Assignment::parser(expr.clone()).map(Statement::Assignment); - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let jet_name = pair.as_str().strip_prefix("jet::").unwrap(); - Ok(Self::from_str_unchecked(jet_name)) - } -} - -impl PestParse for TypeAlias { - const RULE: Rule = Rule::type_alias; + let expression = expr.map(Statement::Expression); - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let _type_keyword = it.next().unwrap(); - let name = AliasName::parse(it.next().unwrap())?; - let ty = AliasedType::parse(it.next().unwrap())?; - Ok(Self { name, ty, span }) + choice((assignment, expression)) } } - -impl PestParse for Expression { - const RULE: Rule = Rule::expression; - - fn parse(pair: pest::iterators::Pair) -> Result { - let span = Span::from(&pair); - let pair = match pair.as_rule() { - Rule::expression => pair.into_inner().next().unwrap(), - Rule::block_expression | Rule::single_expression => pair, - _ => unreachable!("Corrupt grammar"), - }; - - let inner = match pair.as_rule() { - Rule::block_expression => { - let mut it = pair.into_inner().peekable(); - let statements = it - .peeking_take_while(|pair| matches!(pair.as_rule(), Rule::statement)) - .map(Statement::parse) - .collect::, RichError>>()?; - let expression = it - .next() - .map(|pair| Expression::parse(pair).map(Arc::new)) - .transpose()?; - ExpressionInner::Block(statements, expression) - } - Rule::single_expression => ExpressionInner::Single(SingleExpression::parse(pair)?), - _ 
=> unreachable!("Corrupt grammar"), - }; - - Ok(Expression { inner, span }) +impl Assignment { + fn parser<'tokens, 'src: 'tokens, I, E>( + expr: E, + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + E: Parser<'tokens, I, Expression, extra::Err, Span>>> + + Clone + + 'tokens, + { + just(Token::Let) + .ignore_then(Pattern::parser()) + .then_ignore(parse_token_with_recovery(Token::Colon)) + .then(AliasedType::parser()) + .then_ignore(parse_token_with_recovery(Token::Eq)) + .then(expr) + .map_with(|((pattern, ty), expression), e| Self { + pattern, + ty, + expression, + span: e.span(), + }) } } -impl PestParse for SingleExpression { - const RULE: Rule = Rule::single_expression; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); +impl ChumskyParse for Pattern { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + recursive(|pat| { + let variable = select! { Token::Ident(name) => Identifier::from_str_unchecked(name) } + .map(Pattern::Identifier); - let span = Span::from(&pair); - let inner_pair = pair.into_inner().next().unwrap(); + let ignore = select! { + Token::Ident("_") => Pattern::Ignore, + }; - let inner = match inner_pair.as_rule() { - Rule::left_expr => { - let l = inner_pair.into_inner().next().unwrap(); - Expression::parse(l) - .map(Arc::new) - .map(Either::Left) - .map(SingleExpressionInner::Either)? - } - Rule::right_expr => { - let r = inner_pair.into_inner().next().unwrap(); - Expression::parse(r) - .map(Arc::new) - .map(Either::Right) - .map(SingleExpressionInner::Either)? - } - Rule::none_expr => SingleExpressionInner::Option(None), - Rule::some_expr => { - let r = inner_pair.into_inner().next().unwrap(); - Expression::parse(r) - .map(Arc::new) - .map(Some) - .map(SingleExpressionInner::Option)? 
- } - Rule::false_expr => SingleExpressionInner::Boolean(false), - Rule::true_expr => SingleExpressionInner::Boolean(true), - Rule::call_expr => SingleExpressionInner::Call(Call::parse(inner_pair)?), - Rule::bin_literal => Binary::parse(inner_pair).map(SingleExpressionInner::Binary)?, - Rule::hex_literal => { - Hexadecimal::parse(inner_pair).map(SingleExpressionInner::Hexadecimal)? - } - Rule::dec_literal => Decimal::parse(inner_pair).map(SingleExpressionInner::Decimal)?, - Rule::witness_expr => SingleExpressionInner::Witness(WitnessName::parse( - inner_pair.into_inner().next().unwrap(), - )?), - Rule::param_expr => SingleExpressionInner::Parameter(WitnessName::parse( - inner_pair.into_inner().next().unwrap(), - )?), - Rule::variable_expr => { - let identifier_pair = inner_pair.into_inner().next().unwrap(); - SingleExpressionInner::Variable(Identifier::parse(identifier_pair)?) - } - Rule::expression => { - SingleExpressionInner::Expression(Expression::parse(inner_pair).map(Arc::new)?) - } - Rule::match_expr => Match::parse(inner_pair).map(SingleExpressionInner::Match)?, - Rule::tuple_expr => inner_pair + let tuple = pat .clone() - .into_inner() - .map(Expression::parse) - .collect::, _>>() - .map(SingleExpressionInner::Tuple)?, - Rule::array_expr => inner_pair + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(Pattern::tuple) + .recover_with(via_parser(nested_delimiters( + Token::LParen, + Token::RParen, + [(Token::LBracket, Token::RBracket)], + |_| Pattern::tuple(Vec::new()), + ))); + + let array = pat .clone() - .into_inner() - .map(Expression::parse) - .collect::, _>>() - .map(SingleExpressionInner::Array)?, - Rule::list_expr => { - let elements = inner_pair - .into_inner() - .map(|inner| Expression::parse(inner)) - .collect::, _>>()?; - SingleExpressionInner::List(elements) - } - _ => unreachable!("Corrupt grammar"), - }; - - Ok(SingleExpression { inner, span }) - } -} - -impl 
PestParse for Decimal { - const RULE: Rule = Rule::dec_literal; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let decimal = pair.as_str().replace('_', ""); - Ok(Self::from_str_unchecked(decimal.as_str())) - } -} - -impl PestParse for Binary { - const RULE: Rule = Rule::bin_literal; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let binary = pair.as_str().strip_prefix("0b").unwrap().replace('_', ""); - Ok(Self::from_str_unchecked(binary.as_str())) + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .map(Pattern::array) + .recover_with(via_parser(nested_delimiters( + Token::LBracket, + Token::RBracket, + [(Token::LParen, Token::RParen)], + |_| Pattern::array(Vec::new()), + ))); + + choice((ignore, variable, tuple, array)).labelled("pattern") + }) } } -impl PestParse for Hexadecimal { - const RULE: Rule = Rule::hex_literal; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let hexadecimal = pair.as_str().strip_prefix("0x").unwrap().replace('_', ""); - Ok(Self::from_str_unchecked(hexadecimal.as_str())) +impl Call { + pub fn parser<'tokens, 'src: 'tokens, I, E>( + expr: E, + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + E: Parser<'tokens, I, Expression, extra::Err, Span>>> + + Clone + + 'tokens, + { + let args = expr + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .recover_with(via_parser(nested_delimiters( + Token::LParen, + Token::RParen, + [(Token::LBracket, Token::RBracket)], + |_| Vec::new(), + ))) + .map(Arc::from) + .labelled("call arguments"); + + CallName::parser() + .then(args) + .map_with(|(name, args), e| Self { + name, + args, + span: 
e.span(), + }) } } -impl PestParse for Match { - const RULE: Rule = Rule::match_expr; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let _match_keyword = it.next().unwrap(); - let scrutinee_pair = it.next().unwrap(); - let scrutinee = Expression::parse(scrutinee_pair.clone()).map(Arc::new)?; - let first = MatchArm::parse(it.next().unwrap())?; - let second = MatchArm::parse(it.next().unwrap())?; +impl ChumskyParse for CallName { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let turbofish_start = just(Token::Colon) + .then(just(Token::Colon)) + .then(just(Token::LAngle)) + .ignored(); + + let generics_close = just(Token::RAngle); + + let type_cast = just(Token::LAngle) + .ignore_then(AliasedType::parser()) + .then_ignore(generics_close.clone()) + .then_ignore(just(Token::Colon).then(just(Token::Colon))) + .then_ignore(just(Token::BuiltinFn("into"))) + .map(CallName::TypeCast); + + let builtin_generic_ty = |name: &'static str, ctor: fn(AliasedType) -> Self| { + just(Token::BuiltinFn(name)) + .ignore_then(turbofish_start.clone()) + .ignore_then(AliasedType::parser()) + .then_ignore(generics_close.clone()) + .map(ctor) + }; - let (left, right) = match (&first.pattern, &second.pattern) { - (MatchPattern::Left(..), MatchPattern::Right(..)) => (first, second), - (MatchPattern::Right(..), MatchPattern::Left(..)) => (second, first), - (MatchPattern::None, MatchPattern::Some(..)) => (first, second), - (MatchPattern::False, MatchPattern::True) => (first, second), - (MatchPattern::Some(..), MatchPattern::None) => (second, first), - (MatchPattern::True, MatchPattern::False) => (second, first), - (p1, p2) => { - return Err(Error::IncompatibleMatchArms(p1.clone(), p2.clone())).with_span(span) - } + let unwrap_left = 
builtin_generic_ty("unwrap_left", CallName::UnwrapLeft); + let unwrap_right = builtin_generic_ty("unwrap_right", CallName::UnwrapRight); + let is_none = builtin_generic_ty("is_none", CallName::IsNone); + + let fold = just(Token::BuiltinFn("fold")) + .ignore_then(turbofish_start.clone()) + .ignore_then(FunctionName::parser()) + .then_ignore(just(Token::Comma)) + .then(select! { Token::DecLiteral(s) => s }) + .then_ignore(generics_close.clone()) + .try_map(|(func, bound_str), span| { + let bound = NonZeroPow2Usize::from_str(bound_str) + .map_err(|e| Rich::custom(span, format!("Invalid fold bound: {}", e)))?; + Ok(CallName::Fold(func, bound)) + }); + + let array_fold = just(Token::BuiltinFn("array_fold")) + .ignore_then(turbofish_start.clone()) + .ignore_then(FunctionName::parser()) + .then_ignore(just(Token::Comma)) + .then(select! { Token::DecLiteral(s) => s }) + .then_ignore(generics_close.clone()) + .try_map(|(func, size_str), span| { + let size_val = size_str + .parse::() + .map_err(|_| Rich::custom(span, "Invalid number"))?; + let size = NonZeroUsize::new(size_val) + .ok_or_else(|| Rich::custom(span, "Array fold size must be non-zero"))?; + Ok(CallName::ArrayFold(func, size)) + }); + + let for_while = just(Token::BuiltinFn("for_while")) + .ignore_then(turbofish_start.clone()) + .ignore_then(FunctionName::parser()) + .then_ignore(generics_close.clone()) + .map(CallName::ForWhile); + + let simple_builtins = select! { + Token::BuiltinFn("unwrap") => CallName::Unwrap, + Token::BuiltinFn("assert!") => CallName::Assert, + Token::BuiltinFn("panic!") => CallName::Panic, + Token::BuiltinFn("dbg!") => CallName::Debug, }; - Ok(Self { - scrutinee, - left, - right, - span, - }) + let jet = select! 
{ Token::Jet(s) => JetName::from_str_unchecked(s) }.map(CallName::Jet); + + let custom_func = FunctionName::parser().map(CallName::Custom); + + choice(( + type_cast, + unwrap_left, + unwrap_right, + is_none, + fold, + array_fold, + for_while, + simple_builtins, + jet, + custom_func, + )) + } +} + +impl ChumskyParse for TypeAlias { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let name = AliasName::parser().map_with(|name, e| (name, e.span())); + + just(Token::Type) + .ignore_then(name) + .then_ignore(parse_token_with_recovery(Token::Eq)) + .then(AliasedType::parser()) + .then_ignore(just(Token::Semi)) + .map_with(|(name, ty), e| Self { + name: name.0, + ty, + span: e.span(), + }) } } +impl ChumskyParse for Expression { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + recursive(|expr| { + let block = { + let statement = Statement::parser(expr.clone()).then_ignore(just(Token::Semi)); + + let block_recovery = nested_delimiters( + Token::LBrace, + Token::RBrace, + [ + (Token::LParen, Token::RParen), + (Token::LBracket, Token::RBracket), + ], + |span| Expression::empty(span).inner().clone(), + ); + + let statements = statement + .repeated() + .collect::>() + .map(Arc::from) + .recover_with(skip_then_retry_until( + block_recovery.ignored().or(any().ignored()), + one_of([Token::Semi, Token::RParen, Token::RBracket, Token::RBrace]) + .ignored(), + )); + + let final_expr = expr.clone().map(Arc::new).or_not(); -impl PestParse for MatchArm { - const RULE: Rule = Rule::match_arm; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let mut it = pair.into_inner(); - let pattern = MatchPattern::parse(it.next().unwrap())?; - let expression = 
Expression::parse(it.next().unwrap()).map(Arc::new)?; - Ok(MatchArm { - pattern, - expression, + statements + .then(final_expr) + .delimited_by(just(Token::LBrace), just(Token::RBrace)) + .map(|(stmts, end_expr)| ExpressionInner::Block(stmts, end_expr)) + }; + + let single = SingleExpression::parser(expr.clone()).map(ExpressionInner::Single); + + choice((block, single)) + .map_with(|inner, e| Expression { + inner, + span: e.span(), + }) + .labelled("expression") }) } } -impl PestParse for MatchPattern { - const RULE: Rule = Rule::match_pattern; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let pair = pair.into_inner().next().unwrap(); - let ret = match pair.as_rule() { - rule @ (Rule::left_pattern | Rule::right_pattern | Rule::some_pattern) => { - let mut it = pair.into_inner(); - let identifier = Identifier::parse(it.next().unwrap())?; - let ty = AliasedType::parse(it.next().unwrap())?; - - match rule { - Rule::left_pattern => MatchPattern::Left(identifier, ty), - Rule::right_pattern => MatchPattern::Right(identifier, ty), - Rule::some_pattern => MatchPattern::Some(identifier, ty), - _ => unreachable!("Covered by outer match"), - } - } - Rule::none_pattern => MatchPattern::None, - Rule::false_pattern => MatchPattern::False, - Rule::true_pattern => MatchPattern::True, - _ => unreachable!("Corrupt grammar"), +impl SingleExpression { + pub fn parser<'tokens, 'src: 'tokens, I, E>( + expr: E, + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + E: Parser<'tokens, I, Expression, extra::Err, Span>>> + + Clone + + 'tokens, + { + let wrapper = |name: &'static str| { + select! 
{ Token::Ident(i) if i == name => i }.ignore_then( + expr.clone() + .delimited_by(just(Token::LParen), just(Token::RParen)), + ) }; - Ok(ret) - } -} -impl PestParse for AliasedType { - const RULE: Rule = Rule::ty; - - fn parse(pair: pest::iterators::Pair) -> Result { - enum Item { - Type(AliasedType), - Size(usize), - Bound(NonZeroPow2Usize), - } - - impl Item { - fn unwrap_type(self) -> AliasedType { - match self { - Item::Type(ty) => ty, - _ => panic!("Not a type"), - } - } + let left = + wrapper("Left").map(|e| SingleExpressionInner::Either(Either::Left(Arc::new(e)))); + + let right = + wrapper("Right").map(|e| SingleExpressionInner::Either(Either::Right(Arc::new(e)))); + + let some = wrapper("Some").map(|e| SingleExpressionInner::Option(Some(Arc::new(e)))); + + let none = select! { Token::Ident("None") => SingleExpressionInner::Option(None) }; + + let boolean = select! { Token::Bool(b) => SingleExpressionInner::Boolean(b) }; + + let comma_separated = expr + .clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>(); + + let array = comma_separated + .clone() + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .map(|es| SingleExpressionInner::Array(Arc::from(es))); + + let list = just(Token::BuiltinFn("list!")) + .ignore_then( + comma_separated + .clone() + .delimited_by(just(Token::LBracket), just(Token::RBracket)), + ) + .map(|es| SingleExpressionInner::List(Arc::from(es))); + + let tuple = expr + .clone() + .separated_by(just(Token::Comma)) + .collect::>() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|es| SingleExpressionInner::Tuple(Arc::from(es))); + + let literal = select! 
{ + Token::DecLiteral(s) => SingleExpressionInner::Decimal(Decimal::from_str_unchecked(s.replace('_', "").as_str())), + Token::HexLiteral(s) => SingleExpressionInner::Hexadecimal(Hexadecimal::from_str_unchecked(s.replace('_', "").as_str())), + Token::BinLiteral(s) => SingleExpressionInner::Binary(Binary::from_str_unchecked(s.replace('_', "").as_str())), + Token::Witness(s) => SingleExpressionInner::Witness(WitnessName::from_str_unchecked(s)), + Token::Param(s) => SingleExpressionInner::Parameter(WitnessName::from_str_unchecked(s)), + }; - fn unwrap_size(self) -> usize { - match self { - Item::Size(size) => size, - _ => panic!("Not a size"), - } - } + let call = Call::parser(expr.clone()).map(SingleExpressionInner::Call); - fn unwrap_bound(self) -> NonZeroPow2Usize { - match self { - Item::Bound(size) => size, - _ => panic!("Not a bound"), - } - } - } + let match_expr = Match::parser(expr.clone()).map(SingleExpressionInner::Match); - assert!(matches!(pair.as_rule(), Self::RULE)); - let pair = TyPair(pair); - let mut output = vec![]; + let variable = Identifier::parser().map(SingleExpressionInner::Variable); - for data in pair.post_order_iter() { - match data.node.0.as_rule() { - Rule::alias_name => { - let name = AliasName::parse(data.node.0)?; - output.push(Item::Type(AliasedType::alias(name))); - } - Rule::builtin_alias => { - let builtin = BuiltinAlias::parse(data.node.0)?; - output.push(Item::Type(AliasedType::builtin(builtin))); - } - Rule::unsigned_type => { - let uint_ty = UIntType::parse(data.node.0)?; - output.push(Item::Type(AliasedType::from(uint_ty))); - } - Rule::sum_type => { - let r = output.pop().unwrap().unwrap_type(); - let l = output.pop().unwrap().unwrap_type(); - output.push(Item::Type(AliasedType::either(l, r))); - } - Rule::option_type => { - let r = output.pop().unwrap().unwrap_type(); - output.push(Item::Type(AliasedType::option(r))); - } - Rule::boolean_type => { - output.push(Item::Type(AliasedType::boolean())); - } - Rule::tuple_type => { 
- let size = data.node.n_children(); - let elements: Vec = output - .split_off(output.len() - size) - .into_iter() - .map(Item::unwrap_type) - .collect(); - debug_assert_eq!(elements.len(), size); - output.push(Item::Type(AliasedType::tuple(elements))); - } - Rule::array_type => { - let size = output.pop().unwrap().unwrap_size(); - let el = output.pop().unwrap().unwrap_type(); - output.push(Item::Type(AliasedType::array(el, size))); - } - Rule::array_size => { - let size_str = data.node.0.as_str(); - let size = size_str.parse::().with_span(&data.node.0)?; - output.push(Item::Size(size)); - } - Rule::list_type => { - let bound = output.pop().unwrap().unwrap_bound(); - let el = output.pop().unwrap().unwrap_type(); - output.push(Item::Type(AliasedType::list(el, bound))); - } - Rule::list_bound => { - let bound = NonZeroPow2Usize::parse(data.node.0)?; - output.push(Item::Bound(bound)); - } - Rule::ty => {} - _ => unreachable!("Corrupt grammar"), - } - } - - debug_assert!(output.len() == 1); - Ok(output.pop().unwrap().unwrap_type()) + choice(( + left, right, some, none, boolean, match_expr, list, array, tuple, call, literal, + variable, + )) + .map_with(|inner, e| Self { + inner, + span: e.span(), + }) } } -impl PestParse for UIntType { - const RULE: Rule = Rule::unsigned_type; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let ret = match pair.as_str() { - "u1" => UIntType::U1, - "u2" => UIntType::U2, - "u4" => UIntType::U4, - "u8" => UIntType::U8, - "u16" => UIntType::U16, - "u32" => UIntType::U32, - "u64" => UIntType::U64, - "u128" => UIntType::U128, - "u256" => UIntType::U256, - _ => unreachable!("Corrupt grammar"), +impl ChumskyParse for MatchPattern { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let wrapper = |name: &'static str, ctor: fn(Identifier, AliasedType) -> Self| { + 
select! { Token::Ident(i) if i == name => i } + .ignore_then( + Identifier::parser() + .then_ignore(just(Token::Colon)) + .then(AliasedType::parser()) + .delimited_by(just(Token::LParen), just(Token::RParen)) + .recover_with(via_parser(nested_delimiters( + Token::LParen, + Token::RParen, + [(Token::LBracket, Token::RBracket)], + |_| { + ( + Identifier::from_str_unchecked(""), + AliasedType::alias(AliasName::from_str_unchecked("error")), + ) + }, + ))), + ) + .map(move |(id, ty)| ctor(id, ty)) }; - Ok(ret) - } -} - -impl PestParse for BuiltinAlias { - const RULE: Rule = Rule::builtin_alias; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - Self::from_str(pair.as_str()) - .map_err(Error::CannotParse) - .with_span(&pair) - } -} -impl PestParse for NonZeroPow2Usize { - // FIXME: This equates NonZeroPow2Usize with list bounds. Create wrapper for list bounds? - const RULE: Rule = Rule::list_bound; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let bound = pair.as_str().parse::().with_span(&pair)?; - NonZeroPow2Usize::new(bound) - .ok_or(Error::ListBoundPow2(bound)) - .with_span(&pair) + choice(( + wrapper("Left", MatchPattern::Left), + wrapper("Right", MatchPattern::Right), + wrapper("Some", MatchPattern::Some), + select! { Token::Ident("None") => MatchPattern::None }, + select! { Token::Bool(true) => MatchPattern::True }, + select! 
{ Token::Bool(false) => MatchPattern::False }, + )) } } -impl PestParse for ModuleProgram { - const RULE: Rule = Rule::program; +impl MatchArm { + pub fn parser<'tokens, 'src: 'tokens, I, E>( + expr: E, + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + E: Parser<'tokens, I, Expression, extra::Err, Span>>> + + Clone + + 'tokens, + { + MatchPattern::parser() + .then_ignore(just(Token::FatArrow)) + .then(expr.map(Arc::new)) + .then(just(Token::Comma).or_not()) + .validate(|((pattern, expression), comma), e, emit| { + let is_block = matches!(expression.as_ref().inner, ExpressionInner::Block(_, _)); + + if !is_block && comma.is_none() { + emit.emit(Rich::custom(e.span(), "Missing comma after match arm")); + } - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let items = pair - .into_inner() - .filter_map(|pair| match pair.as_rule() { - Rule::item => Some(ModuleItem::parse(pair)), - _ => None, + Self { + pattern, + expression, + } }) - .collect::, RichError>>()?; - Ok(Self { items, span }) } } -impl PestParse for ModuleItem { - const RULE: Rule = Rule::item; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let pair = pair.into_inner().next().unwrap(); - match pair.as_rule() { - Rule::module => Module::parse(pair).map(Self::Module), - _ => Ok(Self::Ignored), - } +impl Match { + pub fn parser<'tokens, 'src: 'tokens, I, E>( + expr: E, + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + E: Parser<'tokens, I, Expression, extra::Err, Span>>> + + Clone + + 'tokens, + { + let scrutinee = expr.clone().map(Arc::new); + + let arms = MatchArm::parser(expr.clone()) + .then(MatchArm::parser(expr)) + .delimited_by(just(Token::LBrace), just(Token::RBrace)); + + just(Token::Match) + 
.ignore_then(scrutinee) + .then(arms) + .map_with(|(scrutinee, (first, second)), e| (scrutinee, first, second, e.span())) + .try_map(|(scrutinee, first, second, span), _| { + let (left, right) = match (&first.pattern, &second.pattern) { + (MatchPattern::Left(..), MatchPattern::Right(..)) => (first, second), + (MatchPattern::Right(..), MatchPattern::Left(..)) => (second, first), + + (MatchPattern::None, MatchPattern::Some(..)) => (first, second), + (MatchPattern::Some(..), MatchPattern::None) => (second, first), + + (MatchPattern::False, MatchPattern::True) => (first, second), + (MatchPattern::True, MatchPattern::False) => (second, first), + + (p1, p2) => { + return Err(Rich::custom( + span, + format!("Incompatible match arms: {:?} and {:?}", p1, p2), + )); + } + }; + + Ok(Self { + scrutinee, + left, + right, + span, + }) + }) } } -impl PestParse for Module { - const RULE: Rule = Rule::module; +impl ChumskyParse for ModuleItem { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let module = Module::parser().map(Self::Module); - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let _mod_keyword = it.next().unwrap(); - let name = ModuleName::parse(it.next().unwrap())?; - let assignments = it - .map(ModuleAssignment::parse) - .collect::, RichError>>()?; - Ok(Self { - name, - assignments, - span, - }) + module } } -impl PestParse for ModuleAssignment { - const RULE: Rule = Rule::module_assign; - - fn parse(pair: pest::iterators::Pair) -> Result { - assert!(matches!(pair.as_rule(), Self::RULE)); - let span = Span::from(&pair); - let mut it = pair.into_inner(); - let _const_keyword = it.next().unwrap(); - let name = WitnessName::parse(it.next().unwrap())?; - let ty = AliasedType::parse(it.next().unwrap())?; - let expression = 
Expression::parse(it.next().unwrap())?; - Ok(Self { - name, - ty, - expression, - span, - }) +impl ChumskyParse for ModuleProgram { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + ModuleItem::parser() + .repeated() + .collect::>() + .map_with(|items, e| Self { + items: Arc::from(items), + span: e.span(), + }) } } -/// Pair of tokens from the 'pattern' rule. -#[derive(Clone, Debug)] -struct PatternPair<'a>(pest::iterators::Pair<'a, Rule>); - -impl TreeLike for PatternPair<'_> { - fn as_node(&self) -> Tree { - let mut it = self.0.clone().into_inner(); - match self.0.as_rule() { - Rule::variable_pattern | Rule::ignore_pattern => Tree::Nullary, - Rule::pattern => { - let l = it.next().unwrap(); - Tree::Unary(PatternPair(l)) - } - Rule::tuple_pattern | Rule::array_pattern => { - let children: Arc<[PatternPair]> = it.map(PatternPair).collect(); - Tree::Nary(children) - } - _ => unreachable!("Corrupt grammar"), - } +impl ChumskyParse for Module { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let name = ModuleName::parser().map_with(|name, e| (name, e.span())); + + let assignments = ModuleAssignment::parser() + .repeated() + .collect::>() + .delimited_by(just(Token::LBrace), just(Token::RBrace)) + .recover_with(via_parser(nested_delimiters( + Token::LBrace, + Token::RBrace, + [ + (Token::LParen, Token::RParen), + (Token::LBracket, Token::RBracket), + ], + |_| Vec::new(), + ))) + .map(Arc::from); + + just(Token::Mod) + .ignore_then(name) + .then(assignments) + .map_with(|(name, assignments), e| Self { + name: name.0, + assignments, + span: e.span(), + }) } } -/// Pair of tokens from the 'ty' rule. 
-#[derive(Clone, Debug)] -struct TyPair<'a>(pest::iterators::Pair<'a, Rule>); - -impl TreeLike for TyPair<'_> { - fn as_node(&self) -> Tree { - let mut it = self.0.clone().into_inner(); - match self.0.as_rule() { - Rule::boolean_type - | Rule::unsigned_type - | Rule::array_size - | Rule::list_bound - | Rule::alias_name - | Rule::builtin_alias => Tree::Nullary, - Rule::ty | Rule::option_type => { - let l = it.next().unwrap(); - Tree::Unary(TyPair(l)) - } - Rule::sum_type | Rule::array_type | Rule::list_type => { - let l = it.next().unwrap(); - let r = it.next().unwrap(); - Tree::Binary(TyPair(l), TyPair(r)) - } - Rule::tuple_type => Tree::Nary(it.map(TyPair).collect()), - _ => unreachable!("Corrupt grammar"), - } +impl ChumskyParse for ModuleAssignment { + fn parser<'tokens, 'src: 'tokens, I>( + ) -> impl Parser<'tokens, I, Self, extra::Err, Span>>> + Clone + where + I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, + { + let name = WitnessName::parser().map_with(|name, e| (name, e.span())); + + just(Token::Const) + .ignore_then(name) + .then_ignore(just(Token::Colon)) + .then(AliasedType::parser()) + .then_ignore(just(Token::Eq)) + .then(Expression::parser()) + .then_ignore(just(Token::Semi)) + .map_with(|((name, ty), expression), e| Self { + name, + ty, + expression, + span: e.span(), + }) } } @@ -1727,7 +1853,7 @@ impl crate::ArbitraryRec for Function { let ret = Option::::arbitrary(u)?; let body = Expression::arbitrary_rec(u, budget).map(Expression::into_block)?; Ok(Self { - name, + name: (name, Span::DUMMY), params, ret, body, @@ -1929,3 +2055,43 @@ impl crate::ArbitraryRec for Match { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::lexer::lexer; + + #[test] + fn lexer_test() { + use chumsky::prelude::*; + + let src = include_str!("../examples/last_will.simf"); + + let (tokens, lex_errs) = lexer().parse(src).into_output_errors(); + + let tokens = tokens + .unwrap() + .into_iter() + .map(|(tok, span)| (tok, Span::from(span))) + 
.filter(|(tok, _)| !matches!(tok, Token::Comment | Token::BlockComment)) + .collect::>(); + + dbg!(&tokens); + + let (program, errors) = Program::parser() + .map_with(|ast, e| (ast, e.span())) + .parse( + tokens + .as_slice() + .map((src.len()..src.len()).into(), |(t, s)| (t, s)), + ) + .into_output_errors(); + + dbg!(&program); + dbg!(errors); + + println!("{}", program.unwrap().0); + + assert!(lex_errs.is_empty()); + } +}