From 0e23b9fa82d95ca365523afac554a4fb6d461a23 Mon Sep 17 00:00:00 2001 From: John Turner Date: Tue, 21 Oct 2025 20:48:00 -0400 Subject: impl some basic parsers and combinators --- src/input.rs | 127 +++++++++ src/lib.rs | 821 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/mode.rs | 66 +++++ tests/sexpr.rs | 66 +++++ 4 files changed, 1080 insertions(+) create mode 100644 src/input.rs create mode 100644 src/lib.rs create mode 100644 src/mode.rs create mode 100644 tests/sexpr.rs diff --git a/src/input.rs b/src/input.rs new file mode 100644 index 0000000..0857c8c --- /dev/null +++ b/src/input.rs @@ -0,0 +1,127 @@ +use core::{fmt, iter, slice, str::CharIndices}; + +use crate::Span; + +pub trait Input: Clone { + type Item; + type Items: Clone + Iterator; + + fn items(&self) -> Self::Items; + + fn slice(&self, span: Span) -> Self; + + fn len(&self) -> usize; +} + +impl<'a> Input for &'a str { + type Item = char; + type Items = CharIndices<'a>; + + fn items(&self) -> Self::Items { + self.char_indices() + } + + fn slice(&self, span: Span) -> Self { + &self[span] + } + + fn len(&self) -> usize { + (*self).len() + } +} + +impl<'a> Input for &'a [u8] { + type Item = u8; + type Items = iter::Enumerate>>; + + fn items(&self) -> Self::Items { + self.iter().copied().enumerate() + } + + fn slice(&self, span: Span) -> Self { + &self[span] + } + + fn len(&self) -> usize { + (*self).len() + } +} + +pub trait Character { + fn is_alphabetic(&self) -> bool; + + fn is_numeric(&self) -> bool; + + fn is_whitespace(&self) -> bool; + + fn is_alphanumeric(&self) -> bool { + self.is_alphabetic() || self.is_numeric() + } +} + +impl Character for char { + fn is_alphabetic(&self) -> bool { + (*self).is_ascii_alphabetic() + } + + fn is_numeric(&self) -> bool { + (*self).is_numeric() + } + + fn is_whitespace(&self) -> bool { + (*self).is_whitespace() + } +} + +#[derive(Clone)] +pub struct InputIter { + pub it: I::Items, + pub input: I, +} + +impl InputIter +where + I: Input, +{ + pub fn new(input: I) -> Self { + Self { + it: input.items(), + input, + } + } + + pub fn position(&self) -> usize { + match self.it.clone().next() { + Some((i, _)) => i, + None => self.input.len(), + } + } + + pub fn is_finished(&self) -> bool { + self.clone().next().is_none() + } + + pub fn rest(&self) -> I { + self.input.slice(self.position()..self.input.len()) + } +} + +impl Iterator for InputIter +where + I: Input, +{ + type Item = (usize, I::Item); + + fn next(&mut self) -> Option { + self.it.next() + } +} + +impl fmt::Debug for InputIter +where + I: Input + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.rest()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..fc91cb1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,821 @@ +use core::{cmp::PartialEq, fmt, iter::Iterator, marker::Sized, ops::Range}; + +use crate::{ + input::{Character, Input, InputIter}, + mode::{Check, Emit, Mode}, +}; + +pub mod input; +pub mod mode; + +pub type Span = Range; + +pub type ParserOk = (InputIter, O); + +pub type ParserResult = + Result::Output>, ::Output>>; + +pub trait Trace { + fn trace(parser: &str, input: InputIter); +} + +pub struct DebugTracer; + +impl Trace for DebugTracer +where + I: Input + fmt::Debug, +{ + fn trace(parser: &str, input: InputIter) { + eprintln!("{}:{:?}", parser, input) + } +} + +impl Trace for () +where + I: Input, +{ + fn trace(_: &str, _: InputIter) {} +} + +#[derive(Debug)] +pub enum ParserFinishedError { + Err(InputIter), + Unfinished(InputIter), +} + +pub trait Parser: Sized { + type Output; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult; + + fn parse(&mut self, it: InputIter) -> ParserResult { + self.run::(it) + } + + fn check(&mut self, it: InputIter) -> ParserResult { + self.run::(it) + } + + fn trace>(&mut self, it: InputIter) -> ParserResult { + self.run::(it) + } + + fn parse_finished(&mut self, it: InputIter) -> Result> { + match self.parse(it) { + Ok((rest, output)) if rest.is_finished() => Ok(output), + Ok((rest, _)) => Err(ParserFinishedError::Unfinished(rest)), + Err(rest) => Err(ParserFinishedError::Err(rest)), + } + } + + fn check_finished(&mut self, it: InputIter) -> Result<(), ParserFinishedError> { + match self.parse(it) { + Ok((rest, _)) if rest.is_finished() => Ok(()), + Ok((rest, _)) => Err(ParserFinishedError::Unfinished(rest)), + Err(rest) => Err(ParserFinishedError::Err(rest)), + } + } + + fn map(self, f: F) -> impl Parser + where + F: Fn(Self::Output) -> O, + { + Map { parser: self, f } + } + + fn and

(self, right: P) -> impl Parser + where + P: Parser, + { + And { left: self, right } + } + + fn or(self, right: P) -> impl Parser + where + Self: Parser, + P: Parser, + { + Or { left: self, right } + } + + fn and_not

(self, not: P) -> impl Parser + where + P: Parser, + { + AndNot { parser: self, not } + } + + fn preceded_by

(self, preceded: P) -> impl Parser + where + P: Parser, + { + PrecededBy { + parser: self, + preceded, + } + } + + fn followed_by

(self, followed: P) -> impl Parser + where + P: Parser, + { + FollowedBy { + parser: self, + followed, + } + } + + fn delimited_by(self, left: P1, right: P2) -> impl Parser + where + P1: Parser, + P2: Parser, + { + DelimitedBy { + parser: self, + left, + right, + } + } + + fn recognize(self) -> impl Parser { + Recognize { parser: self } + } + + fn separated_list

(self, delimiter: P) -> impl Parser> + where + P: Parser, + { + SeparatedList { + parser: self, + delimiter, + } + } + + fn separated_list1

(self, delimiter: P) -> impl Parser> + where + P: Parser, + { + SeparatedList1 { + parser: self, + delimiter, + } + } +} + +impl Parser for F +where + I: Input, + F: Fn(InputIter) -> Result, InputIter>, +{ + type Output = O; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("fn", it.clone()); + + match self(it) { + Ok((rest, output)) => Ok((rest, OM::bind(|| output))), + Err(rest) => Err(EM::bind(|| rest)), + } + } +} + +struct Map { + parser: P, + f: F, +} + +impl Parser for Map +where + I: Input, + P: Parser, + F: Fn(P::Output) -> O, +{ + type Output = O; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + match self.parser.run::(it) { + Ok((rest, output)) => Ok((rest, OM::map(output, |o| (self.f)(o)))), + Err(rest) => Err(rest), + } + } +} + +struct And { + left: P1, + right: P2, +} + +impl Parser for And +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = (P1::Output, P2::Output); + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("and", it.clone()); + + let (rest, o1) = match self.left.run::(it) { + Ok((rest, output)) => (rest, output), + Err(rest) => return Err(rest), + }; + + let (rest, o2) = match self.right.run::(rest) { + Ok((rest, output)) => (rest, output), + Err(rest) => return Err(rest), + }; + + Ok((rest, OM::combine(o1, o2, |o1, o2| (o1, o2)))) + } +} + +struct AndNot { + parser: P1, + not: P2, +} + +impl Parser for AndNot +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = P1::Output; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("andnot", it.clone()); + + match self.not.run::(it.clone()) { + Ok(_) => return Err(EM::bind(|| it.clone())), + _ => (), + }; + + self.parser.run::(it) + } +} + +struct Or { + left: P1, + right: P2, +} + +impl Parser for Or +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = O; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("or", it.clone()); + + match ( + self.left.run::(it.clone()), + self.right.run::(it.clone()), + ) { + (Ok((rest, output)), _) => Ok((rest, output)), + (_, Ok((rest, output))) => Ok((rest, output)), + (_, Err(rest)) => Err(rest), + } + } +} + +pub struct Take { + amt: usize, +} + +impl Parser for Take +where + I: Input, +{ + type Output = I; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("take", it.clone()); + + let start = it.clone(); + + for _ in 0..self.amt { + match it.next() { + Some(_) => continue, + None => return Err(EM::bind(|| it)), + } + } + + Ok(( + it.clone(), + OM::bind(|| it.input.slice(start.position()..it.position())), + )) + } +} + +pub fn take(amt: usize) -> impl Parser +where + I: Input, +{ + Take { amt } +} + +pub struct TakeWhile

{ + parser: P, +} + +impl Parser for TakeWhile

+where + I: Input, + P: Parser, +{ + type Output = I; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("takewhile", it.clone()); + + let start = it.clone(); + + while let Ok((rest, _)) = self.parser.run::(it.clone()) { + it = rest; + } + + Ok(( + it.clone(), + OM::bind(|| it.input.slice(start.position()..it.position())), + )) + } +} + +pub fn take_while(parser: P) -> impl Parser +where + I: Input, + P: Parser, +{ + TakeWhile { parser } +} + +pub struct TakeWhile1

{ + parser: P, +} + +impl Parser for TakeWhile1

+where + I: Input, + P: Parser, +{ + type Output = I; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("takewhile1", it.clone()); + + let start = it.clone(); + + while let Ok((rest, _)) = self.parser.run::(it.clone()) { + it = rest; + } + + if it.position() > start.position() { + Ok(( + it.clone(), + OM::bind(|| it.input.slice(start.position()..it.position())), + )) + } else { + Err(EM::bind(|| it)) + } + } +} + +pub fn take_while1(parser: P) -> impl Parser +where + I: Input, + P: Parser, +{ + TakeWhile1 { parser } +} + +pub struct OneOf { + it: It, +} + +impl Parser for OneOf +where + I: Input, + I::Item: PartialEq, + It: Iterator, +{ + type Output = I::Item; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("oneof", it.clone()); + + match it.next() { + Some((_, item)) if self.it.any(|i| item == i) => Ok((it, OM::bind(|| item))), + _ => Err(EM::bind(|| it)), + } + } +} + +pub fn one_of(it: It) -> impl Parser +where + I: Input, + I::Item: PartialEq, + It: Iterator, +{ + OneOf { it } +} + +pub struct If { + f: F, +} + +impl Parser for If +where + I: Input, + F: Fn(&I::Item) -> bool, +{ + type Output = I::Item; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("if", it.clone()); + + match it.next() { + Some((_, output)) if (self.f)(&output) => Ok((it, OM::bind(|| output))), + _ => Err(EM::bind(|| it)), + } + } +} + +pub fn r#if(f: F) -> impl Parser +where + I: Input, + F: Fn(&I::Item) -> bool, +{ + If { f } +} + +struct PrecededBy { + parser: P1, + preceded: P2, +} + +impl Parser for PrecededBy +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = P1::Output; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("preceded by", it.clone()); + + let rest = match self.preceded.check(it.clone()) { + Ok((rest, _)) => rest, + Err(_) => return Err(EM::bind(|| it)), + }; + + self.parser.run::(rest) + } +} + +pub struct FollowedBy { + parser: P1, + followed: P2, +} + +impl Parser for FollowedBy +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = P1::Output; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("followed by", it.clone()); + + let (rest, output) = match self.parser.run::(it) { + Ok((rest, output)) => (rest, output), + Err(rest) => return Err(rest), + }; + + let Ok((rest, _)) = self.followed.check(rest.clone()) else { + return Err(EM::bind(|| rest.clone())); + }; + + Ok((rest, output)) + } +} + +struct DelimitedBy { + parser: P1, + left: P2, + right: P3, +} + +impl Parser for DelimitedBy +where + I: Input, + P1: Parser, + P2: Parser, + P3: Parser, +{ + type Output = P1::Output; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("delimited by", it.clone()); + + let rest = match self.left.check(it.clone()) { + Ok((rest, _)) => rest, + Err(_) => return Err(EM::bind(|| it)), + }; + + let (rest, output) = match self.parser.run::(rest.clone()) { + Ok((rest, output)) => (rest, output), + Err(rest) => return Err(rest), + }; + + let Ok((rest, _)) = self.right.check(rest.clone()) else { + return Err(EM::bind(|| rest)); + }; + + Ok((rest, output)) + } +} + +struct Recognize

{ + parser: P, +} + +impl Parser for Recognize

+where + I: Input, + P: Parser, +{ + type Output = I; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("recognize", it.clone()); + + let start = it.clone(); + + let rest = match self.parser.check(it.clone()) { + Ok((rest, _)) => rest, + Err(_) => return Err(EM::bind(|| it)), + }; + + Ok(( + rest.clone(), + OM::bind(|| it.input.slice(start.position()..rest.position())), + )) + } +} + +struct Tag(T); + +impl Parser for Tag +where + I: Input + PartialEq, + T: Input, +{ + type Output = I; + + fn run>( + &mut self, + it: InputIter, + ) -> ParserResult { + Tracer::trace("tag", it.clone()); + + match take(self.0.len()).parse(it.clone()) { + Ok((rest, output)) if output == self.0 => Ok((rest, OM::bind(|| output))), + Ok(_) => return Err(EM::bind(|| it)), + Err(rest) => return Err(EM::bind(|| rest)), + } + } +} + +pub fn tag(tag: T) -> impl Parser +where + I: Input + PartialEq, + T: Input, +{ + Tag(tag) +} + +struct SeparatedList { + parser: P1, + delimiter: P2, +} + +impl Parser for SeparatedList +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = Vec; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("separated list", it.clone()); + + let mut outputs = OM::bind(|| Vec::new()); + + loop { + it = match self.parser.run::(it.clone()) { + Ok((rest, output)) => { + outputs = OM::combine(outputs, output, |mut acc, e| { + acc.push(e); + acc + }); + rest + } + _ => break, + }; + + it = match self.delimiter.check(it.clone()) { + Ok((rest, _)) => rest, + _ => break, + }; + } + + Ok((it, outputs)) + } +} + +pub struct SeparatedList1 { + parser: P1, + delimiter: P2, +} + +impl Parser for SeparatedList1 +where + I: Input, + P1: Parser, + P2: Parser, +{ + type Output = Vec; + + fn run>( + &mut self, + mut it: InputIter, + ) -> ParserResult { + Tracer::trace("separated list 1", it.clone()); + + it = match self.delimiter.check(it.clone()) { + Ok((rest, _)) => rest, + Err(_) => it, + }; + + let mut outputs = OM::bind(|| Vec::new()); + let mut len = 0; + + loop { + it = match self.parser.run::(it.clone()) { + Ok((rest, output)) => { + outputs = OM::combine(outputs, output, |mut acc, e| { + acc.push(e); + acc + }); + len += 1; + rest + } + _ => break, + }; + + it = match self.delimiter.check(it.clone()) { + Ok((rest, _)) => rest, + _ if len > 0 => break, + _ => return Err(EM::bind(|| it)), + }; + } + + Ok((it, outputs)) + } +} + +pub fn alpha() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while(r#if(|c: &I::Item| c.is_alphabetic())) +} + +pub fn alpha1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while1(r#if(|c: &I::Item| c.is_alphabetic())) +} + +pub fn numeric() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while(r#if(|c: &I::Item| c.is_numeric())) +} + +pub fn numeric1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while1(r#if(|c: &I::Item| c.is_numeric())) +} + +pub fn alphanumeric() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while(r#if(|c: &I::Item| c.is_alphanumeric())) +} + +pub fn alphanumeric1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while1(r#if(|c: &I::Item| c.is_alphanumeric())) +} + +pub fn whitespace() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while(r#if(|c: &I::Item| c.is_whitespace())) +} + +pub fn whitespace1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + take_while1(r#if(|c: &I::Item| c.is_whitespace())) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_separated_list() { + let input = "a b c"; + let it = InputIter::new(input); + + alpha1() + .separated_list(whitespace()) + .check_finished(it) + .unwrap(); + } +} diff --git a/src/mode.rs b/src/mode.rs new file mode 100644 index 0000000..3825e31 --- /dev/null +++ b/src/mode.rs @@ -0,0 +1,66 @@ +pub trait Mode { + type Output; + + fn bind(f: F) -> Self::Output + where + F: FnOnce() -> T; + + fn map(t: Self::Output, f: F) -> Self::Output + where + F: FnOnce(T) -> U; + + fn combine(t: Self::Output, u: Self::Output, f: F) -> Self::Output + where + F: FnOnce(T, U) -> V; +} + +pub struct Emit; + +impl Mode for Emit { + type Output = T; + + fn bind(f: F) -> Self::Output + where + F: FnOnce() -> T, + { + f() + } + + fn map(t: Self::Output, f: F) -> Self::Output + where + F: FnOnce(T) -> U, + { + f(t) + } + + fn combine(t: Self::Output, u: Self::Output, f: F) -> Self::Output + where + F: FnOnce(T, U) -> V, + { + f(t, u) + } +} + +pub struct Check; + +impl Mode for Check { + type Output = (); + + fn bind(_: F) -> Self::Output + where + F: FnOnce() -> T, + { + } + + fn map(_: Self::Output, _: F) -> Self::Output + where + F: FnOnce(T) -> U, + { + } + + fn combine(_: Self::Output, _: Self::Output, _: F) -> Self::Output + where + F: FnOnce(T, U) -> V, + { + } +} diff --git a/tests/sexpr.rs b/tests/sexpr.rs new file mode 100644 index 0000000..651d534 --- /dev/null +++ b/tests/sexpr.rs @@ -0,0 +1,66 @@ +#![allow(dead_code)] + +use mon::{ + alpha1, alphanumeric, alphanumeric1, input::InputIter, numeric1, tag, whitespace, Parser, + ParserResult, +}; + +#[derive(Debug)] +enum Sexpr { + List(Vec), + Atom(String), + String(String), + Int(i64), +} + +fn atom<'a>() -> impl Parser<&'a str, Output = Sexpr> { + alpha1() + .and(alphanumeric()) + .recognize() + .map(|output: &str| Sexpr::Atom(output.to_string())) +} + +fn string<'a>() -> impl Parser<&'a str, Output = Sexpr> { + alphanumeric1() + .delimited_by(tag("\""), tag("\"")) + .map(|output: &str| Sexpr::String(output.to_string())) +} + +fn int<'a>() -> impl Parser<&'a str, Output = Sexpr> { + numeric1().map(|output: &str| Sexpr::Int(output.parse().unwrap())) +} + +fn sexpr<'a>(it: InputIter<&'a str>) -> ParserResult<&'a str, Sexpr> { + sexpr + .separated_list(whitespace()) + .delimited_by(tag("("), tag(")")) + .map(|output| Sexpr::List(output)) + .or(atom()) + .or(string()) + .or(int()) + .parse(it) +} + +#[test] +fn test_atom() { + let input = "atom"; + let it = InputIter::new(input); + + atom().check_finished(it).unwrap(); +} + +#[test] +fn test_string() { + let input = r#""string""#; + let it = InputIter::new(input); + + string().check_finished(it).unwrap() +} + +#[test] +fn test_sexpr() { + let input = r#"(let ((a "hello") (b 2)))"#; + let it = InputIter::new(input); + + dbg!(sexpr(it).unwrap()); +} -- cgit v1.2.3