From 7fa1e34c22f4c5bfa99925560be9c23bb2d6d670 Mon Sep 17 00:00:00 2001 From: John Turner Date: Thu, 20 Nov 2025 23:12:54 +0000 Subject: impl ascii alternatives to alphabetic, numeric and whitespace methods and parsers --- src/input.rs | 26 ++++++++++++++++++++ src/lib.rs | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/sexpr.rs | 14 +++++------ 3 files changed, 109 insertions(+), 7 deletions(-) diff --git a/src/input.rs b/src/input.rs index 0857c8c..0b38556 100644 --- a/src/input.rs +++ b/src/input.rs @@ -48,22 +48,44 @@ impl<'a> Input for &'a [u8] { } pub trait Character { + fn is_ascii(&self) -> bool; + fn is_alphabetic(&self) -> bool; + fn is_ascii_alphabetic(&self) -> bool; + fn is_numeric(&self) -> bool; + fn is_ascii_numeric(&self) -> bool { + self.is_ascii() && self.is_numeric() + } + fn is_whitespace(&self) -> bool; + fn is_ascii_whitespace(&self) -> bool; + fn is_alphanumeric(&self) -> bool { self.is_alphabetic() || self.is_numeric() } + + fn is_ascii_alphanumeric(&self) -> bool { + self.is_ascii_alphabetic() || self.is_ascii_numeric() + } } impl Character for char { + fn is_ascii(&self) -> bool { + (*self).is_ascii() + } + fn is_alphabetic(&self) -> bool { (*self).is_ascii_alphabetic() } + fn is_ascii_alphabetic(&self) -> bool { + (*self).is_ascii_alphabetic() + } + fn is_numeric(&self) -> bool { (*self).is_numeric() } @@ -71,6 +93,10 @@ impl Character for char { fn is_whitespace(&self) -> bool { (*self).is_whitespace() } + + fn is_ascii_whitespace(&self) -> bool { + (*self).is_ascii_whitespace() + } } #[derive(Clone)] diff --git a/src/lib.rs b/src/lib.rs index fe9a720..5d6c3d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -965,6 +965,14 @@ where r#if(|c: &I::Item| c.is_alphabetic()) } +pub fn ascii_alpha() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii_alphabetic()) +} + pub fn alpha1() -> impl Parser where I: Input, @@ -976,6 +984,17 @@ where .recognize() } +pub fn ascii_alpha1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii_alphabetic()) + .repeated() + .at_least(1) + .recognize() +} + pub fn numeric() -> impl Parser where I: Input, @@ -984,6 +1003,14 @@ where r#if(|c: &I::Item| c.is_numeric()) } +pub fn ascii_numeric() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii() && c.is_numeric()) +} + pub fn numeric1() -> impl Parser where I: Input, @@ -995,6 +1022,17 @@ where .recognize() } +pub fn ascii_numeric1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii() && c.is_numeric()) + .repeated() + .at_least(1) + .recognize() +} + pub fn alphanumeric() -> impl Parser where I: Input, @@ -1003,6 +1041,14 @@ where r#if(|c: &I::Item| c.is_alphanumeric()) } +pub fn ascii_alphanumeric() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii_alphanumeric()) +} + pub fn alphanumeric1() -> impl Parser where I: Input, @@ -1014,6 +1060,17 @@ where .recognize() } +pub fn ascii_alphanumeric1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii_alphanumeric()) + .repeated() + .at_least(1) + .recognize() +} + pub fn whitespace() -> impl Parser where I: Input, @@ -1022,6 +1079,14 @@ where r#if(|c: &I::Item| c.is_whitespace()) } +pub fn ascii_whitespace() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii_whitespace()) +} + pub fn whitespace1() -> impl Parser where I: Input, @@ -1033,6 +1098,17 @@ where .recognize() } +pub fn ascii_whitespace1() -> impl Parser +where + I: Input, + I::Item: Character, +{ + r#if(|c: &I::Item| c.is_ascii_whitespace()) + .repeated() + .at_least(1) + .recognize() +} + pub fn any() -> impl Parser where I: Input, diff --git a/tests/sexpr.rs b/tests/sexpr.rs index 77ef3a0..a3783f0 100644 --- a/tests/sexpr.rs +++ b/tests/sexpr.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] use mon::{ - Parser, ParserIter, alpha1, alphanumeric, alphanumeric1, input::InputIter, numeric1, tag, - whitespace, whitespace1, + Parser, ParserIter, ascii_alpha1, ascii_alphanumeric, ascii_alphanumeric1, ascii_numeric1, + ascii_whitespace1, input::InputIter, tag, }; #[derive(Debug)] @@ -14,20 +14,20 @@ enum Sexpr { } fn atom<'a>() -> impl Parser<&'a str, Output = Sexpr> { - alpha1() - .and(alphanumeric().repeated().many()) + ascii_alpha1() + .and(ascii_alphanumeric().repeated().many()) .recognize() .map(|output: &str| Sexpr::Atom(output.to_string())) } fn string<'a>() -> impl Parser<&'a str, Output = Sexpr> { - alphanumeric1() + ascii_alphanumeric1() .delimited_by(tag("\""), tag("\"")) .map(|output: &str| Sexpr::String(output.to_string())) } fn int<'a>() -> impl Parser<&'a str, Output = Sexpr> { - numeric1().map(|output: &str| Sexpr::Int(output.parse().unwrap())) + ascii_numeric1().map(|output: &str| Sexpr::Int(output.parse().unwrap())) } // Recursive parsers must avoid an infinite loop, you can do this @@ -35,7 +35,7 @@ fn int<'a>() -> impl Parser<&'a str, Output = Sexpr> { fn sexpr<'a>() -> impl Parser<&'a str, Output = Sexpr> { |it| { sexpr() - .separated_by(whitespace1()) + .separated_by(ascii_whitespace1()) .many() .delimited_by(tag("("), tag(")")) .map(|output| Sexpr::List(output)) -- cgit v1.2.3