diff options
Diffstat (limited to 'crates/atom/src/parsers.rs')
| -rw-r--r-- | crates/atom/src/parsers.rs | 599 |
1 files changed, 599 insertions, 0 deletions
diff --git a/crates/atom/src/parsers.rs b/crates/atom/src/parsers.rs new file mode 100644 index 0000000..2f8cb8c --- /dev/null +++ b/crates/atom/src/parsers.rs @@ -0,0 +1,599 @@ +use core::option::Option::None; + +use crate::{ + Atom, Blocker, BuildId, Category, Cp, Cpv, Name, Repo, Slot, SlotName, SlotOperator, + UseDep, UseDepCondition, UseDepNegate, UseDepSign, Version, VersionNumber, VersionNumbers, + VersionOperator, VersionSuffix, VersionSuffixKind, VersionSuffixes, Wildcard, +}; + +use mon::{ + Parser, ParserIter, ascii_alphanumeric, ascii_numeric, ascii_numeric1, eof, r#if, + input::InputIter, one_of, tag, +}; + +use parseable::Parseable; + +use useflag::UseFlag; + +impl<'a> Parseable<'a, &'a str> for Blocker { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + tag("!!") + .map(|_| Blocker::Strong) + .or(tag("!").map(|_| Blocker::Weak)) + } +} + +impl<'a> Parseable<'a, &'a str> for VersionOperator { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + tag("<=") + .map(|_| VersionOperator::LtEq) + .or(tag(">=").map(|_| VersionOperator::GtEq)) + .or(tag("<").map(|_| VersionOperator::Lt)) + .or(tag(">").map(|_| VersionOperator::Gt)) + .or(tag("=").map(|_| VersionOperator::Eq)) + .or(tag("~").map(|_| VersionOperator::Roughly)) + } +} + +impl<'a> Parseable<'a, &'a str> for VersionNumber { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + ascii_numeric1().map(|output: &str| VersionNumber(output.to_string())) + } +} + +impl<'a> Parseable<'a, &'a str> for BuildId { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let start = ascii_numeric().and_not(tag("0")); + let rest = ascii_numeric().repeated().many(); + + start + .and(rest) + .recognize() + .or(tag("0")) + .map(|output: &str| BuildId(output.to_string())) + } +} + +impl<'a> Parseable<'a, &'a str> for VersionSuffixKind { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + tag("alpha") + .map(|_| VersionSuffixKind::Alpha) + .or(tag("beta").map(|_| VersionSuffixKind::Beta)) + .or(tag("pre").map(|_| VersionSuffixKind::Pre)) + .or(tag("rc").map(|_| VersionSuffixKind::Rc)) + .or(tag("p").map(|_| VersionSuffixKind::P)) + } +} + +impl<'a> Parseable<'a, &'a str> for VersionSuffix { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + VersionSuffixKind::parser() + .and(VersionNumber::parser().opt()) + .map(|(kind, number)| VersionSuffix { kind, number }) + } +} + +impl<'a> Parseable<'a, &'a str> for VersionNumbers { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + VersionNumber::parser() + .separated_by(tag(".")) + .at_least(1) + .map(VersionNumbers) + } +} + +impl<'a> Parseable<'a, &'a str> for VersionSuffixes { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + VersionSuffix::parser() + .separated_by(tag("_")) + .at_least(1) + .map(VersionSuffixes) + } +} + +impl<'a> Parseable<'a, &'a str> for Version { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let rev = VersionNumber::parser().preceded_by(tag("-r")); + let build_id = BuildId::parser().preceded_by(tag("-")); + + VersionNumbers::parser() + .and(r#if(|c: &char| c.is_ascii_alphabetic() && c.is_ascii_lowercase()).opt()) + .and(VersionSuffixes::parser().preceded_by(tag("_")).opt()) + .and(rev.opt()) + .and(build_id.opt()) + .map(|((((numbers, letter), suffixes), rev), build_id)| Version { + numbers, + letter, + suffixes: suffixes.unwrap_or(VersionSuffixes(Vec::new())), + rev, + build_id, + }) + } +} + +impl<'a> Parseable<'a, &'a str> for Category { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let start = ascii_alphanumeric().or(one_of("_".chars())); + let rest = ascii_alphanumeric() + .or(one_of("+_.-".chars())) + .repeated() + .many(); + + start + .and(rest) + .recognize() + .map(|output: &str| Category(output.to_string())) + } +} + +impl<'a> Parseable<'a, &'a str> for Name { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let start = || ascii_alphanumeric().or(one_of("_".chars())); + + let rest = ascii_alphanumeric() + .or(one_of("_+".chars())) + .or(one_of("-".chars()).and_not( + Version::parser() + .preceded_by(tag("-")) + .followed_by(ascii_alphanumeric().or(one_of("_+-".chars())).not()), + )) + .repeated() + .many(); + + let verify = ascii_alphanumeric() + .or(one_of("_+".chars())) + .or(one_of("-".chars()) + .and_not(Version::parser().preceded_by(tag("-")).followed_by(eof()))) + .repeated() + .many(); + + start() + .and(rest) + .recognize() + .verify_output(move |output: &&str| { + verify.check_finished(InputIter::new(*output)).is_ok() + }) + .map(|output: &str| Name(output.to_string())) + } +} + +impl<'a> Parseable<'a, &'a str> for SlotOperator { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + tag("=") + .map(|_| SlotOperator::Eq) + .or(tag("*").map(|_| SlotOperator::Star)) + } +} + +impl<'a> Parseable<'a, &'a str> for SlotName { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let start = ascii_alphanumeric().or(one_of("_".chars())); + let rest = ascii_alphanumeric() + .or(one_of("+_.-".chars())) + .repeated() + .many(); + + start + .and(rest) + .recognize() + .map(|output: &str| SlotName(output.to_string())) + } +} + +impl<'a> Parseable<'a, &'a str> for Slot { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let wildcard = tag("*").map(|_| Slot::Wildcard); + let equal = tag("=").map(|_| Slot::Equal); + let name_equal = SlotName::parser() + .and(SlotName::parser().preceded_by(tag("/")).opt()) + .followed_by(tag("=")) + .map(|(primary, sub)| Slot::NameEqual { primary, sub }); + let name = SlotName::parser() + .and(SlotName::parser().preceded_by(tag("/")).opt()) + .map(|(primary, sub)| Self::Name { primary, sub }); + + wildcard.or(equal).or(name_equal).or(name) + } +} + +impl<'a> Parseable<'a, &'a str> for UseDepSign { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + tag("(-)") + .map(|_| UseDepSign::Disabled) + .or(tag("(+)").map(|_| UseDepSign::Enabled)) + } +} + +impl<'a> Parseable<'a, &'a str> for Repo { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let start = ascii_alphanumeric().or(one_of("_".chars())); + let rest = ascii_alphanumeric() + .or(one_of("_-".chars())) + .repeated() + .many(); + + start + .and(rest) + .recognize() + .verify_output(move |output: &&str| { + Name::parser() + .check_finished(InputIter::new(*output)) + .is_ok() + }) + .map(|output: &str| Repo(output.to_string())) + } +} + +impl<'a> Parseable<'a, &'a str> for UseDep { + type Parser = impl Parser<&'a str, Output = Self>; + + #[allow(clippy::many_single_char_names)] + fn parser() -> Self::Parser { + let a = UseFlag::parser() + .and(UseDepSign::parser().opt()) + .preceded_by(tag("-")) + .map(|(flag, sign)| UseDep { + negate: Some(UseDepNegate::Minus), + flag, + sign, + condition: None, + }); + + let b = UseFlag::parser() + .and(UseDepSign::parser().opt()) + .preceded_by(tag("!")) + .followed_by(tag("?")) + .map(|(flag, sign)| UseDep { + negate: Some(UseDepNegate::Exclamation), + flag, + sign, + condition: Some(UseDepCondition::Question), + }); + + let c = UseFlag::parser() + .and(UseDepSign::parser().opt()) + .followed_by(tag("?")) + .map(|(flag, sign)| UseDep { + negate: None, + flag, + sign, + condition: Some(UseDepCondition::Question), + }); + + let d = UseFlag::parser() + .and(UseDepSign::parser().opt()) + .preceded_by(tag("!")) + .followed_by(tag("=")) + .map(|(flag, sign)| UseDep { + negate: Some(UseDepNegate::Exclamation), + flag, + sign, + condition: Some(UseDepCondition::Eq), + }); + + let e = UseFlag::parser() + .and(UseDepSign::parser().opt()) + .followed_by(tag("=")) + .map(|(flag, sign)| UseDep { + negate: None, + flag, + sign, + condition: Some(UseDepCondition::Eq), + }); + + let f = UseFlag::parser() + .and(UseDepSign::parser().opt()) + .map(|(flag, sign)| UseDep { + negate: None, + flag, + sign, + condition: None, + }); + + a.or(b).or(c).or(d).or(e).or(f) + } +} + +impl<'a> Parseable<'a, &'a str> for Atom { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + let usedeps = || { + UseDep::parser() + .separated_by(tag(",")) + .at_least(1) + .delimited_by(tag("["), tag("]")) + .opt() + }; + + let without_version = Blocker::parser() + .opt() + .and(Category::parser()) + .and(Name::parser().preceded_by(tag("/"))) + .and(Slot::parser().preceded_by(tag(":")).opt()) + .and(Repo::parser().preceded_by(tag("::")).opt()) + .and(usedeps()) + .map( + |(((((blocker, category), name), slot), repo), usedeps)| Atom { + blocker, + category, + name, + version: None, + slot, + repo, + usedeps: usedeps.unwrap_or(Vec::new()), + }, + ); + + let with_version = Blocker::parser() + .opt() + .and(VersionOperator::parser()) + .and(Category::parser()) + .and(Name::parser().preceded_by(tag("/"))) + .and(Version::parser().preceded_by(tag("-"))) + .and(tag("*").map(|_| Wildcard).opt()) + .and(Slot::parser().preceded_by(tag(":")).opt()) + .and(Repo::parser().preceded_by(tag("::")).opt()) + .and(usedeps()) + .verify_output( + |((((((((_, version_operator), _), _), version), star), _), _), _)| { + matches!( + (version_operator, star), + (VersionOperator::Eq, Some(_) | None) | (_, None) + ) && matches!((version.build_id(), star), (Some(_), None) | (None, _)) + }, + ) + .map( + |( + ( + ((((((blocker, version_operator), category), name), version), star), slot), + repo, + ), + usedeps, + )| { + Atom { + blocker, + category, + name, + version: Some((version_operator, version, star)), + slot, + repo, + usedeps: usedeps.unwrap_or(Vec::new()), + } + }, + ); + + with_version.or(without_version) + } +} + +impl<'a> Parseable<'a, &'a str> for Cp { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + Category::parser() + .and(Name::parser().preceded_by(tag("/"))) + .map(|(category, name)| Cp { category, name }) + } +} + +impl<'a> Parseable<'a, &'a str> for Cpv { + type Parser = impl Parser<&'a str, Output = Self>; + + fn parser() -> Self::Parser { + Category::parser() + .and(Name::parser().preceded_by(tag("/"))) + .and(Version::parser().preceded_by(tag("-"))) + .and(Slot::parser().preceded_by(tag(":")).opt()) + .map(|(((category, name), version), slot)| Cpv { + category, + name, + version, + slot, + }) + } +} + +#[cfg(test)] +mod test { + + use mon::input::InputIter; + + use super::*; + + #[test] + fn test_version() { + let it = InputIter::new("1.0.0v_alpha1_beta1-r1"); + + Version::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_name() { + let it = InputIter::new("foo-1-bar-1.0.0"); + + match Name::parser().parse(it) { + Ok((_, output)) => { + assert_eq!(output.0.as_str(), "foo-1-bar"); + } + _ => unreachable!(), + } + } + + #[test] + fn test_atom() { + let it = InputIter::new( + "!!>=cat/pkg-1-foo-1.0.0v_alpha1_p20250326-r1:primary/sub=[use,use=,!use=,use?,!use?,-use,use(+),use(-)]", + ); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_cursed_atom() { + let it = InputIter::new( + "!!>=_.+-0-/_-test-T-123_beta1_-4a-6+-_p--1.00.02b_alpha3_pre_p4-r5:slot/_-+6-9=[test(+),test(-)]", + ); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_atom_with_star_in_non_empty_slot() { + let it = InputIter::new("foo/bar:*/subslot"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_invalid_usedep() { + let it = InputIter::new("foo-bar:slot/sub=[!use]"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_empty_slot() { + let it = InputIter::new("=dev-ml/uucp-17*:"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_usedep_with_underscore() { + let it = InputIter::new("foo/bar[use_dep]"); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_version_with_uppercase_letter() { + let it = InputIter::new("=foo/bar-1.0.0V"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_version_with_version_operator_without_version() { + let it = InputIter::new("=foo/bar"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_version_with_version_without_version_operator() { + let it = InputIter::new("foo/bar-1.0.0"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_atom_with_eq_version_operator() { + let it = InputIter::new("=foo/bar-1.0.0"); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_atom_with_star_in_version() { + let it = InputIter::new("=foo/bar-1.2*"); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_atom_with_star_in_version_without_eq_version_operator() { + let it = InputIter::new(">=foo/bar-1.2*"); + + assert!(Atom::parser().check_finished(it).is_err()); + } + + #[test] + fn test_atom_with_trailing_dash_and_letter() { + let it = InputIter::new("dev-db/mysql-connector-c"); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_cpv_with_slot() { + let it = InputIter::new("foo/bar-1.0:slot/sub="); + + Cpv::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_cpv_without_version_but_trailing_almost_version() { + let it = InputIter::new("dev-perl/mod-p-2.3_"); + + assert!(Cpv::parser().parse_finished(it).is_err()); + } + + #[test] + fn test_empty_slot_with_operator() { + let it = InputIter::new("foo/bar:="); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_with_repo() { + let it = InputIter::new("=foo/bar-1.0.0:slot/sub=::gentoo[a,b,c]"); + + Atom::parser().check_finished(it).unwrap(); + } + + #[test] + fn test_against_fuzzer_false_positives() { + let atoms = [ + "media-libs/libsdl2[haptitick(+),sound(+)vd,eio(+)]", + "=kde-frameworks/kcodecs-6.19*86", + "=dev-ml/stdio-0.17*t:=[ocamlopt?]", + ">=dev-libs/libgee-0-8.5:0..8=", + "<dev-haskell/wai-3.3:=[]", + ">=kde-frameworks/kcrash-2.16.0:6*", + "0-f/merreka+m::k+", + "iev-a/h:/n=", + "=dev-ml/stdio-0-17*:=[ocamlopt?]", + ]; + + for atom in atoms { + assert!( + Atom::parser().check_finished(InputIter::new(atom)).is_err(), + "{atom}" + ); + } + } +} |
