diff options
| author | Jan200101 <sentrycraft123@gmail.com> | 2025-06-07 10:37:02 +0200 |
|---|---|---|
| committer | Jussi Pakkanen <jussi.pakkanen@mailbox.org> | 2025-06-08 23:51:31 +0300 |
| commit | c8432df30edea96557db871d67c731537654afbf (patch) | |
| tree | 8c073d471a4df3f600992f9e54fd2c5519c7088e /mesonbuild/utils/universal.py | |
| parent | 07cd9bc16b0c2215b5d835d738e48085baa8f251 (diff) | |
| download | meson-c8432df30edea96557db871d67c731537654afbf.tar.gz | |
replace regex implementation for cmake with a parser
cmake does preprocessing in a linear pass through the entire file
this allows recursive variable lookups which cannot be supported
by a basic regex implementation.
Diffstat (limited to 'mesonbuild/utils/universal.py')
| -rw-r--r-- | mesonbuild/utils/universal.py | 143 |
1 files changed, 93 insertions, 50 deletions
diff --git a/mesonbuild/utils/universal.py b/mesonbuild/utils/universal.py index bf08fd7a2..4b656a670 100644 --- a/mesonbuild/utils/universal.py +++ b/mesonbuild/utils/universal.py @@ -1240,7 +1240,7 @@ def do_replacement(regex: T.Pattern[str], line: str, if variable_format == 'meson': return do_replacement_meson(regex, line, confdata) elif variable_format in {'cmake', 'cmake@'}: - return do_replacement_cmake(regex, line, variable_format == 'cmake@', confdata) + return do_replacement_cmake(line, variable_format == 'cmake@', confdata) else: raise MesonException('Invalid variable format') @@ -1275,44 +1275,92 @@ def do_replacement_meson(regex: T.Pattern[str], line: str, return var_str return re.sub(regex, variable_replace, line), missing_variables -def do_replacement_cmake(regex: T.Pattern[str], line: str, at_only: bool, +def do_replacement_cmake(line: str, at_only: bool, confdata: T.Union[T.Dict[str, T.Tuple[str, T.Optional[str]]], 'ConfigurationData']) -> T.Tuple[str, T.Set[str]]: missing_variables: T.Set[str] = set() - def variable_replace(match: T.Match[str]) -> str: - # Pairs of escape characters before '@', '\@', '${' or '\${' - if match.group(0).endswith('\\'): - num_escapes = match.end(0) - match.start(0) - return '\\' * (num_escapes // 2) - # Handle cmake escaped \${} tags - elif not at_only and match.group(0) == '\\${': - return '${' - # \@escaped\@ variables - elif match.groupdict().get('escaped') is not None: - return match.group('escaped')[1:-2]+'@' + character_regex = re.compile(r''' + [^a-zA-Z0-9_/.+\-] + ''', re.VERBOSE) + + def variable_get(varname: str) -> str: + var_str = '' + if varname in confdata: + var, _ = confdata.get(varname) + if isinstance(var, str): + var_str = var + elif isinstance(var, bool): + var_str = str(int(var)) + elif isinstance(var, int): + var_str = str(var) + else: + msg = f'Tried to replace variable {varname!r} value with ' \ + f'something other than a string or int: {var!r}' + raise MesonException(msg) else: - # Template variable to be replaced - varname = match.group('variable') - if not varname: - varname = match.group('cmake_variable') - - var_str = '' - if varname in confdata: - var, _ = confdata.get(varname) - if isinstance(var, str): - var_str = var - elif isinstance(var, bool): - var_str = str(int(var)) - elif isinstance(var, int): - var_str = str(var) - else: - msg = f'Tried to replace variable {varname!r} value with ' \ - f'something other than a string or int: {var!r}' + missing_variables.add(varname) + return var_str + + def parse_line(line: str) -> str: + index = 0 + while len(line) > index: + if line[index] == '@': + next_at = line.find("@", index+1) + if next_at > index+1: + varname = line[index+1:next_at] + match = character_regex.search(varname) + + # at substituion doesn't occur if they key isn't valid + # however it also doesn't raise an error + if not match: + value = variable_get(varname) + line = line[:index] + value + line[next_at+1:] + + elif not at_only and line[index:index+2] == '${': + bracket_count = 1 + end_bracket = index + 2 + try: + while bracket_count > 0: + if line[end_bracket:end_bracket+2] == "${": + end_bracket += 2 + bracket_count += 1 + elif line[end_bracket] == "}": + end_bracket += 1 + bracket_count -= 1 + elif line[end_bracket] in {"@", "\n"}: + # these aren't valid variable characters + # but they are inconsequential at this point + end_bracket += 1 + elif character_regex.search(line[end_bracket]): + invalid_character = line[end_bracket] + variable = line[index+2:end_bracket] + msg = f'Found invalid character {invalid_character!r}' \ + f' in variable {variable!r}' + raise MesonException(msg) + else: + end_bracket += 1 + except IndexError: + msg = f'Found incomplete variable {line[index:-1]!r}' raise MesonException(msg) - else: - missing_variables.add(varname) - return var_str - return re.sub(regex, variable_replace, line), missing_variables + + if bracket_count == 0: + varname = parse_line(line[index+2:end_bracket-1]) + match = character_regex.search(varname) + if match: + invalid_character = line[end_bracket-2] + variable = line[index+2:end_bracket-3] + msg = f'Found invalid character {invalid_character!r}' \ + f' in variable {variable!r}' + raise MesonException(msg) + + value = variable_get(varname) + line = line[:index] + value + line[end_bracket:] + + index += 1 + + return line + + return parse_line(line), missing_variables def do_define_meson(regex: T.Pattern[str], line: str, confdata: 'ConfigurationData', subproject: T.Optional[SubProject] = None) -> str: @@ -1341,7 +1389,7 @@ def do_define_meson(regex: T.Pattern[str], line: str, confdata: 'ConfigurationDa else: raise MesonException('#mesondefine argument "%s" is of unknown type.' % varname) -def do_define_cmake(regex: T.Pattern[str], line: str, confdata: 'ConfigurationData', at_only: bool, +def do_define_cmake(line: str, confdata: 'ConfigurationData', at_only: bool, subproject: T.Optional[SubProject] = None) -> str: cmake_bool_define = 'cmakedefine01' in line @@ -1381,12 +1429,12 @@ def do_define_cmake(regex: T.Pattern[str], line: str, confdata: 'ConfigurationDa result = get_cmake_define(line, confdata) result = f'#define {varname} {result}'.strip() + '\n' - result, _ = do_replacement_cmake(regex, result, at_only, confdata) + result, _ = do_replacement_cmake(result, at_only, confdata) return result def get_variable_regex(variable_format: Literal['meson', 'cmake', 'cmake@'] = 'meson') -> T.Pattern[str]: # Only allow (a-z, A-Z, 0-9, _, -) as valid characters for a define - if variable_format in {'meson', 'cmake@'}: + if variable_format == 'meson': # Also allow escaping pairs of '@' with '\@' regex = re.compile(r''' (?:\\\\)+(?=\\?@) # Matches multiple backslashes followed by an @ symbol @@ -1395,17 +1443,13 @@ def get_variable_regex(variable_format: Literal['meson', 'cmake', 'cmake@'] = 'm | # OR (?P<escaped>\\@[-a-zA-Z0-9_]+\\@) # Match an escaped variable enclosed in @ symbols ''', re.VERBOSE) - else: + elif variable_format == 'cmake@': regex = re.compile(r''' - (?:\\\\)+(?=\\?(\$|@)) # Match multiple backslashes followed by a dollar sign or an @ symbol - | # OR - \\\${ # Match a backslash followed by a dollar sign and an opening curly brace - | # OR - \${(?P<cmake_variable>[-a-zA-Z0-9_]*)} # Match a variable enclosed in curly braces and capture the variable name - | # OR (?<!\\)@(?P<variable>[-a-zA-Z0-9_]+)@ # Match a variable enclosed in @ symbols and capture the variable name; no matches beginning with '\@' - | # OR - (?P<escaped>\\@[-a-zA-Z0-9_]+\\@) # Match an escaped variable enclosed in @ symbols + ''', re.VERBOSE) + elif variable_format == "cmake": + regex = re.compile(r''' + \${(?P<variable>[-a-zA-Z0-9_]*)} # Match a variable enclosed in curly braces and capture the variable name ''', re.VERBOSE) return regex @@ -1453,8 +1497,6 @@ def do_conf_str_cmake(src: str, data: T.List[str], confdata: 'ConfigurationData' if at_only: variable_format = 'cmake@' - regex = get_variable_regex(variable_format) - search_token = 'cmakedefine' result: T.List[str] = [] @@ -1466,11 +1508,12 @@ def do_conf_str_cmake(src: str, data: T.List[str], confdata: 'ConfigurationData' stripped_line = line.lstrip() if len(stripped_line) >= 2 and stripped_line[0] == '#' and stripped_line[1:].lstrip().startswith(search_token): confdata_useless = False - line = do_define_cmake(regex, line, confdata, at_only, subproject) + + line = do_define_cmake(line, confdata, at_only, subproject) else: if '#mesondefine' in line: raise MesonException(f'Format error in {src}: saw "{line.strip()}" when format set to "{variable_format}"') - line, missing = do_replacement_cmake(regex, line, at_only, confdata) + line, missing = do_replacement_cmake(line, at_only, confdata) missing_variables.update(missing) if missing: confdata_useless = False |
