diff options
| author | Nazir Bilal Yavuz <byavuz81@gmail.com> | 2023-06-12 13:47:19 +0300 |
|---|---|---|
| committer | Dylan Baker <dylan@pnwbakers.com> | 2023-07-13 09:38:55 -0700 |
| commit | bd3d2cf91894b1f91128011b2cf56a5bd2c326ae (patch) | |
| tree | 1955faa13e2a9b97630b0dc4d540e7428c228710 | |
| parent | 61984bcfa3e4e758d18174d13aa0aaedbf406889 (diff) | |
| download | meson-bd3d2cf91894b1f91128011b2cf56a5bd2c326ae.tar.gz | |
mtest: fix unencodable XML chars
Replace unencodable XML chars with their printable representation so
that xmllint can parse test outputs without error.
Closes #9894
Co-authored-by: Tristan Partin <tristan@partin.io>
| -rw-r--r-- | mesonbuild/mtest.py | 31 | ||||
| -rw-r--r-- | test cases/unit/110 replace unencodable xml chars/meson.build | 4 | ||||
| -rw-r--r-- | test cases/unit/110 replace unencodable xml chars/script.py | 37 | ||||
| -rw-r--r-- | unittests/allplatformstests.py | 51 |
4 files changed, 121 insertions, 2 deletions
diff --git a/mesonbuild/mtest.py b/mesonbuild/mtest.py index 8975dcdff..eb56c42be 100644 --- a/mesonbuild/mtest.py +++ b/mesonbuild/mtest.py @@ -72,6 +72,26 @@ GNU_ERROR_RETURNCODE = 99 # Exit if 3 Ctrl-C's are received within one second MAX_CTRLC = 3 +# Define unencodable xml characters' regex for replacing them with their +# printable representation +UNENCODABLE_XML_UNICHRS: T.List[T.Tuple[int, int]] = [ + (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84), + (0x86, 0x9F), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF)] +# Not narrow build +if sys.maxunicode >= 0x10000: + UNENCODABLE_XML_UNICHRS.extend([ + (0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), + (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF), + (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF), + (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), + (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF), + (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF), + (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), + (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)]) +UNENCODABLE_XML_CHR_RANGES = [fr'{chr(low)}-{chr(high)}' for (low, high) in UNENCODABLE_XML_UNICHRS] +UNENCODABLE_XML_CHRS_RE = re.compile('([' + ''.join(UNENCODABLE_XML_CHR_RANGES) + '])') + + def is_windows() -> bool: platname = platform.system().lower() return platname == 'windows' @@ -1148,14 +1168,21 @@ class TestRunRust(TestRun): TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust +# Check unencodable characters in xml output and replace them with +# their printable representation +def replace_unencodable_xml_chars(original_str: str) -> str: + # [1:-1] is needed for removing `'` characters from both start and end + # of the string + replacement_lambda = lambda illegal_chr: repr(illegal_chr.group())[1:-1] + return UNENCODABLE_XML_CHRS_RE.sub(replacement_lambda, original_str) def decode(stream: T.Union[None, bytes]) -> str: if stream is None: return '' try: - return stream.decode('utf-8') + return replace_unencodable_xml_chars(stream.decode('utf-8')) except UnicodeDecodeError: - return stream.decode('iso-8859-1', 
errors='ignore') + return replace_unencodable_xml_chars(stream.decode('iso-8859-1', errors='ignore')) async def read_decode(reader: asyncio.StreamReader, queue: T.Optional['asyncio.Queue[T.Optional[str]]'], diff --git a/test cases/unit/110 replace unencodable xml chars/meson.build b/test cases/unit/110 replace unencodable xml chars/meson.build new file mode 100644 index 000000000..2e6b1b793 --- /dev/null +++ b/test cases/unit/110 replace unencodable xml chars/meson.build @@ -0,0 +1,4 @@ +project('replace unencodable xml chars') + +test_script = find_program('script.py') +test('main', test_script) diff --git a/test cases/unit/110 replace unencodable xml chars/script.py b/test cases/unit/110 replace unencodable xml chars/script.py new file mode 100644 index 000000000..2f2d4d67b --- /dev/null +++ b/test cases/unit/110 replace unencodable xml chars/script.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +import sys + +# Print base string(\nHello Meson\n) to see valid chars are not replaced +print('\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n') +# Print invalid input from all known unencodable chars +print( + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11' + '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f') + +# Cover for potential encoding issues +try: + print( + '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f' + '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' + '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8' + '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1' + '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea' + '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff') +except: + pass + +# Cover for potential encoding issues +try: + if sys.maxunicode >= 0x10000: + print( + '\U0001fffe\U0001ffff\U0002fffe\U0002ffff' + '\U0003fffe\U0003ffff\U0004fffe\U0004ffff' + '\U0005fffe\U0005ffff\U0006fffe\U0006ffff' + '\U0007fffe\U0007ffff\U0008fffe\U0008ffff' + '\U0009fffe\U0009ffff\U000afffe\U000affff' + 
'\U000bfffe\U000bffff\U000cfffe\U000cffff' + '\U000dfffe\U000dffff\U000efffe\U000effff' + '\U000ffffe\U000fffff\U0010fffe\U0010ffff') +except: + pass diff --git a/unittests/allplatformstests.py b/unittests/allplatformstests.py index 438e4fef5..db8a2f04b 100644 --- a/unittests/allplatformstests.py +++ b/unittests/allplatformstests.py @@ -59,6 +59,7 @@ from mesonbuild.linkers import linkers from mesonbuild.dependencies.pkgconfig import PkgConfigDependency from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary +from mesonbuild import mtest import mesonbuild.modules.pkgconfig from mesonbuild.scripts import destdir_join @@ -398,6 +399,56 @@ class AllPlatformTests(BasePlatformTests): self.assertTrue(compdb[3]['file'].endswith("libfile4.c")) # FIXME: We don't have access to the linker command + def test_replace_unencodable_xml_chars(self): + ''' + Test that unencodable xml chars are replaced with their + printable representation + https://github.com/mesonbuild/meson/issues/9894 + ''' + # Create base string(\nHello Meson\n) to see valid chars are not replaced + base_string_invalid = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n' + base_string_valid = '\nHello Meson\n' + # Create invalid input from all known unencodable chars + invalid_string = ( + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11' + '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f' + '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f' + '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' + '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8' + '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1' + '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea' + '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff') + if sys.maxunicode >= 0x10000: + invalid_string = invalid_string + ( + '\U0001fffe\U0001ffff\U0002fffe\U0002ffff' + '\U0003fffe\U0003ffff\U0004fffe\U0004ffff' + '\U0005fffe\U0005ffff\U0006fffe\U0006ffff' + 
'\U0007fffe\U0007ffff\U0008fffe\U0008ffff' + '\U0009fffe\U0009ffff\U000afffe\U000affff' + '\U000bfffe\U000bffff\U000cfffe\U000cffff' + '\U000dfffe\U000dffff\U000efffe\U000effff' + '\U000ffffe\U000fffff\U0010fffe\U0010ffff') + + valid_string = base_string_valid + repr(invalid_string)[1:-1] + base_string_valid + invalid_string = base_string_invalid + invalid_string + base_string_invalid + broken_xml_stream = invalid_string.encode() + decoded_broken_stream = mtest.decode(broken_xml_stream) + self.assertEqual(decoded_broken_stream, valid_string) + + def test_replace_unencodable_xml_chars_unit(self): + ''' + Test that unencodable xml chars are replaced with their + printable representation + https://github.com/mesonbuild/meson/issues/9894 + ''' + if not shutil.which('xmllint'): + raise SkipTest('xmllint not installed') + testdir = os.path.join(self.unit_test_dir, '110 replace unencodable xml chars') + self.init(testdir) + self.run_tests() + junit_xml_logs = Path(self.logdir, 'testlog.junit.xml') + subprocess.run(['xmllint', junit_xml_logs], check=True) + def test_run_target_files_path(self): ''' Test that run_targets are run from the correct directory |
