summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nazir Bilal Yavuz <byavuz81@gmail.com> 2023-06-12 13:47:19 +0300
committer Dylan Baker <dylan@pnwbakers.com> 2023-07-13 09:38:55 -0700
commit bd3d2cf91894b1f91128011b2cf56a5bd2c326ae (patch)
tree 1955faa13e2a9b97630b0dc4d540e7428c228710
parent 61984bcfa3e4e758d18174d13aa0aaedbf406889 (diff)
download meson-bd3d2cf91894b1f91128011b2cf56a5bd2c326ae.tar.gz
mtest: fix unencodable XML chars
Replace unencodable XML chars with their printable representation so that xmllint can parse test outputs without error.

Closes #9894
Co-authored-by: Tristan Partin <tristan@partin.io>
-rw-r--r-- mesonbuild/mtest.py 31
-rw-r--r-- test cases/unit/110 replace unencodable xml chars/meson.build 4
-rw-r--r-- test cases/unit/110 replace unencodable xml chars/script.py 37
-rw-r--r-- unittests/allplatformstests.py 51
4 files changed, 121 insertions, 2 deletions
diff --git a/mesonbuild/mtest.py b/mesonbuild/mtest.py
index 8975dcdff..eb56c42be 100644
--- a/mesonbuild/mtest.py
+++ b/mesonbuild/mtest.py
@@ -72,6 +72,26 @@ GNU_ERROR_RETURNCODE = 99
# Exit if 3 Ctrl-C's are received within one second
MAX_CTRLC = 3
+# Code point ranges of characters that cannot be encoded in XML, and the
+# regex built from them for replacing such characters with their printable
+# representation
+UNENCODABLE_XML_UNICHRS: T.List[T.Tuple[int, int]] = [
+ (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
+ (0x86, 0x9F), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF)]
+# Only on non-narrow Unicode builds, where code points above 0xFFFF exist
+if sys.maxunicode >= 0x10000:
+ UNENCODABLE_XML_UNICHRS.extend([
+ (0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF),
+ (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF),
+ (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
+ (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF),
+ (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF),
+ (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
+ (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF),
+ (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)])
+UNENCODABLE_XML_CHR_RANGES = [fr'{chr(low)}-{chr(high)}' for (low, high) in UNENCODABLE_XML_UNICHRS]
+UNENCODABLE_XML_CHRS_RE = re.compile('([' + ''.join(UNENCODABLE_XML_CHR_RANGES) + '])')
+
+
def is_windows() -> bool:
platname = platform.system().lower()
return platname == 'windows'
@@ -1148,14 +1168,21 @@ class TestRunRust(TestRun):
TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust
+# Check unencodable characters in xml output and replace them with
+# their printable representation
+def replace_unencodable_xml_chars(original_str: str) -> str:
+ # [1:-1] strips the quote characters that repr() puts at the start and
+ # end of the string
+ replacement_lambda = lambda illegal_chr: repr(illegal_chr.group())[1:-1]
+ return UNENCODABLE_XML_CHRS_RE.sub(replacement_lambda, original_str)
def decode(stream: T.Union[None, bytes]) -> str:
if stream is None:
return ''
try:
- return stream.decode('utf-8')
+ return replace_unencodable_xml_chars(stream.decode('utf-8'))
except UnicodeDecodeError:
- return stream.decode('iso-8859-1', errors='ignore')
+ return replace_unencodable_xml_chars(stream.decode('iso-8859-1', errors='ignore'))
async def read_decode(reader: asyncio.StreamReader,
queue: T.Optional['asyncio.Queue[T.Optional[str]]'],
diff --git a/test cases/unit/110 replace unencodable xml chars/meson.build b/test cases/unit/110 replace unencodable xml chars/meson.build
new file mode 100644
index 000000000..2e6b1b793
--- /dev/null
+++ b/test cases/unit/110 replace unencodable xml chars/meson.build
@@ -0,0 +1,4 @@
+project('replace unencodable xml chars')
+
+test_script = find_program('script.py')
+test('main', test_script)
diff --git a/test cases/unit/110 replace unencodable xml chars/script.py b/test cases/unit/110 replace unencodable xml chars/script.py
new file mode 100644
index 000000000..2f2d4d67b
--- /dev/null
+++ b/test cases/unit/110 replace unencodable xml chars/script.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+import sys
+
+# Print the base string (\nHello Meson\n) to check that valid chars are not replaced
+print('\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n')
+# Print invalid input from all known unencodable chars
+print(
+ '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
+ '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f')
+
+# Guard the print against potential encoding errors
+try:
+ print(
+ '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
+ '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
+ '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
+ '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
+ '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
+ '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
+except:
+ pass
+
+# Guard the print against potential encoding errors
+try:
+ if sys.maxunicode >= 0x10000:
+ print(
+ '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
+ '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
+ '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
+ '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
+ '\U0009fffe\U0009ffff\U000afffe\U000affff'
+ '\U000bfffe\U000bffff\U000cfffe\U000cffff'
+ '\U000dfffe\U000dffff\U000efffe\U000effff'
+ '\U000ffffe\U000fffff\U0010fffe\U0010ffff')
+except:
+ pass
diff --git a/unittests/allplatformstests.py b/unittests/allplatformstests.py
index 438e4fef5..db8a2f04b 100644
--- a/unittests/allplatformstests.py
+++ b/unittests/allplatformstests.py
@@ -59,6 +59,7 @@ from mesonbuild.linkers import linkers
from mesonbuild.dependencies.pkgconfig import PkgConfigDependency
from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary
+from mesonbuild import mtest
import mesonbuild.modules.pkgconfig
from mesonbuild.scripts import destdir_join
@@ -398,6 +399,56 @@ class AllPlatformTests(BasePlatformTests):
self.assertTrue(compdb[3]['file'].endswith("libfile4.c"))
# FIXME: We don't have access to the linker command
+ def test_replace_unencodable_xml_chars(self):
+ '''
+ Test that unencodable xml chars are replaced with their
+ printable representation
+ https://github.com/mesonbuild/meson/issues/9894
+ '''
+ # Create the base string (\nHello Meson\n) to check that valid chars are not replaced
+ base_string_invalid = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n'
+ base_string_valid = '\nHello Meson\n'
+ # Create invalid input from all known unencodable chars
+ invalid_string = (
+ '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
+ '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f'
+ '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
+ '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
+ '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
+ '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
+ '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
+ '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
+ if sys.maxunicode >= 0x10000:
+ invalid_string = invalid_string + (
+ '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
+ '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
+ '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
+ '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
+ '\U0009fffe\U0009ffff\U000afffe\U000affff'
+ '\U000bfffe\U000bffff\U000cfffe\U000cffff'
+ '\U000dfffe\U000dffff\U000efffe\U000effff'
+ '\U000ffffe\U000fffff\U0010fffe\U0010ffff')
+
+ valid_string = base_string_valid + repr(invalid_string)[1:-1] + base_string_valid
+ invalid_string = base_string_invalid + invalid_string + base_string_invalid
+ broken_xml_stream = invalid_string.encode()
+ decoded_broken_stream = mtest.decode(broken_xml_stream)
+ self.assertEqual(decoded_broken_stream, valid_string)
+
+ def test_replace_unencodable_xml_chars_unit(self):
+ '''
+ Test that unencodable xml chars are replaced with their
+ printable representation
+ https://github.com/mesonbuild/meson/issues/9894
+ '''
+ if not shutil.which('xmllint'):
+ raise SkipTest('xmllint not installed')
+ testdir = os.path.join(self.unit_test_dir, '110 replace unencodable xml chars')
+ self.init(testdir)
+ self.run_tests()
+ junit_xml_logs = Path(self.logdir, 'testlog.junit.xml')
+ subprocess.run(['xmllint', junit_xml_logs], check=True)
+
def test_run_target_files_path(self):
'''
Test that run_targets are run from the correct directory