Fix compatibility with Jython
This patch was taken from
https://github.com/yaml/pyyaml/issues/369#issuecomment-571596545,
authored by Pekka Klärck <peke@iki.fi>.
In short, Jython doesn't support lone surrogates, so importing yaml (and
in particular, loading `reader.py`) caused a UnicodeDecodeError. This
patch works around this through a clever use of `eval` to defer
evaluation of the string containing the lone surrogates, only doing it
on non-Jython platforms.
This is only done in `lib/yaml/reader.py` and not `lib3/yaml/reader.py`
because Jython does not support Python 3.
With this patch, Jython's behavior with respect to Unicode code points
over 0xFFFF becomes as it was before commit 0716ae21a1. It still does not
pass all the
unit tests on Jython (1275 pass, 3 fail, 1 errors out); all the
failing tests are related to Unicode. Still, this is better than simply
crashing upon `import yaml`.
With this patch, all tests continue to pass on Python 2 / Python 3.
This commit is contained in:
parent
ee98abd7d7
commit
a60f7a19c0
|
@ -137,9 +137,14 @@ class Reader(object):
|
|||
self.update(1)
|
||||
|
||||
if has_ucs4:
|
||||
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
|
||||
NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]'
|
||||
elif sys.platform.startswith('java'):
|
||||
# Jython doesn't support lone surrogates https://bugs.jython.org/issue2048
|
||||
NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]'
|
||||
else:
|
||||
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)')
|
||||
# Need to use eval here due to the above Jython issue
|
||||
NON_PRINTABLE = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'")
|
||||
NON_PRINTABLE = re.compile(NON_PRINTABLE)
|
||||
def check_printable(self, data):
|
||||
match = self.NON_PRINTABLE.search(data)
|
||||
if match:
|
||||
|
|
Loading…
Reference in New Issue