From 676eb3f2dd542be3e84780b18388253382d3e465 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Apr 2014 23:00:51 +0200 Subject: [PATCH] Fix unicode_escape (Fixes #2695) --- test/test_utils.py | 4 ++++ youtube_dl/utils.py | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 2348c0415..51eb0b6b9 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -38,6 +38,7 @@ xpath_with_ns, parse_iso8601, strip_jsonp, + uppercase_escape, ) if sys.version_info < (3, 0): @@ -279,6 +280,9 @@ def test_strip_jsonp(self): d = json.loads(stripped) self.assertEqual(d, [{"id": "532cb", "x": 3}]) + def test_uppercase_escape(self): + self.assertEqual(uppercase_escape(u'aä'), u'aä') + self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5f1f664c8..92fee966f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import calendar +import codecs import contextlib import ctypes import datetime @@ -1263,9 +1264,11 @@ def getslice(self, start=0, end=None): def uppercase_escape(s): + unicode_escape = codecs.getdecoder('unicode_escape') return re.sub( r'\\U[0-9a-fA-F]{8}', - lambda m: m.group(0).decode('unicode-escape'), s) + lambda m: unicode_escape(m.group(0))[0], + s) try: struct.pack(u'!I', 0)