From 348c11e4571b534efdbd58a575bbea979c880b2f Mon Sep 17 00:00:00 2001 From: Tim Eves Date: Wed, 1 Mar 2017 14:23:46 +0700 Subject: [PATCH] Fix decoding of USV greater than U+110000 Add test cases too --- src/inc/UtfCodec.h | 4 ++-- tests/utftest/utftest.cpp | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/inc/UtfCodec.h b/src/inc/UtfCodec.h index 3417bac..9dc760f 100644 --- a/src/inc/UtfCodec.h +++ b/src/inc/UtfCodec.h @@ -124,7 +124,7 @@ struct _utf_codec<8> private: static const int8 sz_lut[16]; static const byte mask_lut[5]; - + static const uchar_t limit = 0x110000; public: typedef uint8 codeunit_t; @@ -157,7 +157,7 @@ public: case 0: l = -1; return 0xFFFD; } - if (l != seq_sz || toolong) + if (l != seq_sz || toolong || u >= limit) { l = -l; return 0xFFFD; diff --git a/tests/utftest/utftest.cpp b/tests/utftest/utftest.cpp index 21cb188..a23553a 100644 --- a/tests/utftest/utftest.cpp +++ b/tests/utftest/utftest.cpp @@ -8,6 +8,9 @@ struct test8 unsigned char str[12]; }; struct test8 tests8[] = { + { 0, 0, {0xF4, 0x90, 0x80, 0x80, 0, 0, 0, 0, 0, 0, 0, 0} }, // bad(4) [U+110000] + { 0, 0, {0xC0, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, // bad(4) [U+110000] + { 0, 0, {0xA0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, // bad(4) [U+110000] { 4, -1, {0x7F, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0, 0} }, // U+7F, U+7FF, U+FFFF, U+10FFF { 2, 3, {0x7F, 0xDF, 0xBF, 0xF0, 0x8F, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, U+7FF, long(U+FFFF), U+10FFF { 1, 1, {0x7F, 0xE0, 0x9F, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, long(U+7FF), U+FFFF, U+10FFF -- 2.12.2