1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Support/ConvertUTF.h"
11 #include "gtest/gtest.h"
// Feeds convertUTF16ToUTF8String a little-endian UTF-16 buffer that starts
// with the 0xFF 0xFE byte order mark and compares the UTF-8 output against a
// hand-encoded expectation.
// NOTE(review): the embedded listing numbers are not contiguous here, so the
// declaration of Result, any check of Success and the closing brace are not
// visible in this view -- confirm against the full file.
18 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
19 // Src is the look of disapproval: BOM, U+0CA0, '_' (U+005F), U+0CA0,
// with every 16-bit unit stored low byte first.
20 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
// sizeof(Src) - 1 leaves out the implicit NUL terminator of the literal.
21 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
23 bool Success = convertUTF16ToUTF8String(Ref, Result);
// "\xe0\xb2\xa0" is the three-byte UTF-8 encoding of U+0CA0; the expected
// output carries no byte order mark.
25 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
26 EXPECT_EQ(Expected, Result);
// Big-endian counterpart of the little-endian test: the buffer starts with
// the 0xFE 0xFF byte order mark and the same UTF-8 output is expected.
// NOTE(review): the embedded listing numbers are not contiguous here, so the
// declaration of Result, any check of Success and the closing brace are not
// visible in this view -- confirm against the full file.
29 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
30 // Src is the look of disapproval: BOM, U+0CA0, '_' (U+005F), U+0CA0,
// with every 16-bit unit stored high byte first.
31 static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
// sizeof(Src) - 1 leaves out the implicit NUL terminator of the literal.
32 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
34 bool Success = convertUTF16ToUTF8String(Ref, Result);
// "\xe0\xb2\xa0" is the three-byte UTF-8 encoding of U+0CA0; the expected
// output carries no byte order mark.
36 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
37 EXPECT_EQ(Expected, Result);
// A five-byte buffer cannot hold a whole number of 16-bit code units, so the
// conversion is expected to report failure.
// NOTE(review): the declaration of Result and the closing brace are not
// visible in this view.
40 TEST(ConvertUTFTest, OddLengthInput) {
42 bool Success = convertUTF16ToUTF8String(ArrayRef<char>("xxxxx", 5), Result);
43 EXPECT_FALSE(Success);
// Converts a default-constructed ArrayRef<char> and expects the output string
// to stay empty.
// NOTE(review): the declaration of Result, any check of Success and the
// closing brace are not visible in this view.
46 TEST(ConvertUTFTest, Empty) {
48 bool Success = convertUTF16ToUTF8String(ArrayRef<char>(), Result);
50 EXPECT_TRUE(Result.empty());
// Probes hasUTF16ByteOrderMark with both byte orders, an odd-length buffer, a
// longer buffer, an empty buffer and a single byte.
// NOTE(review): only one assertion (after the odd-length case) is visible;
// the listing numbers skip the lines that presumably check the other results
// -- confirm against the full file.
53 TEST(ConvertUTFTest, HasUTF16BOM) {
// Little-endian mark: 0xFF 0xFE.
54 bool HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xff\xfe", 2));
// Big-endian mark: 0xFE 0xFF.
56 HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff", 2));
// Big-endian mark followed by one extra byte, i.e. an odd total length.
58 HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff ", 3));
59 EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
// The escape \x00a consumes all three hex digits (value 0x0A), so the literal
// holds exactly six characters before its terminator: fe ff 0a 's' 'd' 'f'.
60 HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff\x00asdf", 6));
// Default-constructed (empty) buffer.
63 HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>());
// Only the first byte of a big-endian mark.
65 HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe", 1));
// Expected outcome of a decoding run: the ConversionResult code together with
// the sequence of scalar values the decoder should have produced.
69 struct ConvertUTFResultContainer {
70 ConversionResult ErrorCode;
71 std::vector<unsigned> UnicodeScalars;
// Starts with the given result code and an empty scalar list.
73 ConvertUTFResultContainer(ConversionResult ErrorCode)
74 : ErrorCode(ErrorCode) {}
// Builds a copy of this container with up to eight more scalars appended to
// UnicodeScalars; *this is left unmodified, which lets calls be chained to
// describe longer sequences. Every parameter defaults to 0x110000, one past
// the largest Unicode code point (U+10FFFF).
// NOTE(review): the listing numbers skip the lines between the push_back
// calls and after the last one; presumably each push_back is guarded by a
// comparison against the 0x110000 default and Result is returned at the end
// -- confirm against the full file.
76 ConvertUTFResultContainer
77 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
78 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
79 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
80 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
81 ConvertUTFResultContainer Result(*this);
83 Result.UnicodeScalars.push_back(US0);
85 Result.UnicodeScalars.push_back(US1);
87 Result.UnicodeScalars.push_back(US2);
89 Result.UnicodeScalars.push_back(US3);
91 Result.UnicodeScalars.push_back(US4);
93 Result.UnicodeScalars.push_back(US5);
95 Result.UnicodeScalars.push_back(US6);
97 Result.UnicodeScalars.push_back(US7);
// Decodes the bytes of S with ConvertUTF8toUTF32 in lenientConversion mode and
// returns the conversion result code paired with the scalars that were written.
// NOTE(review): the line that declares ErrorCode (presumably capturing the
// return value of the ConvertUTF8toUTF32 call) and the closing brace are not
// visible in this view.
102 std::pair<ConversionResult, std::vector<unsigned>>
103 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
104 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
// The converter advances SourceNext and TargetStart through the pointers
// passed to it; SourceStart keeps the beginning of the input.
106 const UTF8 *SourceNext = SourceStart;
// One zero-initialised output slot per input byte.
// NOTE(review): presumably sufficient because no input byte yields more than
// one scalar -- confirm against the converter.
107 std::vector<UTF32> Decoded(S.size(), 0);
108 UTF32 *TargetStart = Decoded.data();
111 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
112 Decoded.data() + Decoded.size(), lenientConversion);
// TargetStart now points one past the last slot written; drop the unused tail.
114 Decoded.resize(TargetStart - Decoded.data());
116 return std::make_pair(ErrorCode, Decoded);
// Same procedure as ConvertUTF8ToUnicodeScalarsLenient, but decodes with
// ConvertUTF8toUTF32Partial (still in lenientConversion mode). Returns the
// conversion result code paired with the scalars that were written.
// NOTE(review): the closing brace is not visible in this view.
119 std::pair<ConversionResult, std::vector<unsigned>>
120 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
121 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
// The converter advances SourceNext and TargetStart through the pointers
// passed to it; SourceStart keeps the beginning of the input.
123 const UTF8 *SourceNext = SourceStart;
// One zero-initialised output slot per input byte.
// NOTE(review): presumably sufficient because no input byte yields more than
// one scalar -- confirm against the converter.
124 std::vector<UTF32> Decoded(S.size(), 0);
125 UTF32 *TargetStart = Decoded.data();
127 auto ErrorCode = ConvertUTF8toUTF32Partial(
128 &SourceNext, SourceStart + S.size(), &TargetStart,
129 Decoded.data() + Decoded.size(), lenientConversion);
// TargetStart now points one past the last slot written; drop the unused tail.
131 Decoded.resize(TargetStart - Decoded.data());
133 return std::make_pair(ErrorCode, Decoded);
// Decodes S and compares the outcome with Expected. Returns AssertionSuccess
// when both the result code and the decoded scalars match; otherwise returns
// an AssertionFailure whose message shows the expected and actual values.
// NOTE(review): both decoder calls appear back to back below because the
// listing numbers skip the lines around them; presumably Partial selects
// between the two helpers -- confirm against the full file.
136 ::testing::AssertionResult
137 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
138 StringRef S, bool Partial = false) {
139 ConversionResult ErrorCode;
140 std::vector<unsigned> Decoded;
142 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
145 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
// The result code is checked first, so a mismatch there is reported before
// the scalars are compared.
146 if (Expected.ErrorCode != ErrorCode)
147 return ::testing::AssertionFailure() << "Expected error code "
148 << Expected.ErrorCode << ", actual "
// NOTE(review): the end of the message above (listing line 149) is not
// visible in this view.
151 if (Expected.UnicodeScalars != Decoded)
152 return ::testing::AssertionFailure()
153 << "Expected lenient decoded result:\n"
154 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
155 << "Actual result:\n" << ::testing::PrintToString(Decoded);
157 return ::testing::AssertionSuccess();
160 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
166 // U+0041 LATIN CAPITAL LETTER A
167 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
168 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
174 // U+0283 LATIN SMALL LETTER ESH
175 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
176 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
179 // U+03BA GREEK SMALL LETTER KAPPA
180 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
181 // U+03C3 GREEK SMALL LETTER SIGMA
182 // U+03BC GREEK SMALL LETTER MU
183 // U+03B5 GREEK SMALL LETTER EPSILON
184 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
185 ConvertUTFResultContainer(conversionOK)
186 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
187 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
193 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
194 // U+6587 CJK UNIFIED IDEOGRAPH-6587
195 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
196 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
197 "\xe4\xbe\x8b\xe6\x96\x87"));
199 // U+D55C HANGUL SYLLABLE HAN
200 // U+AE00 HANGUL SYLLABLE GEUL
201 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
202 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
203 "\xed\x95\x9c\xea\xb8\x80"));
205 // U+1112 HANGUL CHOSEONG HIEUH
206 // U+1161 HANGUL JUNGSEONG A
207 // U+11AB HANGUL JONGSEONG NIEUN
208 // U+1100 HANGUL CHOSEONG KIYEOK
209 // U+1173 HANGUL JUNGSEONG EU
210 // U+11AF HANGUL JONGSEONG RIEUL
211 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
212 ConvertUTFResultContainer(conversionOK)
213 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
214 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
221 // U+E0100 VARIATION SELECTOR-17
222 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
223 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
224 "\xf3\xa0\x84\x80"));
227 // First possible sequence of a certain length
231 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
232 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
233 StringRef("\x00", 1)));
235 // U+0080 PADDING CHARACTER
236 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
237 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
240 // U+0800 SAMARITAN LETTER ALAF
241 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
242 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
245 // U+10000 LINEAR B SYLLABLE B008 A
246 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
247 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
248 "\xf0\x90\x80\x80"));
250 // U+200000 (invalid)
251 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
252 ConvertUTFResultContainer(sourceIllegal)
253 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
254 "\xf8\x88\x80\x80\x80"));
256 // U+4000000 (invalid)
257 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
258 ConvertUTFResultContainer(sourceIllegal)
259 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
260 "\xfc\x84\x80\x80\x80\x80"));
263 // Last possible sequence of a certain length
267 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
268 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
270 // U+07FF (unassigned)
271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
272 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
275 // U+FFFF (noncharacter)
276 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
277 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
280 // U+1FFFFF (invalid)
281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
282 ConvertUTFResultContainer(sourceIllegal)
283 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
284 "\xf7\xbf\xbf\xbf"));
286 // U+3FFFFFF (invalid)
287 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
288 ConvertUTFResultContainer(sourceIllegal)
289 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
290 "\xfb\xbf\xbf\xbf\xbf"));
292 // U+7FFFFFFF (invalid)
293 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
294 ConvertUTFResultContainer(sourceIllegal)
295 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
296 "\xfd\xbf\xbf\xbf\xbf\xbf"));
299 // Other boundary conditions
302 // U+D7FF (unassigned)
303 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
304 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
307 // U+E000 (private use)
308 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
309 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
312 // U+FFFD REPLACEMENT CHARACTER
313 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
314 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
317 // U+10FFFF (noncharacter)
318 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
319 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
320 "\xf4\x8f\xbf\xbf"));
322 // U+110000 (invalid)
323 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
324 ConvertUTFResultContainer(sourceIllegal)
325 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
326 "\xf4\x90\x80\x80"));
329 // Unexpected continuation bytes
332 // A sequence of unexpected continuation bytes that don't follow a first
333 // byte; every byte is a maximal subpart.
335 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
336 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
337 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
338 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
339 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
340 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
342 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
343 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
345 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
346 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
348 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
349 ConvertUTFResultContainer(sourceIllegal)
350 .withScalars(0xfffd, 0xfffd, 0xfffd),
352 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
353 ConvertUTFResultContainer(sourceIllegal)
354 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
355 "\x80\xbf\x80\xbf"));
356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
357 ConvertUTFResultContainer(sourceIllegal)
358 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
359 "\x80\xbf\x82\xbf\xaa"));
360 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
361 ConvertUTFResultContainer(sourceIllegal)
362 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
363 "\xaa\xb0\xbb\xbf\xaa\xa0"));
364 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
365 ConvertUTFResultContainer(sourceIllegal)
366 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
367 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
369 // All continuation bytes (0x80--0xbf).
370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
371 ConvertUTFResultContainer(sourceIllegal)
372 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
373 0xfffd, 0xfffd, 0xfffd, 0xfffd)
374 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
375 0xfffd, 0xfffd, 0xfffd, 0xfffd)
376 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
377 0xfffd, 0xfffd, 0xfffd, 0xfffd)
378 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
379 0xfffd, 0xfffd, 0xfffd, 0xfffd)
380 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
381 0xfffd, 0xfffd, 0xfffd, 0xfffd)
382 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
383 0xfffd, 0xfffd, 0xfffd, 0xfffd)
384 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
385 0xfffd, 0xfffd, 0xfffd, 0xfffd)
386 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
387 0xfffd, 0xfffd, 0xfffd, 0xfffd),
388 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
389 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
390 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
391 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
394 // Lonely start bytes
397 // Start bytes of 2-byte sequences (0xc0--0xdf).
398 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
399 ConvertUTFResultContainer(sourceIllegal)
400 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
401 0xfffd, 0xfffd, 0xfffd, 0xfffd)
402 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
403 0xfffd, 0xfffd, 0xfffd, 0xfffd)
404 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
405 0xfffd, 0xfffd, 0xfffd, 0xfffd)
406 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
407 0xfffd, 0xfffd, 0xfffd, 0xfffd),
408 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
409 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
411 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
412 ConvertUTFResultContainer(sourceIllegal)
413 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
414 0xfffd, 0x0020, 0xfffd, 0x0020)
415 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
416 0xfffd, 0x0020, 0xfffd, 0x0020)
417 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
418 0xfffd, 0x0020, 0xfffd, 0x0020)
419 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
420 0xfffd, 0x0020, 0xfffd, 0x0020)
421 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
422 0xfffd, 0x0020, 0xfffd, 0x0020)
423 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
424 0xfffd, 0x0020, 0xfffd, 0x0020)
425 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
426 0xfffd, 0x0020, 0xfffd, 0x0020)
427 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
428 0xfffd, 0x0020, 0xfffd, 0x0020),
429 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
430 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
431 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
432 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
434 // Start bytes of 3-byte sequences (0xe0--0xef).
435 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
436 ConvertUTFResultContainer(sourceIllegal)
437 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
438 0xfffd, 0xfffd, 0xfffd, 0xfffd)
439 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
440 0xfffd, 0xfffd, 0xfffd, 0xfffd),
441 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
443 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
444 ConvertUTFResultContainer(sourceIllegal)
445 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
446 0xfffd, 0x0020, 0xfffd, 0x0020)
447 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
448 0xfffd, 0x0020, 0xfffd, 0x0020)
449 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
450 0xfffd, 0x0020, 0xfffd, 0x0020)
451 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
452 0xfffd, 0x0020, 0xfffd, 0x0020),
453 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
454 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
456 // Start bytes of 4-byte sequences (0xf0--0xf7).
457 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
458 ConvertUTFResultContainer(sourceIllegal)
459 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
460 0xfffd, 0xfffd, 0xfffd, 0xfffd),
461 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
463 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
464 ConvertUTFResultContainer(sourceIllegal)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020)
467 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
468 0xfffd, 0x0020, 0xfffd, 0x0020),
469 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
471 // Start bytes of 5-byte sequences (0xf8--0xfb).
472 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
473 ConvertUTFResultContainer(sourceIllegal)
474 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
475 "\xf8\xf9\xfa\xfb"));
477 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
478 ConvertUTFResultContainer(sourceIllegal)
479 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
480 0xfffd, 0x0020, 0xfffd, 0x0020),
481 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
483 // Start bytes of 6-byte sequences (0xfc--0xfd).
484 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
485 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
488 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
489 ConvertUTFResultContainer(sourceIllegal)
490 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
491 "\xfc\x20\xfd\x20"));
494 // Other bytes (0xc0--0xc1, 0xfe--0xff).
497 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
498 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
499 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
500 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
501 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
502 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
503 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
504 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
506 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
507 ConvertUTFResultContainer(sourceIllegal)
508 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
509 "\xc0\xc1\xfe\xff"));
511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
512 ConvertUTFResultContainer(sourceIllegal)
513 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
514 "\xfe\xfe\xff\xff"));
516 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
517 ConvertUTFResultContainer(sourceIllegal)
518 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
519 "\xfe\x80\x80\x80\x80\x80"));
521 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
522 ConvertUTFResultContainer(sourceIllegal)
523 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
524 "\xff\x80\x80\x80\x80\x80"));
526 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
527 ConvertUTFResultContainer(sourceIllegal)
528 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
529 0xfffd, 0x0020, 0xfffd, 0x0020),
530 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
533 // Sequences with one continuation byte missing
536 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
537 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
538 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
539 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
540 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
541 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
543 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
544 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
546 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
547 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
550 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
552 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
553 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
555 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
556 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
558 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
559 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
561 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
562 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
564 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
565 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
567 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
568 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
570 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
571 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
573 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
574 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
576 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
577 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
579 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
580 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
583 // Overlong sequences with one trailing byte missing.
584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
585 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
597 ConvertUTFResultContainer(sourceIllegal)
598 .withScalars(0xfffd, 0xfffd, 0xfffd),
600 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
601 ConvertUTFResultContainer(sourceIllegal)
602 .withScalars(0xfffd, 0xfffd, 0xfffd),
604 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
605 ConvertUTFResultContainer(sourceIllegal)
606 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
607 "\xf8\x80\x80\x80"));
608 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
609 ConvertUTFResultContainer(sourceIllegal)
610 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
611 "\xfc\x80\x80\x80\x80"));
613 // Sequences that represent surrogates with one trailing byte missing.
615 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
616 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
618 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
619 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
621 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
622 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
625 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
626 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
628 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
629 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
631 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
632 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
635 // Ill-formed 4-byte sequences.
636 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
637 // U+1100xx (invalid)
638 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
639 ConvertUTFResultContainer(sourceIllegal)
640 .withScalars(0xfffd, 0xfffd, 0xfffd),
642 // U+13FBxx (invalid)
643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
644 ConvertUTFResultContainer(sourceIllegal)
645 .withScalars(0xfffd, 0xfffd, 0xfffd),
647 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
648 ConvertUTFResultContainer(sourceIllegal)
649 .withScalars(0xfffd, 0xfffd, 0xfffd),
651 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
652 ConvertUTFResultContainer(sourceIllegal)
653 .withScalars(0xfffd, 0xfffd, 0xfffd),
655 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
656 ConvertUTFResultContainer(sourceIllegal)
657 .withScalars(0xfffd, 0xfffd, 0xfffd),
659 // U+1FFBxx (invalid)
660 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
661 ConvertUTFResultContainer(sourceIllegal)
662 .withScalars(0xfffd, 0xfffd, 0xfffd),
665 // Ill-formed 5-byte sequences.
666 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
667 // U+2000xx (invalid)
668 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
669 ConvertUTFResultContainer(sourceIllegal)
670 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
671 "\xf8\x88\x80\x80"));
672 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
673 ConvertUTFResultContainer(sourceIllegal)
674 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
675 "\xf8\xbf\xbf\xbf"));
676 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
677 ConvertUTFResultContainer(sourceIllegal)
678 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
679 "\xf9\x80\x80\x80"));
680 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
681 ConvertUTFResultContainer(sourceIllegal)
682 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
683 "\xfa\x80\x80\x80"));
684 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
685 ConvertUTFResultContainer(sourceIllegal)
686 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
687 "\xfb\x80\x80\x80"));
688 // U+3FFFFxx (invalid)
689 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
690 ConvertUTFResultContainer(sourceIllegal)
691 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
692 "\xfb\xbf\xbf\xbf"));
694 // Ill-formed 6-byte sequences.
695 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
696 // U+40000xx (invalid)
697 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
698 ConvertUTFResultContainer(sourceIllegal)
699 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
700 "\xfc\x84\x80\x80\x80"));
701 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
702 ConvertUTFResultContainer(sourceIllegal)
703 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
704 "\xfc\xbf\xbf\xbf\xbf"));
705 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
706 ConvertUTFResultContainer(sourceIllegal)
707 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
708 "\xfd\x80\x80\x80\x80"));
709 // U+7FFFFFxx (invalid)
710 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
711 ConvertUTFResultContainer(sourceIllegal)
712 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
713 "\xfd\xbf\xbf\xbf\xbf"));
716 // Sequences with two continuation bytes missing
719 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
720 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
722 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
723 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
725 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
726 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
728 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
729 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
731 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
732 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
734 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
735 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
738 // Overlong sequences with two trailing bytes missing.
739 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
741 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
742 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
744 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
745 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
747 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
748 ConvertUTFResultContainer(sourceIllegal)
749 .withScalars(0xfffd, 0xfffd, 0xfffd),
751 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
752 ConvertUTFResultContainer(sourceIllegal)
753 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
754 "\xfc\x80\x80\x80"));
756 // Sequences that represent surrogates with two trailing bytes missing.
757 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
758 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
760 // Ill-formed 4-byte sequences.
761 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
762 // U+110yxx (invalid)
763 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
764 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
766 // U+13Fyxx (invalid)
767 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
768 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
770 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
771 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
773 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
774 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
776 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
777 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
779 // U+1FFyxx (invalid)
780 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
781 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
784 // Ill-formed 5-byte sequences.
785 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
786 // U+200yxx (invalid)
787 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
788 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
790 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
791 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
793 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
794 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
796 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
797 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
799 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
800 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
802 // U+3FFFyxx (invalid)
803 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
804 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
807 // Ill-formed 6-byte sequences.
808 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
809 // U+4000yxx (invalid)
810 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
811 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
812 "\xfc\x84\x80\x80"));
813 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
814 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
815 "\xfc\xbf\xbf\xbf"));
816 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
817 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
818 "\xfd\x80\x80\x80"));
819 // U+7FFFFyxx (invalid)
820 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
821 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
822 "\xfd\xbf\xbf\xbf"));
825 // Sequences with three continuation bytes missing
828 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
829 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
830 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
831 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
832 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
833 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
834 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
835 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
836 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
837 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
839 // Broken overlong sequences.
840 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
841 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
842 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
843 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
845 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
846 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
849 // Ill-formed 4-byte sequences.
850 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
851 // U+14yyxx (invalid)
852 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
853 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
854 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
855 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
856 // U+1Cyyxx (invalid)
857 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
858 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
860 // Ill-formed 5-byte sequences.
861 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
862 // U+20yyxx (invalid)
863 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
864 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
866 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
867 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
869 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
870 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
872 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
873 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
875 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
876 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
878 // U+3FCyyxx (invalid)
879 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
880 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
883 // Ill-formed 6-byte sequences.
884 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
885 // U+400yyxx (invalid)
886 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
887 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
889 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
890 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
892 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
893 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
895 // U+7FFCyyxx (invalid)
896 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
897 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
901 // Sequences with four continuation bytes missing
904 // Ill-formed 5-byte sequences.
905 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
906 // U+uzyyxx (invalid)
907 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
908 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
909 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
910 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
911 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
912 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
913 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
915 // U+3zyyxx (invalid)
916 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
919 // Broken overlong sequences.
920 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
921 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
922 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
923 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
926 // Ill-formed 6-byte sequences.
927 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
928 // U+uzzyyxx (invalid)
929 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
930 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
932 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
933 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
935 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
936 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
938 // U+7Fzzyyxx (invalid)
939 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
940 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
944 // Sequences with five continuation bytes missing
947 // Ill-formed 6-byte sequences.
948 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
949 // U+uzzyyxx (invalid)
950 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
951 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
952 // U+uuzzyyxx (invalid)
953 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
954 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
957 // Consecutive sequences with trailing bytes missing
960 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
961 ConvertUTFResultContainer(sourceIllegal)
962 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
963 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
964 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
965 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
966 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
967 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
968 "\xc0" "\xe0\x80" "\xf0\x80\x80"
970 "\xfc\x80\x80\x80\x80"
971 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
973 "\xfd\xbf\xbf\xbf\xbf"));
976 // Overlong UTF-8 sequences
980 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
981 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
983 // Overlong sequences of the above.
984 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
985 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
987 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
988 ConvertUTFResultContainer(sourceIllegal)
989 .withScalars(0xfffd, 0xfffd, 0xfffd),
991 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
992 ConvertUTFResultContainer(sourceIllegal)
993 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
994 "\xf0\x80\x80\xaf"));
995 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
996 ConvertUTFResultContainer(sourceIllegal)
997 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
998 "\xf8\x80\x80\x80\xaf"));
999 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1000 ConvertUTFResultContainer(sourceIllegal)
1001 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1002 "\xfc\x80\x80\x80\x80\xaf"));
1005 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1006 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1007 StringRef("\x00", 1)));
1009 // Overlong sequences of the above.
1010 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1011 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1013 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1014 ConvertUTFResultContainer(sourceIllegal)
1015 .withScalars(0xfffd, 0xfffd, 0xfffd),
1017 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1018 ConvertUTFResultContainer(sourceIllegal)
1019 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1020 "\xf0\x80\x80\x80"));
1021 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1022 ConvertUTFResultContainer(sourceIllegal)
1023 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1024 "\xf8\x80\x80\x80\x80"));
1025 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1026 ConvertUTFResultContainer(sourceIllegal)
1027 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1028 "\xfc\x80\x80\x80\x80\x80"));
1030 // Other overlong sequences.
1031 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1032 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1034 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1035 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1037 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1038 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1040 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1041 ConvertUTFResultContainer(sourceIllegal)
1042 .withScalars(0xfffd, 0xfffd, 0xfffd),
1044 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1045 ConvertUTFResultContainer(sourceIllegal)
1046 .withScalars(0xfffd, 0xfffd, 0xfffd),
1048 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1049 ConvertUTFResultContainer(sourceIllegal)
1050 .withScalars(0xfffd, 0xfffd, 0xfffd),
1052 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1053 ConvertUTFResultContainer(sourceIllegal)
1054 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1055 "\xf0\x8f\x80\x80"));
1056 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1057 ConvertUTFResultContainer(sourceIllegal)
1058 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1059 "\xf0\x8f\xbf\xbf"));
1060 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1061 ConvertUTFResultContainer(sourceIllegal)
1062 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1063 "\xf8\x87\xbf\xbf\xbf"));
1064 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1065 ConvertUTFResultContainer(sourceIllegal)
1066 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1067 "\xfc\x83\xbf\xbf\xbf\xbf"));
1070 // Isolated surrogates
1075 // D71. High-surrogate code point: A Unicode code point in the range
1076 // U+D800 to U+DBFF.
1078 // D73. Low-surrogate code point: A Unicode code point in the range
1079 // U+DC00 to U+DFFF.
1081 // Note: U+E0100 is <DB40 DD00> in UTF-16.
1086 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1087 ConvertUTFResultContainer(sourceIllegal)
1088 .withScalars(0xfffd, 0xfffd, 0xfffd),
1092 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1093 ConvertUTFResultContainer(sourceIllegal)
1094 .withScalars(0xfffd, 0xfffd, 0xfffd),
1098 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1099 ConvertUTFResultContainer(sourceIllegal)
1100 .withScalars(0xfffd, 0xfffd, 0xfffd),
1106 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1107 ConvertUTFResultContainer(sourceIllegal)
1108 .withScalars(0xfffd, 0xfffd, 0xfffd),
1112 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1113 ConvertUTFResultContainer(sourceIllegal)
1114 .withScalars(0xfffd, 0xfffd, 0xfffd),
1118 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1119 ConvertUTFResultContainer(sourceIllegal)
1120 .withScalars(0xfffd, 0xfffd, 0xfffd),
1126 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1127 ConvertUTFResultContainer(sourceIllegal)
1128 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1129 "\xed\xa0\x80\xed\xb0\x80"));
1132 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1133 ConvertUTFResultContainer(sourceIllegal)
1134 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1135 "\xed\xa0\x80\xed\xb4\x80"));
1138 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1139 ConvertUTFResultContainer(sourceIllegal)
1140 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1141 "\xed\xa0\x80\xed\xbf\xbf"));
1144 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1145 ConvertUTFResultContainer(sourceIllegal)
1146 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1147 "\xed\xac\xa0\xed\xb0\x80"));
1150 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1151 ConvertUTFResultContainer(sourceIllegal)
1152 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1153 "\xed\xac\xa0\xed\xb4\x80"));
1156 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1157 ConvertUTFResultContainer(sourceIllegal)
1158 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1159 "\xed\xac\xa0\xed\xbf\xbf"));
1162 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1163 ConvertUTFResultContainer(sourceIllegal)
1164 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1165 "\xed\xaf\xbf\xed\xb0\x80"));
1168 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1169 ConvertUTFResultContainer(sourceIllegal)
1170 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1171 "\xed\xaf\xbf\xed\xb4\x80"));
1174 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1175 ConvertUTFResultContainer(sourceIllegal)
1176 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1177 "\xed\xaf\xbf\xed\xbf\xbf"));
1185 // D14. Noncharacter: A code point that is permanently reserved for
1186 // internal use and that should never be interchanged. Noncharacters
1187 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1188 // and the values U+FDD0..U+FDEF.
1191 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1192 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1196 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1197 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1201 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1202 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1203 "\xf0\x9f\xbf\xbe"));
1206 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1207 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1208 "\xf0\x9f\xbf\xbf"));
1211 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1212 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1213 "\xf0\xaf\xbf\xbe"));
1216 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1217 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1218 "\xf0\xaf\xbf\xbf"));
1221 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1222 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1223 "\xf0\xbf\xbf\xbe"));
1226 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1227 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1228 "\xf0\xbf\xbf\xbf"));
1231 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1232 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1233 "\xf1\x8f\xbf\xbe"));
1236 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1237 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1238 "\xf1\x8f\xbf\xbf"));
1241 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1242 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1243 "\xf1\x9f\xbf\xbe"));
1246 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1247 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1248 "\xf1\x9f\xbf\xbf"));
1251 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1252 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1253 "\xf1\xaf\xbf\xbe"));
1256 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1257 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1258 "\xf1\xaf\xbf\xbf"));
1261 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1262 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1263 "\xf1\xbf\xbf\xbe"));
1266 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1267 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1268 "\xf1\xbf\xbf\xbf"));
1271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1272 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1273 "\xf2\x8f\xbf\xbe"));
1276 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1277 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1278 "\xf2\x8f\xbf\xbf"));
1281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1282 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1283 "\xf2\x9f\xbf\xbe"));
1286 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1287 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1288 "\xf2\x9f\xbf\xbf"));
1291 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1292 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1293 "\xf2\xaf\xbf\xbe"));
1296 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1297 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1298 "\xf2\xaf\xbf\xbf"));
1301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1302 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1303 "\xf2\xbf\xbf\xbe"));
1306 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1307 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1308 "\xf2\xbf\xbf\xbf"));
1311 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1312 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1313 "\xf3\x8f\xbf\xbe"));
1316 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1317 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1318 "\xf3\x8f\xbf\xbf"));
1321 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1322 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1323 "\xf3\x9f\xbf\xbe"));
1326 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1327 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1328 "\xf3\x9f\xbf\xbf"));
1331 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1332 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1333 "\xf3\xaf\xbf\xbe"));
1336 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1337 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1338 "\xf3\xaf\xbf\xbf"));
1341 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1342 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1343 "\xf3\xbf\xbf\xbe"));
1346 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1347 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1348 "\xf3\xbf\xbf\xbf"));
1351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1352 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1353 "\xf4\x8f\xbf\xbe"));
1356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1357 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1358 "\xf4\x8f\xbf\xbf"));
1361 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1362 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1367 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1371 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1372 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1376 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1377 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1381 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1382 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1386 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1387 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1391 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1392 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1396 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1397 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1401 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1402 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1406 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1407 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1411 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1412 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1416 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1417 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1421 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1422 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1426 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1427 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1431 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1432 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1436 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1437 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1441 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1442 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1446 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1447 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1451 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1452 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1456 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1457 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1461 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1462 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1466 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1467 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1471 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1472 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1476 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1477 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1481 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1482 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1486 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1487 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1492 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1496 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1497 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1501 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1502 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1506 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1507 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1512 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1516 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1517 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1521 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1522 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1526 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1527 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1531 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1532 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1536 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1537 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1541 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1542 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1546 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1547 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1552 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1556 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1557 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1561 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1562 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1567 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1571 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1572 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1576 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1577 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1581 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1582 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1587 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1591 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1592 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1597 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1601 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1602 // U+0041 LATIN CAPITAL LETTER A
1603 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1604 ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1608 // Sequences with one continuation byte missing
1611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1612 ConvertUTFResultContainer(sourceExhausted),
1614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1615 ConvertUTFResultContainer(sourceExhausted),
1617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1618 ConvertUTFResultContainer(sourceExhausted),
1620 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1621 ConvertUTFResultContainer(sourceExhausted),
1623 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1624 ConvertUTFResultContainer(sourceExhausted),
1626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1627 ConvertUTFResultContainer(sourceExhausted),
1629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1630 ConvertUTFResultContainer(sourceExhausted),
1632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1633 ConvertUTFResultContainer(sourceExhausted),
1635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1636 ConvertUTFResultContainer(sourceExhausted),
1638 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1639 ConvertUTFResultContainer(sourceExhausted),
1641 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1642 ConvertUTFResultContainer(sourceExhausted),
1643 "\xf0\x90\x80", true));
1644 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1645 ConvertUTFResultContainer(sourceExhausted),
1646 "\xf0\xbf\xbf", true));
1647 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1648 ConvertUTFResultContainer(sourceExhausted),
1649 "\xf1\x80\x80", true));
1650 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1651 ConvertUTFResultContainer(sourceExhausted),
1652 "\xf3\xbf\xbf", true));
1653 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1654 ConvertUTFResultContainer(sourceExhausted),
1655 "\xf4\x80\x80", true));
1656 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1657 ConvertUTFResultContainer(sourceExhausted),
1658 "\xf4\x8f\xbf", true));
1660 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1661 ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),