2 * Part of the D programming language runtime library.
6 * Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
7 * Written by Walter Bright
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the authors be held liable for any damages
11 * arising from the use of this software.
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute it
15 * freely, in both source and binary form, subject to the following
18 * o The origin of this software must not be misrepresented; you must not
19 * claim that you wrote the original software. If you use this software
20 * in a product, an acknowledgment in the product documentation would be
21 * appreciated but is not required.
22 * o Altered source versions must be plainly marked as such, and must not
23 * be misrepresented as being the original software.
24 * o This notice may not be removed or altered from any source
29 * Modified by Sean Kelly for use with the D Runtime Project
34 /* This code handles decoding UTF strings for foreach_reverse loops.
35 * There are 6 combinations of conversions between char, wchar, and dchar,
36 * and 2 versions of each: a 1-argument (value) and a 2-argument (index, value) form.
39 private import util.utf;
41 /**********************************************/
42 /* 1 argument versions */
// Delegate type used by the 1-argument helpers below: each of them calls it
// with the address of the current element cast to void*.
44 // dg is D, but _aApplyRcd1() is C
45 extern (D) typedef int delegate(void *) dg_t;
// _aApplyRcd1: 1-argument reverse-iteration helper taking a char[] source.
// The loop index i starts at aa.length and runs until it reaches 0; the
// address of d is passed to dg and dg's return value is stored in result.
// NOTE(review): this listing has gaps (not every line of the body is
// visible), so the comments describe only the statements shown.
47 extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
// NOTE(review): %d is paired with aa.length, a size_t -- confirm this is
// acceptable on 64-bit targets.
50 debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
51 for (size_t i = aa.length; i != 0; )
57 { char c = cast(char)d;
// Loop until c has both of its top two bits set (0xC0..0xFF).
61 while ((c & 0xC0) != 0xC0)
// Error path for malformed input; its guarding condition is not visible here.
63 onUnicodeError("Invalid UTF-8 sequence", 0);
72 result = dg(cast(void *)&d);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
81 debug(apply) printf("_aApplyRcd1.unittest\n");
86 foreach_reverse(dchar d; s)
90 case 0: assert(d == 'o'); break;
91 case 1: assert(d == 'l'); break;
92 case 2: assert(d == 'l'); break;
93 case 3: assert(d == 'e'); break;
94 case 4: assert(d == 'h'); break;
// Mixed-width input: expects b, U+100456, U+1234, a (one dchar per character).
101 s = "a\u1234\U00100456b";
103 foreach_reverse(dchar d; s)
105 //printf("i = %d, d = %x\n", i, d);
108 case 0: assert(d == 'b'); break;
109 case 1: assert(d == '\U00100456'); break;
110 case 2: assert(d == '\u1234'); break;
111 case 3: assert(d == 'a'); break;
119 /*****************************/
// _aApplyRwd1: 1-argument reverse-iteration helper taking a wchar[] source.
// Iterates with i from aa.length down to 0 and passes the address of d to dg.
// NOTE(review): the body is only partially visible in this listing.
121 extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
124 debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
125 for (size_t i = aa.length; i != 0; )
// d lies in 0xDC00..0xDFFF, the UTF-16 low (trailing) surrogate range.
130 if (d >= 0xDC00 && d <= 0xDFFF)
// Error path; the condition guarding it is not visible here.
132 onUnicodeError("Invalid UTF-16 sequence", 0);
// Combine aa[i] with the low surrogate held in d. 0xD7C0 equals
// 0xD800 - (0x10000 >> 10), so the subtraction also adds the 0x10000 offset.
134 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
136 result = dg(cast(void *)&d);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
145 debug(apply) printf("_aApplyRwd1.unittest\n");
150 foreach_reverse(dchar d; s)
154 case 0: assert(d == 'o'); break;
155 case 1: assert(d == 'l'); break;
156 case 2: assert(d == 'l'); break;
157 case 3: assert(d == 'e'); break;
158 case 4: assert(d == 'h'); break;
// Mixed-width input: expects b, U+100456, U+1234, a.
165 s = "a\u1234\U00100456b";
167 foreach_reverse(dchar d; s)
169 //printf("i = %d, d = %x\n", i, d);
172 case 0: assert(d == 'b'); break;
173 case 1: assert(d == '\U00100456'); break;
174 case 2: assert(d == '\u1234'); break;
175 case 3: assert(d == 'a'); break;
183 /*****************************/
// _aApplyRcw1: 1-argument reverse-iteration helper taking a char[] source and
// passing the address of a wchar (w) to dg.
// NOTE(review): the body is only partially visible in this listing.
185 extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
188 debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
189 for (size_t i = aa.length; i != 0; )
196 { char c = cast(char)w;
// Loop until c has both of its top two bits set (0xC0..0xFF).
200 while ((c & 0xC0) != 0xC0)
// Error path; the condition guarding it is not visible here.
202 onUnicodeError("Invalid UTF-8 sequence", 0);
// OR the low 6 bits of c into d at bit offset j.
204 d |= (c & 0x3F) << j;
// High surrogate: top 10 bits of (d - 0x10000), based at 0xD800.
215 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
216 result = dg(cast(void *)&w);
// Low surrogate: bottom 10 bits of (d - 0x10000), based at 0xDC00.
219 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
222 result = dg(cast(void *)&w);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
231 debug(apply) printf("_aApplyRcw1.unittest\n");
236 foreach_reverse(wchar d; s)
240 case 0: assert(d == 'o'); break;
241 case 1: assert(d == 'l'); break;
242 case 2: assert(d == 'l'); break;
243 case 3: assert(d == 'e'); break;
244 case 4: assert(d == 'h'); break;
// Mixed-width input: the surrogate pair for U+100456 is expected high half
// first (0xDBC1, then 0xDC56) even though the iteration is reversed.
251 s = "a\u1234\U00100456b";
253 foreach_reverse(wchar d; s)
255 //printf("i = %d, d = %x\n", i, d);
258 case 0: assert(d == 'b'); break;
259 case 1: assert(d == 0xDBC1); break;
260 case 2: assert(d == 0xDC56); break;
261 case 3: assert(d == 0x1234); break;
262 case 4: assert(d == 'a'); break;
270 /*****************************/
// _aApplyRwc1: 1-argument reverse-iteration helper taking a wchar[] source and
// passing the address of a single char (c2 or c) to dg.
// NOTE(review): the body is only partially visible in this listing.
272 extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
275 debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
276 for (size_t i = aa.length; i != 0; )
// d lies in 0xDC00..0xDFFF, the UTF-16 low (trailing) surrogate range.
282 if (d >= 0xDC00 && d <= 0xDFFF)
// Error path; the condition guarding it is not visible here.
284 onUnicodeError("Invalid UTF-16 sequence", 0);
// Combine aa[i] with the low surrogate in d; 0xD7C0 folds in the 0x10000 offset.
286 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
// Encode d through toUTF8 using buf; b receives the result.
293 auto b = toUTF8(buf, d);
// presumably c2 walks the elements of b -- the enclosing loop is not visible.
296 result = dg(cast(void *)&c2);
303 result = dg(cast(void *)&c);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
312 debug(apply) printf("_aApplyRwc1.unittest\n");
317 foreach_reverse(char d; s)
321 case 0: assert(d == 'o'); break;
322 case 1: assert(d == 'l'); break;
323 case 2: assert(d == 'l'); break;
324 case 3: assert(d == 'e'); break;
325 case 4: assert(d == 'h'); break;
// Mixed-width input: characters arrive last-to-first, but the UTF-8 bytes of
// each character are expected in forward order (F4 80 91 96, then E1 88 B4).
332 s = "a\u1234\U00100456b";
334 foreach_reverse(char d; s)
336 //printf("i = %d, d = %x\n", i, d);
339 case 0: assert(d == 'b'); break;
340 case 1: assert(d == 0xF4); break;
341 case 2: assert(d == 0x80); break;
342 case 3: assert(d == 0x91); break;
343 case 4: assert(d == 0x96); break;
344 case 5: assert(d == 0xE1); break;
345 case 6: assert(d == 0x88); break;
346 case 7: assert(d == 0xB4); break;
347 case 8: assert(d == 'a'); break;
355 /*****************************/
// _aApplyRdc1: 1-argument reverse-iteration helper taking a dchar[] source and
// passing the address of a single char (c2 or c) to dg.
// NOTE(review): the body is only partially visible in this listing.
357 extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
360 debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);
361 for (size_t i = aa.length; i != 0;)
// Encode d through toUTF8 using buf; b receives the result.
369 auto b = toUTF8(buf, d);
// presumably c2 walks the elements of b -- the enclosing loop is not visible.
372 result = dg(cast(void *)&c2);
382 result = dg(cast(void *)&c);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
391 debug(apply) printf("_aApplyRdc1.unittest\n");
396 foreach_reverse(char d; s)
400 case 0: assert(d == 'o'); break;
401 case 1: assert(d == 'l'); break;
402 case 2: assert(d == 'l'); break;
403 case 3: assert(d == 'e'); break;
404 case 4: assert(d == 'h'); break;
// Mixed-width input: characters arrive last-to-first, but the UTF-8 bytes of
// each character are expected in forward order (F4 80 91 96, then E1 88 B4).
411 s = "a\u1234\U00100456b";
413 foreach_reverse(char d; s)
415 //printf("i = %d, d = %x\n", i, d);
418 case 0: assert(d == 'b'); break;
419 case 1: assert(d == 0xF4); break;
420 case 2: assert(d == 0x80); break;
421 case 3: assert(d == 0x91); break;
422 case 4: assert(d == 0x96); break;
423 case 5: assert(d == 0xE1); break;
424 case 6: assert(d == 0x88); break;
425 case 7: assert(d == 0xB4); break;
426 case 8: assert(d == 'a'); break;
434 /*****************************/
// _aApplyRdw1: 1-argument reverse-iteration helper taking a dchar[] source and
// passing the address of a wchar (w) to dg.
// NOTE(review): the body is only partially visible in this listing.
436 extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
439 debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);
440 for (size_t i = aa.length; i != 0; )
// High surrogate: top 10 bits of (d - 0x10000), based at 0xD800.
448 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
449 result = dg(cast(void *)&w);
// Low surrogate: bottom 10 bits of (d - 0x10000), based at 0xDC00.
452 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
454 result = dg(cast(void *)&w);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
463 debug(apply) printf("_aApplyRdw1.unittest\n");
468 foreach_reverse(wchar d; s)
472 case 0: assert(d == 'o'); break;
473 case 1: assert(d == 'l'); break;
474 case 2: assert(d == 'l'); break;
475 case 3: assert(d == 'e'); break;
476 case 4: assert(d == 'h'); break;
// Mixed-width input: the surrogate pair for U+100456 is expected high half
// first (0xDBC1, then 0xDC56) even though the iteration is reversed.
483 s = "a\u1234\U00100456b";
485 foreach_reverse(wchar d; s)
487 //printf("i = %d, d = %x\n", i, d);
490 case 0: assert(d == 'b'); break;
491 case 1: assert(d == 0xDBC1); break;
492 case 2: assert(d == 0xDC56); break;
493 case 3: assert(d == 0x1234); break;
494 case 4: assert(d == 'a'); break;
503 /****************************************************************************/
504 /* 2 argument versions */
506 // dg is D, but _aApplyRcd2() is C
// Delegate type used by the 2-argument helpers below: they call it with the
// address of the index first and the address of the current element second.
507 extern (D) typedef int delegate(void *, void *) dg2_t;
// _aApplyRcd2: 2-argument reverse-iteration helper taking a char[] source.
// dg receives the address of the index i and the address of d.
// NOTE(review): the body is only partially visible in this listing.
509 extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
512 size_t len = aa.length;
// NOTE(review): %d is paired with len, a size_t -- confirm on 64-bit targets.
514 debug(apply) printf("_aApplyRcd2(), len = %d\n", len);
515 for (i = len; i != 0; )
521 { char c = cast(char)d;
// Loop until c has both of its top two bits set (0xC0..0xFF).
525 while ((c & 0xC0) != 0xC0)
// Error path; the condition guarding it is not visible here.
527 onUnicodeError("Invalid UTF-8 sequence", 0);
// OR the low 6 bits of c into d at bit offset j.
529 d |= (c & 0x3F) << j;
// i is passed by address as the loop index.
536 result = dg(&i, cast(void *)&d);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
545 debug(apply) printf("_aApplyRcd2.unittest\n");
550 foreach_reverse(k, dchar d; s)
555 case 0: assert(d == 'o'); break;
556 case 1: assert(d == 'l'); break;
557 case 2: assert(d == 'l'); break;
558 case 3: assert(d == 'e'); break;
559 case 4: assert(d == 'h'); break;
// Mixed-width input: expected indices are 8, 4, 1, 0 for b, U+100456,
// U+1234 and a respectively.
566 s = "a\u1234\U00100456b";
568 foreach_reverse(k, dchar d; s)
570 //printf("i = %d, k = %d, d = %x\n", i, k, d);
573 case 0: assert(d == 'b'); assert(k == 8); break;
574 case 1: assert(d == '\U00100456'); assert(k == 4); break;
575 case 2: assert(d == '\u1234'); assert(k == 1); break;
576 case 3: assert(d == 'a'); assert(k == 0); break;
584 /*****************************/
// _aApplyRwd2: 2-argument reverse-iteration helper taking a wchar[] source.
// dg receives the address of the index i and the address of d.
// NOTE(review): the body is only partially visible in this listing.
586 extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
589 debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
590 for (size_t i = aa.length; i != 0; )
// d lies in 0xDC00..0xDFFF, the UTF-16 low (trailing) surrogate range.
595 if (d >= 0xDC00 && d <= 0xDFFF)
// Error path; the condition guarding it is not visible here.
597 onUnicodeError("Invalid UTF-16 sequence", 0);
// Combine aa[i] with the low surrogate in d; 0xD7C0 folds in the 0x10000 offset.
599 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
601 result = dg(&i, cast(void *)&d);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
610 debug(apply) printf("_aApplyRwd2.unittest\n");
615 foreach_reverse(k, dchar d; s)
617 //printf("i = %d, k = %d, d = %x\n", i, k, d);
621 case 0: assert(d == 'o'); break;
622 case 1: assert(d == 'l'); break;
623 case 2: assert(d == 'l'); break;
624 case 3: assert(d == 'e'); break;
625 case 4: assert(d == 'h'); break;
// Mixed-width input: expected indices are 4, 2, 1, 0 for b, U+100456,
// U+1234 and a respectively.
632 s = "a\u1234\U00100456b";
634 foreach_reverse(k, dchar d; s)
636 //printf("i = %d, k = %d, d = %x\n", i, k, d);
639 case 0: assert(k == 4); assert(d == 'b'); break;
640 case 1: assert(k == 2); assert(d == '\U00100456'); break;
641 case 2: assert(k == 1); assert(d == '\u1234'); break;
642 case 3: assert(k == 0); assert(d == 'a'); break;
650 /*****************************/
// _aApplyRcw2: 2-argument reverse-iteration helper taking a char[] source.
// dg receives the address of the index i and the address of a wchar (w).
// NOTE(review): the body is only partially visible in this listing.
652 extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
655 debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
656 for (size_t i = aa.length; i != 0; )
663 { char c = cast(char)w;
// Loop until c has both of its top two bits set (0xC0..0xFF).
667 while ((c & 0xC0) != 0xC0)
// Error path; the condition guarding it is not visible here.
669 onUnicodeError("Invalid UTF-8 sequence", 0);
// OR the low 6 bits of c into d at bit offset j.
671 d |= (c & 0x3F) << j;
// High surrogate: top 10 bits of (d - 0x10000), based at 0xD800.
682 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
683 result = dg(&i, cast(void *)&w);
// Low surrogate: bottom 10 bits of (d - 0x10000), based at 0xDC00.
686 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
689 result = dg(&i, cast(void *)&w);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
698 debug(apply) printf("_aApplyRcw2.unittest\n");
703 foreach_reverse(k, wchar d; s)
705 //printf("i = %d, k = %d, d = %x\n", i, k, d);
709 case 0: assert(d == 'o'); break;
710 case 1: assert(d == 'l'); break;
711 case 2: assert(d == 'l'); break;
712 case 3: assert(d == 'e'); break;
713 case 4: assert(d == 'h'); break;
// Mixed-width input: both halves of the surrogate pair are expected with the
// same index (k == 4), high half (0xDBC1) before low half (0xDC56).
720 s = "a\u1234\U00100456b";
722 foreach_reverse(k, wchar d; s)
724 //printf("i = %d, k = %d, d = %x\n", i, k, d);
727 case 0: assert(k == 8); assert(d == 'b'); break;
728 case 1: assert(k == 4); assert(d == 0xDBC1); break;
729 case 2: assert(k == 4); assert(d == 0xDC56); break;
730 case 3: assert(k == 1); assert(d == 0x1234); break;
731 case 4: assert(k == 0); assert(d == 'a'); break;
739 /*****************************/
// _aApplyRwc2: 2-argument reverse-iteration helper taking a wchar[] source.
// dg receives the address of the index i and the address of a char (c2 or c).
// NOTE(review): the body is only partially visible in this listing.
741 extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
744 debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
745 for (size_t i = aa.length; i != 0; )
// d lies in 0xDC00..0xDFFF, the UTF-16 low (trailing) surrogate range.
751 if (d >= 0xDC00 && d <= 0xDFFF)
// Error path; the condition guarding it is not visible here.
753 onUnicodeError("Invalid UTF-16 sequence", 0);
// Combine aa[i] with the low surrogate in d; 0xD7C0 folds in the 0x10000 offset.
755 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
// Encode d through toUTF8 using buf; b receives the result.
762 auto b = toUTF8(buf, d);
// presumably c2 walks the elements of b -- the enclosing loop is not visible.
765 result = dg(&i, cast(void *)&c2);
772 result = dg(&i, cast(void *)&c);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
781 debug(apply) printf("_aApplyRwc2.unittest\n");
786 foreach_reverse(k, char d; s)
788 //printf("i = %d, k = %d, d = %x\n", i, k, d);
792 case 0: assert(d == 'o'); break;
793 case 1: assert(d == 'l'); break;
794 case 2: assert(d == 'l'); break;
795 case 3: assert(d == 'e'); break;
796 case 4: assert(d == 'h'); break;
// Mixed-width input: every UTF-8 byte of one character is expected with the
// same index k, and the bytes of a character arrive in forward order.
803 s = "a\u1234\U00100456b";
805 foreach_reverse(k, char d; s)
807 //printf("i = %d, k = %d, d = %x\n", i, k, d);
810 case 0: assert(k == 4); assert(d == 'b'); break;
811 case 1: assert(k == 2); assert(d == 0xF4); break;
812 case 2: assert(k == 2); assert(d == 0x80); break;
813 case 3: assert(k == 2); assert(d == 0x91); break;
814 case 4: assert(k == 2); assert(d == 0x96); break;
815 case 5: assert(k == 1); assert(d == 0xE1); break;
816 case 6: assert(k == 1); assert(d == 0x88); break;
817 case 7: assert(k == 1); assert(d == 0xB4); break;
818 case 8: assert(k == 0); assert(d == 'a'); break;
826 /*****************************/
// _aApplyRdc2: 2-argument reverse-iteration helper taking a dchar[] source.
// dg receives the address of the index i and the address of a char (c2 or c).
// NOTE(review): the body is only partially visible in this listing.
828 extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
831 debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);
832 for (size_t i = aa.length; i != 0; )
// Encode d through toUTF8 using buf; b receives the result.
840 auto b = toUTF8(buf, d);
// presumably c2 walks the elements of b -- the enclosing loop is not visible.
843 result = dg(&i, cast(void *)&c2);
852 result = dg(&i, cast(void *)&c);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
861 debug(apply) printf("_aApplyRdc2.unittest\n");
866 foreach_reverse(k, char d; s)
868 //printf("i = %d, k = %d, d = %x\n", i, k, d);
872 case 0: assert(d == 'o'); break;
873 case 1: assert(d == 'l'); break;
874 case 2: assert(d == 'l'); break;
875 case 3: assert(d == 'e'); break;
876 case 4: assert(d == 'h'); break;
// Mixed-width input: every UTF-8 byte of one character is expected with the
// same index k (3, 2, 1, 0 across the four characters).
883 s = "a\u1234\U00100456b";
885 foreach_reverse(k, char d; s)
887 //printf("i = %d, k = %d, d = %x\n", i, k, d);
890 case 0: assert(k == 3); assert(d == 'b'); break;
891 case 1: assert(k == 2); assert(d == 0xF4); break;
892 case 2: assert(k == 2); assert(d == 0x80); break;
893 case 3: assert(k == 2); assert(d == 0x91); break;
894 case 4: assert(k == 2); assert(d == 0x96); break;
895 case 5: assert(k == 1); assert(d == 0xE1); break;
896 case 6: assert(k == 1); assert(d == 0x88); break;
897 case 7: assert(k == 1); assert(d == 0xB4); break;
898 case 8: assert(k == 0); assert(d == 'a'); break;
906 /*****************************/
// _aApplyRdw2: 2-argument reverse-iteration helper taking a dchar[] source.
// dg receives the address of the index i and the address of a wchar (w).
// NOTE(review): the body is only partially visible in this listing.
908 extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
// NOTE(review): %d is paired with a size_t length -- confirm on 64-bit targets.
911 debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);
912 for (size_t i = aa.length; i != 0; )
// High surrogate: top 10 bits of (d - 0x10000), based at 0xD800.
920 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
921 result = dg(&i, cast(void *)&w);
// Low surrogate: bottom 10 bits of (d - 0x10000), based at 0xDC00.
924 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
926 result = dg(&i, cast(void *)&w);
// Unittest fragment: the first reverse loop expects o, l, l, e, h in that order.
935 debug(apply) printf("_aApplyRdw2.unittest\n");
940 foreach_reverse(k, wchar d; s)
942 //printf("i = %d, k = %d, d = %x\n", i, k, d);
946 case 0: assert(d == 'o'); break;
947 case 1: assert(d == 'l'); break;
948 case 2: assert(d == 'l'); break;
949 case 3: assert(d == 'e'); break;
950 case 4: assert(d == 'h'); break;
// Mixed-width input: both halves of the surrogate pair are expected with the
// same index (k == 2), high half (0xDBC1) before low half (0xDC56).
957 s = "a\u1234\U00100456b";
959 foreach_reverse(k, wchar d; s)
961 //printf("i = %d, k = %d, d = %x\n", i, k, d);
964 case 0: assert(k == 3); assert(d == 'b'); break;
965 case 1: assert(k == 2); assert(d == 0xDBC1); break;
966 case 2: assert(k == 2); assert(d == 0xDC56); break;
967 case 3: assert(k == 1); assert(d == 0x1234); break;
968 case 4: assert(k == 0); assert(d == 'a'); break;