ICU 4.2.1
Main Page
Related Pages
Modules
Data Structures
Files
File List
Globals
All
Data Structures
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
common
unicode
utf_old.h
Go to the documentation of this file.
1
/*
2
*******************************************************************************
3
*
4
* Copyright (C) 2002-2008, International Business Machines
5
* Corporation and others. All Rights Reserved.
6
*
7
*******************************************************************************
8
* file name: utf_old.h
9
* encoding: US-ASCII
10
* tab size: 8 (not used)
11
* indentation:4
12
*
13
* created on: 2002sep21
14
* created by: Markus W. Scherer
15
*/
16
146
#ifndef __UTF_OLD_H__
147
#define __UTF_OLD_H__
148
149
#ifndef U_HIDE_DEPRECATED_API
150
151
/* utf.h must be included first. */
152
#ifndef __UTF_H__
153
# include "
unicode/utf.h
"
154
#endif
155
156
/* Formerly utf.h, part 1 --------------------------------------------------- */
157
158
#ifdef U_USE_UTF_DEPRECATES
159
166
/* 32-bit signed offset type; only declared when U_USE_UTF_DEPRECATES is defined. */
typedef int32_t UTextOffset;
167
#endif
168
170
/* Presumably the width in bits of the default code unit (the generic UTF_*
 * macros in this file forward to the UTF-16 forms) -- confirm with callers. */
#define UTF_SIZE 16

/* Flavour switches: "safe" is selected, the other two are explicitly cleared. */
#define UTF_SAFE
#undef UTF_UNSAFE
#undef UTF_STRICT

/* Value stored by UTF8_NEXT_CHAR_SAFE / UTF8_PREV_CHAR_SAFE when the byte at
 * the current position cannot start (or end) a sequence. */
#define UTF8_ERROR_VALUE_1 0x15

/* Second UTF-8 error marker; in this file it is only tested for by
 * UTF_IS_ERROR and UTF_IS_VALID. */
#define UTF8_ERROR_VALUE_2 0x9f

/* Value stored by the UTF-16 and UTF-32 "safe" macros for rejected input. */
#define UTF_ERROR_VALUE 0xffff
214
221
/* True if c is one of the values the macros in this file use to report an
 * error: low 16 bits equal to 0xfffe/0xffff, or either UTF-8 error value. */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/* True if c passes UTF_IS_UNICODE_CHAR and is not a UTF-8 error value. */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)

/* True for 0xd800..0xdfff (any surrogate code unit). */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/* True for 0xfdd0..0xfdef and for every value up to 0x10ffff whose low
 * 16 bits are 0xfffe or 0xffff. */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/* True if c lies in 0..0x10ffff, is not a surrogate, and is not matched by
 * UTF_IS_UNICODE_NONCHAR. */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
269
270
/* Formerly utf8.h ---------------------------------------------------------- */
271
276
/* Looks up, in the utf8_countTrailBytes table, the entry for a lead byte.
 * The argument is parenthesized before the cast so that an expression
 * argument is narrowed as a whole, not just its first operand. */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
277
282
/* Clears, in place, all but the low (6 - countTrailBytes) bits of leadByte.
 * leadByte must be a modifiable lvalue. */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/* True if bit 7 is clear (0x00..0x7f). */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)

/* True for 0xc0..0xfd. */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/* True if the top two bits are 10 (0x80..0xbf). */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/* True if c is above 0x7f, i.e. does not fit in a single byte. */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
293
307
/*
 * Number of bytes used to encode code point c in UTF-8.
 *
 * Active variant: 1 for c<=0x7f, 2 for c<=0x7ff, 4 for 0x10000..0x10ffff and
 * 3 for everything else (including values above 0x10ffff).
 * Disabled variant: additionally reports 5 and 6 for values up to 0x3ffffff
 * and 0x7fffffff; kept for reference only.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif
328
330
/* Largest value the active UTF8_CHAR_LENGTH variant can return. */
#define UTF8_MAX_CHAR_LENGTH 4

/* Scales a size by 5/2 using integer arithmetic (rounds down). */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
334
336
/* Reads into c the code point that covers byte offset i of s, without any
 * bounds or well-formedness checks. i itself is not modified: the macro
 * works on a private copy that it first moves back to the sequence start. */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t utf8GetUnsafePos_=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, utf8GetUnsafePos_); \
    UTF8_NEXT_CHAR_UNSAFE(s, utf8GetUnsafePos_, c); \
}

/* Checked counterpart of UTF8_GET_CHAR_UNSAFE: start and length bound the
 * backward and forward scans and strict is handed on to UTF8_NEXT_CHAR_SAFE.
 * i is not modified. */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t utf8GetSafePos_=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, utf8GetSafePos_); \
    UTF8_NEXT_CHAR_SAFE(s, utf8GetSafePos_, length, c, strict); \
}
348
350
/* Decodes the sequence starting at s[i] into c and advances i past it.
 * No bounds or well-formedness checks. Only lead bytes 0xc0..0xf4 trigger
 * multi-byte decoding; every other byte is returned as-is. */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t utf8TrailCount_=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, utf8TrailCount_); \
        switch(utf8TrailCount_) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}
368
370
/* Writes code point c at s[i] as 1 to 4 UTF-8 bytes and advances i past
 * them. No capacity or range checks. The trailing bytes shared by the
 * longer forms are emitted once, after the nested branches. */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
388
390
/* Advances i by one sequence: one byte plus the trail-byte count looked up
 * for the byte at s[i]. No bounds checks. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/* Applies UTF8_FWD_1_UNSAFE n times; does nothing when n<=0.
 * n is evaluated once. */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t utf8FwdLeft_=(n); \
    while(utf8FwdLeft_>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --utf8FwdLeft_; \
    } \
}
402
404
/* Moves i backwards while s[i] is a trail byte, leaving it on the first
 * non-trail byte at or before the original position. No lower bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}
407
409
/* Reads the byte at s[i] into c and advances i. Values below 0x80 are
 * returned directly; a lead byte is handed to utf8_nextCharSafeBody (which
 * receives &i, length and strict) and c becomes its result; any other byte
 * yields UTF8_ERROR_VALUE_1. */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
419
421
/* Appends c at s[i]. Values up to 0x7f are stored inline as one byte (no
 * capacity check on that path); everything else is delegated to
 * utf8_appendCharSafeBody, whose return value becomes the new i. */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}
428
430
/* Checked forward/start-alignment operations: each old name forwards its
 * arguments unchanged to the U8_* macro named on the right. */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
437
439
/* Moves i back over the sequence that ends just before s[i] and stores the
 * decoded value in c. No bounds or well-formedness checks: the backward scan
 * only stops at a byte >= 0xc0. */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t utf8Byte_, utf8Seen_=1, utf8Shift_=6; \
        \
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            utf8Byte_=(s)[--(i)]; \
            if(utf8Byte_>=0xc0) { \
                /* lead byte: keep its payload bits and finish */ \
                UTF8_MASK_LEAD_BYTE(utf8Byte_, utf8Seen_); \
                (c)|=(UChar32)utf8Byte_<<utf8Shift_; \
                break; \
            } else { \
                /* another trail byte: 6 more payload bits */ \
                (c)|=(UChar32)(utf8Byte_&0x3f)<<utf8Shift_; \
                ++utf8Seen_; \
                utf8Shift_+=6; \
            } \
        } \
    } \
}
460
462
/* Decrements i at least once, then keeps decrementing while the byte it
 * lands on is a trail byte. No lower bound check. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/* Applies UTF8_BACK_1_UNSAFE n times; does nothing when n<=0.
 * n is evaluated once. */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t utf8BackLeft_=(n); \
    while(utf8BackLeft_>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --utf8BackLeft_; \
    } \
}
474
476
/* Steps back to the start of the sequence that precedes or contains s[i-1]
 * and then forward over it, so i ends up just past that sequence.
 * No bounds checks. */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
480
482
/* Decrements i and reads s[i] into c. Values below 0x80 are returned
 * directly; a trail byte (0x80..0xbf) is handed to utf8_prevCharSafeBody
 * (which receives start, &i and strict) and c becomes its result; any
 * higher byte yields UTF8_ERROR_VALUE_1. */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
492
494
/* Checked backward/limit-alignment operations: each old name forwards its
 * arguments unchanged to the U8_* macro named on the right. */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
501
502
/* Formerly utf16.h --------------------------------------------------------- */
503
505
/* True for 0xd800..0xdbff. */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/* True for 0xdc00..0xdfff. */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/* Only tests bit 10: for a value already known to be a surrogate, a clear
 * bit means it is a first surrogate. */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

/* Constant removed by UTF16_GET_PAIR_VALUE so that the pair
 * (0xd800, 0xdc00) maps to 0x10000. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/* Combines a first and a second surrogate into one supplementary value.
 * Neither argument is validated. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/* First surrogate for a supplementary value; the whole expansion is
 * parenthesized so the cast cannot combine with surrounding tokens. */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/* Second surrogate for a supplementary value (low 10 bits | 0xdc00). */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/* Alternative names for the two macros above. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
531
533
/* True if uchar is not a surrogate. The expansion is parenthesized so the
 * negation stays attached to the test in any surrounding expression. */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/* UTF16_* spellings of the first/second surrogate tests. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

/* True if c is above 0xffff, i.e. needs two code units. */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/* 1 for c<=0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/* Largest value UTF16_CHAR_LENGTH can return. */
#define UTF16_MAX_CHAR_LENGTH 2

/* Identity: returns size unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)
552
564
/* Reads into c the code point that covers s[i]; i is not modified.
 * If s[i] is a surrogate it is combined with s[i+1] (when it is a first
 * surrogate) or s[i-1] (otherwise) without checking the neighbour or the
 * array bounds. */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}
574
576
/* Checked read of the code point that covers s[i]; i is not modified.
 * A surrogate is combined with its neighbour only when that neighbour lies
 * inside [start, length) and is the matching kind of surrogate. Otherwise c
 * keeps the lone surrogate, or becomes UTF_ERROR_VALUE when strict is
 * non-zero. With strict set, a non-surrogate that fails UTF_IS_UNICODE_CHAR
 * also becomes UTF_ERROR_VALUE. */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t utf16Other_; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(utf16Other_=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), utf16Other_); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) { \
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(utf16Other_=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(utf16Other_, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) { \
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
601
603
/* Reads s[i] into c and advances i; if that unit is a first surrogate the
 * next unit is consumed as well and combined with it, unchecked. */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}
609
611
/* Writes c at s[i] as one unit (c<=0xffff) or as a surrogate pair and
 * advances i accordingly. No capacity or range checks. */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}
619
621
/* Advances i by one unit, or by two when the unit stepped over is a first
 * surrogate. No bounds checks. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/* Applies UTF16_FWD_1_UNSAFE n times; does nothing when n<=0.
 * n is evaluated once. */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t utf16FwdLeft_=(n); \
    while(utf16FwdLeft_>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --utf16FwdLeft_; \
    } \
}
635
637
/* Moves i back by one when s[i] is a second surrogate; the preceding unit
 * is not inspected. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
642
644
/* Reads s[i] into c and advances i. A first surrogate is combined with the
 * following unit only if i is still below length and that unit is a second
 * surrogate (i then advances again). Otherwise c keeps the lone surrogate,
 * or becomes UTF_ERROR_VALUE when strict is non-zero. With strict set, any
 * other unit that fails UTF_IS_UNICODE_CHAR also becomes UTF_ERROR_VALUE. */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t utf16Other_; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(utf16Other_=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), utf16Other_); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) { \
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
661
663
/* Appends c at s[i] and advances i. c<=0xffff is stored as one unit (no
 * capacity check on that path). A supplementary value is stored as a
 * surrogate pair only if two units fit below length; if they do not, or if
 * c is above 0x10ffff, a single UTF_ERROR_VALUE unit is written instead. */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
677
679
/* Checked forward/start-alignment operations: each old name forwards its
 * arguments unchanged to the U16_* macro named on the right. */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
686
688
/* Decrements i and reads s[i] into c; if that unit is a second surrogate,
 * i is decremented again and the unit found there is combined with it,
 * unchecked. */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}
694
696
/* Moves i back by one unit, or by two when the unit it first lands on is a
 * second surrogate. No bounds checks. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/* Applies UTF16_BACK_1_UNSAFE n times; does nothing when n<=0.
 * n is evaluated once. */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t utf16BackLeft_=(n); \
    while(utf16BackLeft_>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --utf16BackLeft_; \
    } \
}
710
712
/* Advances i by one when the unit just before it, s[i-1], is a first
 * surrogate; the unit at s[i] is not inspected. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}
717
719
/* Decrements i and reads s[i] into c. A second surrogate is combined with
 * the preceding unit only if i is still above start and that unit is a
 * first surrogate (i is then decremented again). Otherwise c keeps the lone
 * surrogate, or becomes UTF_ERROR_VALUE when strict is non-zero. With strict
 * set, any other unit that fails UTF_IS_UNICODE_CHAR also becomes
 * UTF_ERROR_VALUE. */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t utf16Other_; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(utf16Other_=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(utf16Other_, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) { \
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
736
738
/* Checked backward/limit-alignment operations: each old name forwards its
 * arguments unchanged to the U16_* macro named on the right. */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
745
746
/* Formerly utf32.h --------------------------------------------------------- */
747
748
/*
749
* Old documentation:
750
*
751
* <p>This file defines macros to deal with UTF-32 code units and code points.
752
* Signatures and semantics are the same as for the similarly named macros
753
* in utf16.h.
754
* utf32.h is included by utf.h after unicode/umachine.h</p>
755
* and some common definitions.
756
* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
757
* Compound statements (curly braces {}) must be used for if-else-while...
758
* bodies and all macro statements should be terminated with semicolon.</p>
759
*/
760
761
/* internal definitions ----------------------------------------------------- */
762
764
/* Acceptance test used by the UTF-32 "safe" macros: without strict, any
 * value up to 0x10ffff passes; with strict, c must pass UTF_IS_UNICODE_CHAR. */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))
768
769
/*
770
* For the semantics of all of these macros, see utf16.h.
771
* The UTF-32 versions are trivial because any code point is
772
* encoded using exactly one code unit.
773
*/
774
775
/* single-code point definitions -------------------------------------------- */
776
777
/* classes of code unit values */
778
780
/* Constant answers: every UTF-32 unit is reported as single, never as a
 * lead or trail unit. The argument is ignored. */
#define UTF32_IS_SINGLE(uchar) 1

#define UTF32_IS_LEAD(uchar) 0

#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

#define UTF32_CHAR_LENGTH(c) 1

#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

#define UTF32_ARRAY_SIZE(size) (size)
799
801
/* Copies s[i] into c; i is not modified. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/* Copies s[i] into c, then replaces c with UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). start and length are accepted but not used. */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
812
813
/* definitions with forward iteration --------------------------------------- */
814
816
/* Reads s[i] into c and advances i by one. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/* Stores c at s[i] and advances i by one. No checks. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/* Advances i by one. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/* Advances i by n. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/* Intentionally empty: every index already starts a code point. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}
838
840
/* Reads s[i] into c, advances i, and replaces c with UTF_ERROR_VALUE if it
 * fails UTF32_IS_SAFE(c, strict). length is accepted but not used. */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Stores c at s[i] and advances i; a value above 0x10ffff is replaced by
 * 0xfffd. length is accepted but not used. */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/* Advances i by one. length is accepted but not checked. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/* Advances i by n, then clamps it to length if it went past. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/* Intentionally empty: every index already starts a code point. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}
871
872
/* definitions with backward iteration -------------------------------------- */
873
875
/* Decrements i and reads s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/* Moves i back by one. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/* Moves i back by n. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/* Intentionally empty: every index is already a code point limit. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}
892
894
/* Decrements i, reads s[i] into c, and replaces c with UTF_ERROR_VALUE if
 * it fails UTF32_IS_SAFE(c, strict). start is accepted but not used. */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Moves i back by one. start is accepted but not checked. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/* Moves i back by n, then clamps it to start if it went below. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/* Intentionally empty.
 * NOTE(review): takes (s, i, length) while the UTF8_/UTF16_ macros of the
 * same name take (s, start, i, length); left as-is for existing callers. */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}
917
918
/* Formerly utf.h, part 2 --------------------------------------------------- */
919
925
/* Encoding-neutral names: each UTF_* macro below forwards its arguments
 * unchanged to the UTF16_* macro of the same name. */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/* random access */
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

/* read and advance */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

/* write and advance */
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/* skip forward */
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)

#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)

/* align to a code point start */
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)

/* step back and read */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

/* skip backward */
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)

#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)

/* align to a code point limit */
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
995
996
/* Define default macros (UTF-16 "safe") ------------------------------------ */
997
1003
/* Default (suffix-less) names. All but two forward to a U16_* macro;
 * UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR forward to the UTF16_* macros
 * defined in this file. */

/* code unit classification */
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)

/* code units per code point */
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH

/* read / write */
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/* forward movement */
#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)

/* backward movement */
#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
1169
1170
#endif
/* U_HIDE_DEPRECATED_API */
1171
1172
#endif
1173
utf.h
C API: Code point macros.
int32_t
signed int int32_t
Define 64 bit limits.
Definition:
pwin32.h:143
Generated by
1.8.5