ICU 57.1  57.1
uchar.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1997-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File UCHAR.H
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 04/02/97 aliu Creation.
13 * 03/29/99 helena Updated for C APIs.
14 * 4/15/99 Madhu Updated for C Implementation and Javadoc
15 * 5/20/99 Madhu Added the function u_getVersion()
16 * 8/19/1999 srl Upgraded scripts to Unicode 3.0
17 * 8/27/1999 schererm UCharDirection constants: U_...
18 * 11/11/1999 weiv added u_isalnum(), cleaned comments
19 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
20 ******************************************************************************
21 */
22 
23 #ifndef UCHAR_H
24 #define UCHAR_H
25 
26 #include "unicode/utypes.h"
27 
29 
30 /*==========================================================================*/
31 /* Unicode version number */
32 /*==========================================================================*/
42 #define U_UNICODE_VERSION "8.0"
43 
124 #define UCHAR_MIN_VALUE 0
125 
134 #define UCHAR_MAX_VALUE 0x10ffff
135 
/*
 * Builds a uint32_t with only bit number x set (1 shifted left by x).
 * x must be in the range 0..31: shifting a 32-bit value by 32 or more
 * is undefined behavior in C.
 */
140 #define U_MASK(x) ((uint32_t)1<<(x))
141 
161 typedef enum UProperty {
162  /*
163  * Note: UProperty constants are parsed by preparseucd.py.
164  * It matches lines like
165  * UCHAR_<Unicode property name>=<integer>,
166  */
167 
168  /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
169  debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
170  rather than UCHAR_BINARY_START. Likewise for other *_START
171  identifiers. */
172 
400 #ifndef U_HIDE_DRAFT_API
401 
429 #endif /* U_HIDE_DRAFT_API */
430 
432 
440  UCHAR_BLOCK=0x1001,
468  UCHAR_SCRIPT=0x100A,
520 
534 
542 
545  UCHAR_AGE=0x4000,
554 #ifndef U_HIDE_DEPRECATED_API
555 
558 #endif /* U_HIDE_DEPRECATED_API */
559 
564  UCHAR_NAME=0x4005,
580 #ifndef U_HIDE_DEPRECATED_API
581 
586 #endif /* U_HIDE_DEPRECATED_API */
587 
595 
609 } UProperty;
610 
616 typedef enum UCharCategory
617 {
618  /*
619  * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
620  * It matches pairs of lines like
621  * / ** <Unicode 2-letter General_Category value> comment... * /
622  * U_<[A-Z_]+> = <integer>,
623  */
624 
689 } UCharCategory;
690 
/*
 * Single-bit masks, one per general category: each expands to
 * U_MASK(<category constant>), so the bit index is the constant's value.
 * The two letters in each macro name are the category abbreviation.
 * NOTE(review): the category constants used as arguments are expected to be
 * the UCharCategory enumerators, whose lines are missing from this listing.
 */
/* Other, not assigned (Cn) */
705 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
706 
/* Letters (Lu, Ll, Lt, Lm, Lo) */
708 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
709 
710 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
711 
712 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
713 
714 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
715 
716 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
717 
/* Marks (Mn, Me, Mc) */
719 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
720 
721 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
722 
723 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
724 
/* Numbers (Nd, Nl, No) */
726 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
727 
728 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
729 
730 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
731 
/* Separators (Zs, Zl, Zp) */
733 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
734 
735 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
736 
737 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
738 
/* Control, format, private use, surrogate (Cc, Cf, Co, Cs) */
740 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
741 
742 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
743 
744 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
745 
746 #define U_GC_CS_MASK U_MASK(U_SURROGATE)
747 
/* Punctuation (Pd, Ps, Pe, Pc, Po) */
749 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
750 
751 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
752 
753 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
754 
755 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
756 
757 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
758 
/* Symbols (Sm, Sc, Sk, So) */
760 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
761 
762 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
763 
764 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
765 
766 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
767 
/* Initial and final punctuation (Pi, Pf) */
769 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
770 
771 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
772 
773 
/* Union of the five letter-category masks: Lu, Ll, Lt, Lm and Lo. */
775 #define U_GC_L_MASK \
776  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
777 
/* Union of the uppercase, lowercase and titlecase letter masks: Lu, Ll and Lt. */
779 #define U_GC_LC_MASK \
780  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
781 
/* Union of the three mark-category masks: Mn, Me and Mc. */
783 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
784 
/* Union of the three number-category masks: Nd, Nl and No. */
786 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
787 
/* Union of the three separator-category masks: Zs, Zl and Zp. */
789 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
790 
/*
 * Union of the five C* category masks: Cn (not assigned), Cc (control),
 * Cf (format), Co (private use) and Cs (surrogate).
 */
792 #define U_GC_C_MASK \
793  (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
794 
/* Union of the seven punctuation-category masks: Pd, Ps, Pe, Pc, Po, Pi and Pf. */
796 #define U_GC_P_MASK \
797  (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
798  U_GC_PI_MASK|U_GC_PF_MASK)
799 
/* Union of the four symbol-category masks: Sm, Sc, Sk and So. */
801 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
802 
807 typedef enum UCharDirection {
808  /*
809  * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
810  * It matches pairs of lines like
811  * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
812  * U_<[A-Z_]+> = <integer>,
813  */
814 
864 
872  /*
873  * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
874  * It matches lines like
875  * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
876  */
877 
885  U_BPT_COUNT /* 3 */
887 
893  /*
894  * Note: UBlockCode constants are parsed by preparseucd.py.
895  * It matches lines like
896  * UBLOCK_<Unicode Block value name> = <integer>,
897  */
898 
900  UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
901 
903  UBLOCK_BASIC_LATIN = 1, /*[0000]*/
904 
907 
909  UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
910 
912  UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
913 
915  UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
916 
919 
922 
927  UBLOCK_GREEK =8, /*[0370]*/
928 
930  UBLOCK_CYRILLIC =9, /*[0400]*/
931 
933  UBLOCK_ARMENIAN =10, /*[0530]*/
934 
936  UBLOCK_HEBREW =11, /*[0590]*/
937 
939  UBLOCK_ARABIC =12, /*[0600]*/
940 
942  UBLOCK_SYRIAC =13, /*[0700]*/
943 
945  UBLOCK_THAANA =14, /*[0780]*/
946 
948  UBLOCK_DEVANAGARI =15, /*[0900]*/
949 
951  UBLOCK_BENGALI =16, /*[0980]*/
952 
954  UBLOCK_GURMUKHI =17, /*[0A00]*/
955 
957  UBLOCK_GUJARATI =18, /*[0A80]*/
958 
960  UBLOCK_ORIYA =19, /*[0B00]*/
961 
963  UBLOCK_TAMIL =20, /*[0B80]*/
964 
966  UBLOCK_TELUGU =21, /*[0C00]*/
967 
969  UBLOCK_KANNADA =22, /*[0C80]*/
970 
972  UBLOCK_MALAYALAM =23, /*[0D00]*/
973 
975  UBLOCK_SINHALA =24, /*[0D80]*/
976 
978  UBLOCK_THAI =25, /*[0E00]*/
979 
981  UBLOCK_LAO =26, /*[0E80]*/
982 
984  UBLOCK_TIBETAN =27, /*[0F00]*/
985 
987  UBLOCK_MYANMAR =28, /*[1000]*/
988 
990  UBLOCK_GEORGIAN =29, /*[10A0]*/
991 
993  UBLOCK_HANGUL_JAMO =30, /*[1100]*/
994 
996  UBLOCK_ETHIOPIC =31, /*[1200]*/
997 
999  UBLOCK_CHEROKEE =32, /*[13A0]*/
1000 
1003 
1005  UBLOCK_OGHAM =34, /*[1680]*/
1006 
1008  UBLOCK_RUNIC =35, /*[16A0]*/
1009 
1011  UBLOCK_KHMER =36, /*[1780]*/
1012 
1014  UBLOCK_MONGOLIAN =37, /*[1800]*/
1015 
1018 
1020  UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
1021 
1024 
1027 
1029  UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
1030 
1036 
1039 
1041  UBLOCK_NUMBER_FORMS =45, /*[2150]*/
1042 
1044  UBLOCK_ARROWS =46, /*[2190]*/
1045 
1048 
1051 
1053  UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
1054 
1057 
1060 
1062  UBLOCK_BOX_DRAWING =52, /*[2500]*/
1063 
1065  UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
1066 
1068  UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
1069 
1072 
1074  UBLOCK_DINGBATS =56, /*[2700]*/
1075 
1077  UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
1078 
1081 
1083  UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
1084 
1087 
1090 
1092  UBLOCK_HIRAGANA =62, /*[3040]*/
1093 
1095  UBLOCK_KATAKANA =63, /*[30A0]*/
1096 
1098  UBLOCK_BOPOMOFO =64, /*[3100]*/
1099 
1102 
1104  UBLOCK_KANBUN =66, /*[3190]*/
1105 
1108 
1111 
1114 
1117 
1120 
1122  UBLOCK_YI_SYLLABLES =72, /*[A000]*/
1123 
1125  UBLOCK_YI_RADICALS =73, /*[A490]*/
1126 
1128  UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
1129 
1131  UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
1132 
1135 
1137  UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
1138 
1148  UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
1159 
1162 
1165 
1168 
1171 
1174 
1177 
1180 
1182  UBLOCK_SPECIALS =86, /*[FFF0]*/
1183 
1186 
1187  /* New blocks in Unicode 3.1 */
1188 
1190  UBLOCK_OLD_ITALIC = 88, /*[10300]*/
1192  UBLOCK_GOTHIC = 89, /*[10330]*/
1194  UBLOCK_DESERET = 90, /*[10400]*/
1198  UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
1206  UBLOCK_TAGS = 96, /*[E0000]*/
1207 
1208  /* New blocks in Unicode 3.2 */
1209 
1218  UBLOCK_TAGALOG = 98, /*[1700]*/
1220  UBLOCK_HANUNOO = 99, /*[1720]*/
1222  UBLOCK_BUHID = 100, /*[1740]*/
1224  UBLOCK_TAGBANWA = 101, /*[1760]*/
1238  UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
1243 
1244  /* New blocks in Unicode 4 */
1245 
1247  UBLOCK_LIMBU = 111, /*[1900]*/
1249  UBLOCK_TAI_LE = 112, /*[1950]*/
1251  UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
1253  UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
1259  UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
1261  UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
1263  UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
1265  UBLOCK_UGARITIC = 120, /*[10380]*/
1267  UBLOCK_SHAVIAN = 121, /*[10450]*/
1269  UBLOCK_OSMANYA = 122, /*[10480]*/
1271  UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
1273  UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
1276 
1277  /* New blocks in Unicode 4.1 */
1278 
1282  UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
1284  UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
1286  UBLOCK_BUGINESE = 129, /*[1A00]*/
1288  UBLOCK_CJK_STROKES = 130, /*[31C0]*/
1292  UBLOCK_COPTIC = 132, /*[2C80]*/
1294  UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
1296  UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
1298  UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
1300  UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
1302  UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
1306  UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
1308  UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
1314  UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
1316  UBLOCK_TIFINAGH = 144, /*[2D30]*/
1318  UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
1319 
1320  /* New blocks in Unicode 5.0 */
1321 
1323  UBLOCK_NKO = 146, /*[07C0]*/
1325  UBLOCK_BALINESE = 147, /*[1B00]*/
1327  UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
1329  UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
1331  UBLOCK_PHAGS_PA = 150, /*[A840]*/
1333  UBLOCK_PHOENICIAN = 151, /*[10900]*/
1335  UBLOCK_CUNEIFORM = 152, /*[12000]*/
1339  UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
1340 
1341  /* New blocks in Unicode 5.1 */
1342 
1344  UBLOCK_SUNDANESE = 155, /*[1B80]*/
1346  UBLOCK_LEPCHA = 156, /*[1C00]*/
1348  UBLOCK_OL_CHIKI = 157, /*[1C50]*/
1350  UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
1352  UBLOCK_VAI = 159, /*[A500]*/
1354  UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
1356  UBLOCK_SAURASHTRA = 161, /*[A880]*/
1358  UBLOCK_KAYAH_LI = 162, /*[A900]*/
1360  UBLOCK_REJANG = 163, /*[A930]*/
1362  UBLOCK_CHAM = 164, /*[AA00]*/
1364  UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
1366  UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
1368  UBLOCK_LYCIAN = 167, /*[10280]*/
1370  UBLOCK_CARIAN = 168, /*[102A0]*/
1372  UBLOCK_LYDIAN = 169, /*[10920]*/
1374  UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
1376  UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
1377 
1378  /* New blocks in Unicode 5.2 */
1379 
1381  UBLOCK_SAMARITAN = 172, /*[0800]*/
1385  UBLOCK_TAI_THAM = 174, /*[1A20]*/
1387  UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
1389  UBLOCK_LISU = 176, /*[A4D0]*/
1391  UBLOCK_BAMUM = 177, /*[A6A0]*/
1395  UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
1399  UBLOCK_JAVANESE = 181, /*[A980]*/
1401  UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
1403  UBLOCK_TAI_VIET = 183, /*[AA80]*/
1405  UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
1409  UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
1411  UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
1413  UBLOCK_AVESTAN = 188, /*[10B00]*/
1417  UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
1419  UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
1421  UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
1423  UBLOCK_KAITHI = 193, /*[11080]*/
1425  UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
1432 
1433  /* New blocks in Unicode 6.0 */
1434 
1436  UBLOCK_MANDAIC = 198, /*[0840]*/
1438  UBLOCK_BATAK = 199, /*[1BC0]*/
1440  UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
1442  UBLOCK_BRAHMI = 201, /*[11000]*/
1444  UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
1446  UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
1448  UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
1452  UBLOCK_EMOTICONS = 206, /*[1F600]*/
1456  UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
1459 
1460  /* New blocks in Unicode 6.1 */
1461 
1463  UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
1467  UBLOCK_CHAKMA = 212, /*[11100]*/
1471  UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
1473  UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
1475  UBLOCK_MIAO = 216, /*[16F00]*/
1477  UBLOCK_SHARADA = 217, /*[11180]*/
1479  UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
1483  UBLOCK_TAKRI = 220, /*[11680]*/
1484 
1485  /* New blocks in Unicode 7.0 */
1486 
1488  UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
1490  UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
1492  UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
1496  UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
1498  UBLOCK_ELBASAN = 226, /*[10500]*/
1502  UBLOCK_GRANTHA = 228, /*[11300]*/
1504  UBLOCK_KHOJKI = 229, /*[11200]*/
1506  UBLOCK_KHUDAWADI = 230, /*[112B0]*/
1508  UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
1510  UBLOCK_LINEAR_A = 232, /*[10600]*/
1512  UBLOCK_MAHAJANI = 233, /*[11150]*/
1514  UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
1516  UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
1518  UBLOCK_MODI = 236, /*[11600]*/
1520  UBLOCK_MRO = 237, /*[16A40]*/
1522  UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
1524  UBLOCK_NABATAEAN = 239, /*[10880]*/
1526  UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
1528  UBLOCK_OLD_PERMIC = 241, /*[10350]*/
1530  UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
1532  UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
1534  UBLOCK_PALMYRENE = 244, /*[10860]*/
1536  UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
1538  UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
1542  UBLOCK_SIDDHAM = 248, /*[11580]*/
1546  UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
1548  UBLOCK_TIRHUTA = 251, /*[11480]*/
1550  UBLOCK_WARANG_CITI = 252, /*[118A0]*/
1551 
1552  /* New blocks in Unicode 8.0 */
1553 
1555  UBLOCK_AHOM = 253, /*[11700]*/
1557  UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
1559  UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
1565  UBLOCK_HATRAN = 258, /*[108E0]*/
1567  UBLOCK_MULTANI = 259, /*[11280]*/
1569  UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
1573  UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
1574 
1577 
1580 };
1581 
1583 typedef enum UBlockCode UBlockCode;
1584 
/*
 * East Asian Width property values.
 * No explicit initializers are given, so the enumerators number consecutively
 * from 0 and the trailing COUNT enumerator equals the number of values (6).
 * The bracketed tag on each line looks like the short Unicode value alias
 * -- TODO confirm.
 */
1592 typedef enum UEastAsianWidth {
1593  /*
1594  * Note: UEastAsianWidth constants are parsed by preparseucd.py.
1595  * It matches lines like
1596  * U_EA_<Unicode East_Asian_Width value name>
1597  */
1598 
1599  U_EA_NEUTRAL, /*[N]*/
1600  U_EA_AMBIGUOUS, /*[A]*/
1601  U_EA_HALFWIDTH, /*[H]*/
1602  U_EA_FULLWIDTH, /*[F]*/
1603  U_EA_NARROW, /*[Na]*/
1604  U_EA_WIDE, /*[W]*/
1605  U_EA_COUNT /* 6 */
1606 } UEastAsianWidth;
1607 
1619 typedef enum UCharNameChoice {
1622 #ifndef U_HIDE_DEPRECATED_API
1623 
1629 #endif /* U_HIDE_DEPRECATED_API */
1630 
1636 } UCharNameChoice;
1637 
1651 typedef enum UPropertyNameChoice {
1652  U_SHORT_PROPERTY_NAME,
1653  U_LONG_PROPERTY_NAME,
1654  U_PROPERTY_NAME_CHOICE_COUNT
1656 
1663 typedef enum UDecompositionType {
1664  /*
1665  * Note: UDecompositionType constants are parsed by preparseucd.py.
1666  * It matches lines like
1667  * U_DT_<Unicode Decomposition_Type value name>
1668  */
1669 
1670  U_DT_NONE, /*[none]*/
1671  U_DT_CANONICAL, /*[can]*/
1672  U_DT_COMPAT, /*[com]*/
1673  U_DT_CIRCLE, /*[enc]*/
1674  U_DT_FINAL, /*[fin]*/
1675  U_DT_FONT, /*[font]*/
1676  U_DT_FRACTION, /*[fra]*/
1677  U_DT_INITIAL, /*[init]*/
1678  U_DT_ISOLATED, /*[iso]*/
1679  U_DT_MEDIAL, /*[med]*/
1680  U_DT_NARROW, /*[nar]*/
1681  U_DT_NOBREAK, /*[nb]*/
1682  U_DT_SMALL, /*[sml]*/
1683  U_DT_SQUARE, /*[sqr]*/
1684  U_DT_SUB, /*[sub]*/
1685  U_DT_SUPER, /*[sup]*/
1686  U_DT_VERTICAL, /*[vert]*/
1687  U_DT_WIDE, /*[wide]*/
1688  U_DT_COUNT /* 18 */
1690 
/*
 * Joining Type property values.
 * No explicit initializers are given, so the enumerators number consecutively
 * from 0 and the trailing COUNT enumerator equals the number of values (6).
 * The bracketed tag on each line looks like the short Unicode value alias
 * -- TODO confirm.
 */
1697 typedef enum UJoiningType {
1698  /*
1699  * Note: UJoiningType constants are parsed by preparseucd.py.
1700  * It matches lines like
1701  * U_JT_<Unicode Joining_Type value name>
1702  */
1703 
1704  U_JT_NON_JOINING, /*[U]*/
1705  U_JT_JOIN_CAUSING, /*[C]*/
1706  U_JT_DUAL_JOINING, /*[D]*/
1707  U_JT_LEFT_JOINING, /*[L]*/
1708  U_JT_RIGHT_JOINING, /*[R]*/
1709  U_JT_TRANSPARENT, /*[T]*/
1710  U_JT_COUNT /* 6 */
1711 } UJoiningType;
1712 
1719 typedef enum UJoiningGroup {
1720  /*
1721  * Note: UJoiningGroup constants are parsed by preparseucd.py.
1722  * It matches lines like
1723  * U_JG_<Unicode Joining_Group value name>
1724  */
1725 
1726  U_JG_NO_JOINING_GROUP,
1727  U_JG_AIN,
1728  U_JG_ALAPH,
1729  U_JG_ALEF,
1730  U_JG_BEH,
1731  U_JG_BETH,
1732  U_JG_DAL,
1733  U_JG_DALATH_RISH,
1734  U_JG_E,
1735  U_JG_FEH,
1736  U_JG_FINAL_SEMKATH,
1737  U_JG_GAF,
1738  U_JG_GAMAL,
1739  U_JG_HAH,
1741  U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
1742  U_JG_HE,
1743  U_JG_HEH,
1744  U_JG_HEH_GOAL,
1745  U_JG_HETH,
1746  U_JG_KAF,
1747  U_JG_KAPH,
1748  U_JG_KNOTTED_HEH,
1749  U_JG_LAM,
1750  U_JG_LAMADH,
1751  U_JG_MEEM,
1752  U_JG_MIM,
1753  U_JG_NOON,
1754  U_JG_NUN,
1755  U_JG_PE,
1756  U_JG_QAF,
1757  U_JG_QAPH,
1758  U_JG_REH,
1759  U_JG_REVERSED_PE,
1760  U_JG_SAD,
1761  U_JG_SADHE,
1762  U_JG_SEEN,
1763  U_JG_SEMKATH,
1764  U_JG_SHIN,
1765  U_JG_SWASH_KAF,
1766  U_JG_SYRIAC_WAW,
1767  U_JG_TAH,
1768  U_JG_TAW,
1769  U_JG_TEH_MARBUTA,
1770  U_JG_TETH,
1771  U_JG_WAW,
1772  U_JG_YEH,
1773  U_JG_YEH_BARREE,
1774  U_JG_YEH_WITH_TAIL,
1775  U_JG_YUDH,
1776  U_JG_YUDH_HE,
1777  U_JG_ZAIN,
1813  U_JG_COUNT
1814 } UJoiningGroup;
1815 
1823  /*
1824  * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
1825  * It matches lines like
1826  * U_GCB_<Unicode Grapheme_Cluster_Break value name>
1827  */
1828 
1829  U_GCB_OTHER = 0, /*[XX]*/
1830  U_GCB_CONTROL = 1, /*[CN]*/
1831  U_GCB_CR = 2, /*[CR]*/
1832  U_GCB_EXTEND = 3, /*[EX]*/
1833  U_GCB_L = 4, /*[L]*/
1834  U_GCB_LF = 5, /*[LF]*/
1835  U_GCB_LV = 6, /*[LV]*/
1836  U_GCB_LVT = 7, /*[LVT]*/
1837  U_GCB_T = 8, /*[T]*/
1838  U_GCB_V = 9, /*[V]*/
1839  U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1840  U_GCB_PREPEND = 11, /*[PP]*/
1841  U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
1842  U_GCB_COUNT = 13
1844 
1852 typedef enum UWordBreakValues {
1853  /*
1854  * Note: UWordBreakValues constants are parsed by preparseucd.py.
1855  * It matches lines like
1856  * U_WB_<Unicode Word_Break value name>
1857  */
1858 
1859  U_WB_OTHER = 0, /*[XX]*/
1860  U_WB_ALETTER = 1, /*[LE]*/
1861  U_WB_FORMAT = 2, /*[FO]*/
1862  U_WB_KATAKANA = 3, /*[KA]*/
1863  U_WB_MIDLETTER = 4, /*[ML]*/
1864  U_WB_MIDNUM = 5, /*[MN]*/
1865  U_WB_NUMERIC = 6, /*[NU]*/
1866  U_WB_EXTENDNUMLET = 7, /*[EX]*/
1867  U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1868  U_WB_EXTEND = 9, /*[Extend]*/
1869  U_WB_LF = 10, /*[LF]*/
1870  U_WB_MIDNUMLET =11, /*[MB]*/
1871  U_WB_NEWLINE =12, /*[NL]*/
1872  U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
1873  U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
1874  U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
1875  U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
1876  U_WB_COUNT = 17
1878 
/*
 * Sentence Break property values.
 * Every enumerator has an explicit value (0..14); the trailing COUNT
 * enumerator is 15, one more than the highest value.
 * The bracketed tag on each line looks like the short Unicode value alias
 * -- TODO confirm.
 */
1885 typedef enum USentenceBreak {
1886  /*
1887  * Note: USentenceBreak constants are parsed by preparseucd.py.
1888  * It matches lines like
1889  * U_SB_<Unicode Sentence_Break value name>
1890  */
1891 
1892  U_SB_OTHER = 0, /*[XX]*/
1893  U_SB_ATERM = 1, /*[AT]*/
1894  U_SB_CLOSE = 2, /*[CL]*/
1895  U_SB_FORMAT = 3, /*[FO]*/
1896  U_SB_LOWER = 4, /*[LO]*/
1897  U_SB_NUMERIC = 5, /*[NU]*/
1898  U_SB_OLETTER = 6, /*[LE]*/
1899  U_SB_SEP = 7, /*[SE]*/
1900  U_SB_SP = 8, /*[SP]*/
1901  U_SB_STERM = 9, /*[ST]*/
1902  U_SB_UPPER = 10, /*[UP]*/
1903  U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1904  U_SB_EXTEND = 12, /*[EX]*/
1905  U_SB_LF = 13, /*[LF]*/
1906  U_SB_SCONTINUE = 14, /*[SC]*/
1907  U_SB_COUNT = 15
1908 } USentenceBreak;
1909 
/*
 * Line Break property values.
 * Every distinct value is given explicitly (0..39); the trailing COUNT
 * enumerator is 40, one more than the highest value.
 * One enumerator is a second, misspelled name for value 15 (see its line).
 * The bracketed tag on each line looks like the short Unicode value alias
 * -- TODO confirm.
 */
1916 typedef enum ULineBreak {
1917  /*
1918  * Note: ULineBreak constants are parsed by preparseucd.py.
1919  * It matches lines like
1920  * U_LB_<Unicode Line_Break value name>
1921  */
1922 
1923  U_LB_UNKNOWN = 0, /*[XX]*/
1924  U_LB_AMBIGUOUS = 1, /*[AI]*/
1925  U_LB_ALPHABETIC = 2, /*[AL]*/
1926  U_LB_BREAK_BOTH = 3, /*[B2]*/
1927  U_LB_BREAK_AFTER = 4, /*[BA]*/
1928  U_LB_BREAK_BEFORE = 5, /*[BB]*/
1929  U_LB_MANDATORY_BREAK = 6, /*[BK]*/
1930  U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
1931  U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
1932  U_LB_COMBINING_MARK = 9, /*[CM]*/
1933  U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
1934  U_LB_EXCLAMATION = 11, /*[EX]*/
1935  U_LB_GLUE = 12, /*[GL]*/
1936  U_LB_HYPHEN = 13, /*[HY]*/
1937  U_LB_IDEOGRAPHIC = 14, /*[ID]*/
1939  U_LB_INSEPARABLE = 15, /*[IN]*/
1940  U_LB_INSEPERABLE = U_LB_INSEPARABLE, /* misspelled older name, kept as an alias with the same value */
1941  U_LB_INFIX_NUMERIC = 16, /*[IS]*/
1942  U_LB_LINE_FEED = 17, /*[LF]*/
1943  U_LB_NONSTARTER = 18, /*[NS]*/
1944  U_LB_NUMERIC = 19, /*[NU]*/
1945  U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
1946  U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
1947  U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
1948  U_LB_QUOTATION = 23, /*[QU]*/
1949  U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
1950  U_LB_SURROGATE = 25, /*[SG]*/
1951  U_LB_SPACE = 26, /*[SP]*/
1952  U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
1953  U_LB_ZWSPACE = 28, /*[ZW]*/
1954  U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
1955  U_LB_WORD_JOINER = 30, /*[WJ]*/
1956  U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
1957  U_LB_H3 = 32, /*[H3]*/
1958  U_LB_JL = 33, /*[JL]*/
1959  U_LB_JT = 34, /*[JT]*/
1960  U_LB_JV = 35, /*[JV]*/
1961  U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
1962  U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
1963  U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
1964  U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
1965  U_LB_COUNT = 40
1966 } ULineBreak;
1967 
/*
 * Numeric Type constants.
 * No explicit initializers are given, so the enumerators number consecutively
 * from 0 and the trailing COUNT enumerator equals the number of values (4).
 * The bracketed tag on each line looks like the short Unicode value alias
 * -- TODO confirm.
 */
1974 typedef enum UNumericType {
1975  /*
1976  * Note: UNumericType constants are parsed by preparseucd.py.
1977  * It matches lines like
1978  * U_NT_<Unicode Numeric_Type value name>
1979  */
1980 
1981  U_NT_NONE, /*[None]*/
1982  U_NT_DECIMAL, /*[de]*/
1983  U_NT_DIGIT, /*[di]*/
1984  U_NT_NUMERIC, /*[nu]*/
1985  U_NT_COUNT /* 4 */
1986 } UNumericType;
1987 
1994 typedef enum UHangulSyllableType {
1995  /*
1996  * Note: UHangulSyllableType constants are parsed by preparseucd.py.
1997  * It matches lines like
1998  * U_HST_<Unicode Hangul_Syllable_Type value name>
1999  */
2000 
2001  U_HST_NOT_APPLICABLE, /*[NA]*/
2002  U_HST_LEADING_JAMO, /*[L]*/
2003  U_HST_VOWEL_JAMO, /*[V]*/
2004  U_HST_TRAILING_JAMO, /*[T]*/
2005  U_HST_LV_SYLLABLE, /*[LV]*/
2006  U_HST_LVT_SYLLABLE, /*[LVT]*/
2007  U_HST_COUNT
2009 
2036 U_STABLE UBool U_EXPORT2
2038 
2051 U_STABLE UBool U_EXPORT2
2053 
2066 U_STABLE UBool U_EXPORT2
2068 
2081 U_STABLE UBool U_EXPORT2
2083 
2102 U_STABLE UBool U_EXPORT2
2104 
2142 U_STABLE int32_t U_EXPORT2
2144 
2163 U_STABLE int32_t U_EXPORT2
2165 
2192 U_STABLE int32_t U_EXPORT2
2194 
2217 U_STABLE double U_EXPORT2
2219 
/*
 * Distinguished double constant (-123456789.0).
 * NOTE(review): presumably the "no numeric value" result of the
 * double-returning function declared just above, whose name line is missing
 * from this listing -- confirm against the ICU API documentation.
 */
2227 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
2228 
2252 U_STABLE UBool U_EXPORT2
2253 u_islower(UChar32 c);
2254 
2279 U_STABLE UBool U_EXPORT2
2280 u_isupper(UChar32 c);
2281 
/**
 * Determines whether the specified code point is a titlecase letter.
 *
 * @param c the code point to be tested
 * @return a UBool result of the test
 */
2296 U_STABLE UBool U_EXPORT2
2297 u_istitle(UChar32 c);
2298 
2317 U_STABLE UBool U_EXPORT2
2318 u_isdigit(UChar32 c);
2319 
2338 U_STABLE UBool U_EXPORT2
2339 u_isalpha(UChar32 c);
2340 
2359 U_STABLE UBool U_EXPORT2
2360 u_isalnum(UChar32 c);
2361 
2382 U_STABLE UBool U_EXPORT2
2383 u_isxdigit(UChar32 c);
2384 
2398 U_STABLE UBool U_EXPORT2
2399 u_ispunct(UChar32 c);
2400 
/**
 * Determines whether the specified code point is a "graphic" character
 * (printable, excluding spaces).
 *
 * @param c the code point to be tested
 * @return a UBool result of the test
 */
2417 U_STABLE UBool U_EXPORT2
2418 u_isgraph(UChar32 c);
2419 
2446 U_STABLE UBool U_EXPORT2
2447 u_isblank(UChar32 c);
2448 
/**
 * Determines whether the specified code point is "defined", which usually
 * means that it is assigned.
 * NOTE(review): the summary available in this listing is truncated at that
 * point -- confirm the exact definition against the ICU API documentation.
 *
 * @param c the code point to be tested
 * @return a UBool result of the test
 */
2471 U_STABLE UBool U_EXPORT2
2472 u_isdefined(UChar32 c);
2473 
2492 U_STABLE UBool U_EXPORT2
2493 u_isspace(UChar32 c);
2494 
2513 U_STABLE UBool U_EXPORT2
2515 
2553 U_STABLE UBool U_EXPORT2
2555 
/**
 * Determines whether the specified code point is a control character
 * (as defined by this function).
 *
 * @param c the code point to be tested
 * @return a UBool result of the test
 */
2577 U_STABLE UBool U_EXPORT2
2578 u_iscntrl(UChar32 c);
2579 
2592 U_STABLE UBool U_EXPORT2
2594 
2610 U_STABLE UBool U_EXPORT2
2611 u_isprint(UChar32 c);
2612 
2631 U_STABLE UBool U_EXPORT2
2632 u_isbase(UChar32 c);
2633 
2650 U_STABLE UCharDirection U_EXPORT2
2652 
2668 U_STABLE UBool U_EXPORT2
2670 
2690 U_STABLE UChar32 U_EXPORT2
2692 
2709 U_STABLE UChar32 U_EXPORT2
2711 
2723 U_STABLE int8_t U_EXPORT2
2724 u_charType(UChar32 c);
2725 
/*
 * Expands to U_MASK(u_charType(c)): a uint32_t with the single bit set whose
 * index is the value returned by u_charType(c).
 * NOTE(review): presumably meant to be tested with & against the U_GC_*_MASK
 * constants -- confirm.
 */
2739 #define U_GET_GC_MASK(c) U_MASK(u_charType(c))
2740 
/**
 * Callback from u_enumCharTypes(); it is called for each contiguous range of
 * code points c between start and limit.
 * This typedef names a function type; u_enumCharTypes() takes a pointer to it.
 * NOTE(review): the range is presumably start<=c<limit, and the meaning of the
 * UBool return value is not visible in this listing -- confirm both.
 *
 * @param context the const void* passed to u_enumCharTypes()
 * @param start first code point of the range
 * @param limit end of the range
 * @param type the UCharCategory reported for the range
 */
2758 typedef UBool U_CALLCONV
2759 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
2760 
2780 U_STABLE void U_EXPORT2
2781 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
2782 
2783 #if !UCONFIG_NO_NORMALIZATION
2784 
2792 U_STABLE uint8_t U_EXPORT2
2794 
2795 #endif
2796 
2820 U_STABLE int32_t U_EXPORT2
2822 
2832 U_STABLE UBlockCode U_EXPORT2
2834 
2867 U_STABLE int32_t U_EXPORT2
2868 u_charName(UChar32 code, UCharNameChoice nameChoice,
2869  char *buffer, int32_t bufferLength,
2870  UErrorCode *pErrorCode);
2871 
2872 #ifndef U_HIDE_DEPRECATED_API
2873 
2891 U_DEPRECATED int32_t U_EXPORT2
2893  char *dest, int32_t destCapacity,
2894  UErrorCode *pErrorCode);
2895 #endif /* U_HIDE_DEPRECATED_API */
2896 
/**
 * Find a Unicode character by its name and return its code point value.
 *
 * @param nameChoice selects which kind of name is being looked up
 * @param name the character name, as a const char* string
 * @param pErrorCode pointer to a UErrorCode
 *        (NOTE(review): presumably ICU's usual in/out error code -- confirm)
 * @return the code point value, as a UChar32
 */
2917 U_STABLE UChar32 U_EXPORT2
2918 u_charFromName(UCharNameChoice nameChoice,
2919  const char *name,
2920  UErrorCode *pErrorCode);
2921 
/**
 * Type of a callback function for u_enumCharNames() that gets called for each
 * Unicode character.
 * This typedef names a function type; u_enumCharNames() takes a pointer to it.
 * NOTE(review): the summary available in this listing is truncated, and the
 * meaning of the UBool return value is not visible here -- confirm.
 *
 * @param context the void* passed to u_enumCharNames()
 * @param code the code point being reported
 * @param nameChoice the UCharNameChoice in effect
 * @param name the name, as a const char* string
 * @param length length associated with name
 *        (NOTE(review): presumably its length in chars -- confirm)
 */
2939 typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
2940  UChar32 code,
2941  UCharNameChoice nameChoice,
2942  const char *name,
2943  int32_t length);
2944 
2966 U_STABLE void U_EXPORT2
2967 u_enumCharNames(UChar32 start, UChar32 limit,
2968  UEnumCharNamesFn *fn,
2969  void *context,
2970  UCharNameChoice nameChoice,
2971  UErrorCode *pErrorCode);
2972 
/**
 * Return the Unicode name for a given property, as given in the Unicode
 * database file of property aliases.
 *
 * @param property the UProperty selector
 * @param nameChoice selector for which name to get (short or long, per the
 *        UPropertyNameChoice constants)
 * @return the name as a const char*
 *         (NOTE(review): ownership and the result for an unknown property are
 *         not visible in this listing -- confirm)
 */
3004 U_STABLE const char* U_EXPORT2
3005 u_getPropertyName(UProperty property,
3006  UPropertyNameChoice nameChoice);
3007 
3027 U_STABLE UProperty U_EXPORT2
3028 u_getPropertyEnum(const char* alias);
3029 
3077 U_STABLE const char* U_EXPORT2
3079  int32_t value,
3080  UPropertyNameChoice nameChoice);
3081 
3113 U_STABLE int32_t U_EXPORT2
3115  const char* alias);
3116 
3134 U_STABLE UBool U_EXPORT2
3135 u_isIDStart(UChar32 c);
3136 
3158 U_STABLE UBool U_EXPORT2
3159 u_isIDPart(UChar32 c);
3160 
3181 U_STABLE UBool U_EXPORT2
3183 
3200 U_STABLE UBool U_EXPORT2
3202 
3221 U_STABLE UBool U_EXPORT2
3223 
3246 U_STABLE UChar32 U_EXPORT2
3247 u_tolower(UChar32 c);
3248 
3271 U_STABLE UChar32 U_EXPORT2
3272 u_toupper(UChar32 c);
3273 
/**
 * The given character is mapped to its titlecase equivalent according to
 * UnicodeData.txt; if none is defined, the character itself is returned.
 *
 * @param c the code point to be mapped
 * @return the titlecase equivalent, or c itself if none is defined
 */
3296 U_STABLE UChar32 U_EXPORT2
3297 u_totitle(UChar32 c);
3298 
3300 #define U_FOLD_CASE_DEFAULT 0
3301 
3318 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
3319 
/**
 * The given character is mapped to its case folding equivalent according to
 * UnicodeData.txt and CaseFolding.txt; if the character has no case folding
 * equivalent, the character itself is returned.
 *
 * @param c the code point to be mapped
 * @param options option bits
 *        (NOTE(review): presumably one of the U_FOLD_CASE_* macros defined
 *        just above this declaration -- confirm)
 * @return the case folding equivalent, or c itself if there is none
 */
3342 U_STABLE UChar32 U_EXPORT2
3343 u_foldCase(UChar32 c, uint32_t options);
3344 
3383 U_STABLE int32_t U_EXPORT2
3384 u_digit(UChar32 ch, int8_t radix);
3385 
/**
 * Determines the character representation for a specific digit in the
 * specified radix.
 *
 * @param digit the digit value to convert
 * @param radix the radix
 * @return the character representation, as a UChar32
 *         (NOTE(review): the result for an out-of-range digit or radix is not
 *         visible in this listing -- confirm)
 */
3414 U_STABLE UChar32 U_EXPORT2
3415 u_forDigit(int32_t digit, int8_t radix);
3416 
3431 U_STABLE void U_EXPORT2
3432 u_charAge(UChar32 c, UVersionInfo versionArray);
3433 
3445 U_STABLE void U_EXPORT2
3446 u_getUnicodeVersion(UVersionInfo versionArray);
3447 
3448 #if !UCONFIG_NO_NORMALIZATION
3449 
/**
 * Get the FC_NFKC_Closure property string for a character.
 * Only declared when UCONFIG_NO_NORMALIZATION is not set (see the enclosing #if).
 *
 * @param c the code point to look up
 * @param dest destination buffer for the property string
 * @param destCapacity capacity of dest
 *        (NOTE(review): presumably counted in UChars -- confirm)
 * @param pErrorCode pointer to a UErrorCode
 *        (NOTE(review): presumably ICU's usual in/out error code -- confirm)
 * @return an int32_t
 *         (NOTE(review): presumably the length of the property string -- confirm)
 */
3470 U_STABLE int32_t U_EXPORT2
3471 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
3472 
3473 #endif
3474 
3475 
3477 
3478 #endif /* UCHAR_H */
3479 /*eof*/
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:57
Binary property Ideographic.
Definition: uchar.h:237
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
Definition: uchar.h:1216
Binary property Changes_When_Lowercased.
Definition: uchar.h:389
Binary property IDS_Binary_Operator (new in Unicode 3.2).
Definition: uchar.h:241
Binary property Case_Ignorable.
Definition: uchar.h:387
UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.
Enumerated property NFC_Quick_Check.
Definition: uchar.h:480
UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
Miscellaneous property Script_Extensions (new in Unicode 6.0).
Definition: uchar.h:601
const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases...
Same as UBLOCK_PRIVATE_USE_AREA.
Definition: uchar.h:1158
UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
First constant for enumerated/integer Unicode properties.
Definition: uchar.h:437
UChar32 u_getBidiPairedBracket(UChar32 c)
Maps the specified character to its paired bracket character.
Binary property XID_Start.
Definition: uchar.h:294
Binary property Join_Control.
Definition: uchar.h:248
Binary property Logical_Order_Exception (new in Unicode 3.2).
Definition: uchar.h:252
Binary property White_Space.
Definition: uchar.h:287
String property Titlecase_Mapping.
Definition: uchar.h:579
One more than the last constant for enumerated/integer Unicode properties.
Definition: uchar.h:519
Enumerated property Numeric_Type.
Definition: uchar.h:465
Binary property xdigit (a C/POSIX character class).
Definition: uchar.h:383
UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
Binary property Alphabetic.
Definition: uchar.h:175
First constant for double Unicode properties.
Definition: uchar.h:539
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
Definition: uchar.h:2939
UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces)...
Binary property Emoji.
Definition: uchar.h:407
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNE...
Definition: uchar.h:628
String property Simple_Case_Folding.
Definition: uchar.h:567
Binary property NFC_Inert.
Definition: uchar.h:330
Binary property graph (a C/POSIX character class).
Definition: uchar.h:373
String property Bidi_Mirroring_Glyph.
Definition: uchar.h:550
One more than the last constant for bit-mask Unicode properties.
Definition: uchar.h:533
UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
Enumerated property Block.
Definition: uchar.h:440
Represents a nonexistent or invalid property or property value.
Definition: uchar.h:608
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
Definition: uchar.h:1939
Binary property Radical (new in Unicode 3.2).
Definition: uchar.h:267
UCharDirection
This specifies the language directional property of a character set.
Definition: uchar.h:807
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
Definition: uchar.h:245
Binary property Grapheme_Link (new in Unicode 3.2).
Definition: uchar.h:219
Enumerated property Decomposition_Type.
Definition: uchar.h:446
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:849
String property Case_Folding.
Definition: uchar.h:553
String property Name.
Definition: uchar.h:564
String property Simple_Uppercase_Mapping.
Definition: uchar.h:576
UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
Enumerated property Bidi_Class.
Definition: uchar.h:435
int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
Enumerated property General_Category.
Definition: uchar.h:453
Sm.
Definition: uchar.h:676
String property Unicode_1_Name.
Definition: uchar.h:585
int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
UNumericType
Numeric Type constants.
Definition: uchar.h:1974
Binary property Pattern_White_Space (new in Unicode 4.1).
Definition: uchar.h:358
Close paired bracket.
Definition: uchar.h:883
UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function)...
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
Definition: uchar.h:2759
Binary property Changes_When_Casefolded.
Definition: uchar.h:395
Binary property NFD_Inert.
Definition: uchar.h:316
Binary property Diacritic.
Definition: uchar.h:200
Binary property Terminal_Punctuation.
Definition: uchar.h:276
UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
Enumerated property NFD_Quick_Check.
Definition: uchar.h:474
void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:82
Binary property STerm (new in Unicode 4.0.1).
Definition: uchar.h:303
Enumerated property Joining_Group.
Definition: uchar.h:456
Binary property ID_Continue.
Definition: uchar.h:230
Binary property Emoji_Presentation.
Definition: uchar.h:414
Open paired bracket.
Definition: uchar.h:881
Binary property blank (a C/POSIX character class).
Definition: uchar.h:368
Binary property Quotation_Mark.
Definition: uchar.h:263
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API.
Definition: umachine.h:113
Binary property Changes_When_NFKC_Casefolded.
Definition: uchar.h:399
First constant for binary Unicode properties.
Definition: uchar.h:177
Binary property Noncharacter_Code_Point.
Definition: uchar.h:261
Binary property Hyphen.
Definition: uchar.h:225
Enumerated property East_Asian_Width.
Definition: uchar.h:450
ULineBreak
Line Break constants.
Definition: uchar.h:1916
Binary property Full_Composition_Exclusion.
Definition: uchar.h:208
Bitmask property General_Category_Mask.
Definition: uchar.h:529
String property Simple_Titlecase_Mapping.
Definition: uchar.h:573
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
Definition: uchar.h:1035
int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
UDecompositionType
Decomposition Type constants.
Definition: uchar.h:1663
UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
UHangulSyllableType
Hangul Syllable Type constants.
Definition: uchar.h:1994
String property Simple_Lowercase_Mapping.
Definition: uchar.h:570
Binary property print (a C/POSIX character class).
Definition: uchar.h:378
Binary property Case_Sensitive.
Definition: uchar.h:298
Standard or synthetic character name.
Definition: uchar.h:1631
Binary property Bidi_Mirrored.
Definition: uchar.h:188
Binary property NFKC_Inert.
Definition: uchar.h:337
int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
Binary property Changes_When_Casemapped.
Definition: uchar.h:397
First constant for string Unicode properties.
Definition: uchar.h:547
Binary property Grapheme_Extend (new in Unicode 3.2).
Definition: uchar.h:216
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
UGraphemeClusterBreak
Grapheme Cluster Break constants.
Definition: uchar.h:1822
New No_Block value in Unicode 4.
Definition: uchar.h:900
Binary property Extender.
Definition: uchar.h:204
Double property Numeric_Value.
Definition: uchar.h:537
Binary property Math.
Definition: uchar.h:257
Unicode character name (Name property).
Definition: uchar.h:1621
Binary property Grapheme_Base (new in Unicode 3.2).
Definition: uchar.h:212
Binary property NFKD_Inert.
Definition: uchar.h:323
uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
UBool u_isbase(UChar32 c)
Determines whether the specified code point is a base character.
UCharCategory
Data for enumerated Unicode general category types.
Definition: uchar.h:616
const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
Enumerated property Sentence_Break (new in Unicode 4.1).
Definition: uchar.h:507
double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database...
Binary property Lowercase.
Definition: uchar.h:255
UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier...
First constant for bit-mask Unicode properties.
Definition: uchar.h:531
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
Definition: uchar.h:517
UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
USentenceBreak
Sentence Break constants.
Definition: uchar.h:1885
Binary property Unified_Ideograph (new in Unicode 3.2).
Definition: uchar.h:280
Enumerated property Canonical_Combining_Class.
Definition: uchar.h:443
UCharNameChoice
Selector constants for u_charName().
Definition: uchar.h:1619
One more than the last constant for binary Unicode properties.
Definition: uchar.h:431
UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
Enumerated property Script.
Definition: uchar.h:468
Unicode 3.2 renames this block to "Greek and Coptic".
Definition: uchar.h:927
Binary property Hex_Digit.
Definition: uchar.h:222
String property Uppercase_Mapping.
Definition: uchar.h:589
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
Definition: uchar.h:1651
String property Lowercase_Mapping.
Definition: uchar.h:561
UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter)...
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:83
Enumerated property NFKC_Quick_Check.
Definition: uchar.h:483
Not a paired bracket.
Definition: uchar.h:879
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:161
void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
Enumerated property Hangul_Syllable_Type, new in Unicode 4.
Definition: uchar.h:471
Binary property Dash.
Definition: uchar.h:190
Binary property alnum (a C/POSIX character class).
Definition: uchar.h:363
Cf.
Definition: uchar.h:660
Binary property Variation_Selector (new in Unicode 4.0.1).
Definition: uchar.h:309
UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
Definition: uchar.h:892
Enumerated property Word_Break (new in Unicode 4.1).
Definition: uchar.h:512
Binary property Deprecated (new in Unicode 3.2).
Definition: uchar.h:197
Binary property Bidi_Control.
Definition: uchar.h:183
Binary property XID_Continue.
Definition: uchar.h:291
Same as UBLOCK_PRIVATE_USE.
Definition: uchar.h:1148
UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
Binary property Uppercase.
Definition: uchar.h:283
void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
Binary property Changes_When_Uppercased.
Definition: uchar.h:391
UJoiningGroup
Joining Group constants.
Definition: uchar.h:1719
Binary property Cased.
Definition: uchar.h:385
Cs.
Definition: uchar.h:664
UEastAsianWidth
East Asian Width constants.
Definition: uchar.h:1592
UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter)...
Enumerated property Trail_Canonical_Combining_Class.
Definition: uchar.h:497
UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
Non-category for unassigned and non-character code points.
Definition: uchar.h:626
UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
First constant for Unicode properties with unusual value types.
Definition: uchar.h:603
UWordBreakValues
Word Break constants.
Definition: uchar.h:1852
void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive...
int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
Enumerated property Joining_Type.
Definition: uchar.h:459
One more than the last constant for double Unicode properties.
Definition: uchar.h:541
Binary property Emoji_Modifier.
Definition: uchar.h:421
Basic definitions for ICU, for both C and C++ APIs.
UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible in an identifier according to Java...
String property Bidi_Paired_Bracket (new in Unicode 6.3).
Definition: uchar.h:592
Enumerated property Lead_Canonical_Combining_Class.
Definition: uchar.h:490
Binary property ASCII_Hex_Digit.
Definition: uchar.h:179
UJoiningType
Joining Type constants.
Definition: uchar.h:1697
UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
Binary property Soft_Dotted (new in Unicode 3.2).
Definition: uchar.h:272
One more than the last constant for string Unicode properties.
Definition: uchar.h:594
Binary property Emoji_Modifier_Base.
Definition: uchar.h:428
Binary property Segment_Starter.
Definition: uchar.h:348
UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
Binary property ID_Start.
Definition: uchar.h:234
int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
Corrected name from NameAliases.txt.
Definition: uchar.h:1633
Binary property Changes_When_Titlecased.
Definition: uchar.h:393
UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
Enumerated property Line_Break.
Definition: uchar.h:462
UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier...
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
Definition: uchar.h:502
UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
Deprecated string property ISO_Comment.
Definition: uchar.h:557
UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
Definition: uchar.h:194
The Unicode_1_Name property value which is of little practical value.
Definition: uchar.h:1628
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Returns an empty string.
One higher than the last enum UCharCategory constant.
Definition: uchar.h:688
UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
Binary property Pattern_Syntax (new in Unicode 4.1).
Definition: uchar.h:353
One more than the last constant for Unicode properties with unusual value types.
Definition: uchar.h:606
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
Definition: uchar.h:871
UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
String property Age.
Definition: uchar.h:545
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
Enumerated property NFKD_Quick_Check.
Definition: uchar.h:477