ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ucnv.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2009, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6  * ucnv.h:
7  * External APIs for the ICU's codeset conversion library
8  * Bertrand A. Damiba
9  *
10  * Modification History:
11  *
12  * Date Name Description
13  * 04/04/99 helena Fixed internal header inclusion.
14  * 05/11/00 helena Added setFallback and usesFallback APIs.
15  * 06/29/2000 helena Major rewrite of the callback APIs.
16  * 12/07/2000 srl Update of documentation
17  */
18 
47 #ifndef UCNV_H
48 #define UCNV_H
49 
50 #include "unicode/ucnv_err.h"
51 #include "unicode/uenum.h"
52 
53 #ifndef __USET_H__
54 
64 struct USet;
66 typedef struct USet USet;
67 
68 #endif
69 
70 #if !UCONFIG_NO_CONVERSION
71 
73 
75 #define UCNV_MAX_CONVERTER_NAME_LENGTH 60
76 
77 #define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
78 
80 #define UCNV_SI 0x0F
81 
82 #define UCNV_SO 0x0E
83 
89 typedef enum {
90  UCNV_UNSUPPORTED_CONVERTER = -1,
91  UCNV_SBCS = 0,
92  UCNV_DBCS = 1,
93  UCNV_MBCS = 2,
94  UCNV_LATIN_1 = 3,
95  UCNV_UTF8 = 4,
96  UCNV_UTF16_BigEndian = 5,
97  UCNV_UTF16_LittleEndian = 6,
98  UCNV_UTF32_BigEndian = 7,
99  UCNV_UTF32_LittleEndian = 8,
100  UCNV_EBCDIC_STATEFUL = 9,
101  UCNV_ISO_2022 = 10,
102 
103  UCNV_LMBCS_1 = 11,
104  UCNV_LMBCS_2,
105  UCNV_LMBCS_3,
106  UCNV_LMBCS_4,
107  UCNV_LMBCS_5,
108  UCNV_LMBCS_6,
109  UCNV_LMBCS_8,
110  UCNV_LMBCS_11,
111  UCNV_LMBCS_16,
112  UCNV_LMBCS_17,
113  UCNV_LMBCS_18,
114  UCNV_LMBCS_19,
115  UCNV_LMBCS_LAST = UCNV_LMBCS_19,
116  UCNV_HZ,
117  UCNV_SCSU,
118  UCNV_ISCII,
119  UCNV_US_ASCII,
120  UCNV_UTF7,
121  UCNV_BOCU1,
122  UCNV_UTF16,
123  UCNV_UTF32,
124  UCNV_CESU8,
125  UCNV_IMAP_MAILBOX,
126 
127  /* Number of converter types for which we have conversion routines. */
128  UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
129 
130 } UConverterType;
131 
141 typedef enum {
142  UCNV_UNKNOWN = -1,
143  UCNV_IBM = 0
144 } UConverterPlatform;
145 
162  const void* context,
164  const char *codeUnits,
165  int32_t length,
167  UErrorCode *pErrorCode);
168 
185  const void* context,
187  const UChar* codeUnits,
188  int32_t length,
189  UChar32 codePoint,
191  UErrorCode *pErrorCode);
192 
194 
200 #define UCNV_OPTION_SEP_CHAR ','
201 
207 #define UCNV_OPTION_SEP_STRING ","
208 
214 #define UCNV_VALUE_SEP_CHAR '='
215 
221 #define UCNV_VALUE_SEP_STRING "="
222 
231 #define UCNV_LOCALE_OPTION_STRING ",locale="
232 
244 #define UCNV_VERSION_OPTION_STRING ",version="
245 
256 #define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
257 
274 ucnv_compareNames(const char *name1, const char *name2);
275 
276 
326 ucnv_open(const char *converterName, UErrorCode *err);
327 
328 
356 ucnv_openU(const UChar *name,
357  UErrorCode *err);
358 
424 ucnv_openCCSID(int32_t codepage,
425  UConverterPlatform platform,
426  UErrorCode * err);
427 
459 ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
460 
499 ucnv_safeClone(const UConverter *cnv,
500  void *stackBuffer,
501  int32_t *pBufferSize,
502  UErrorCode *status);
503 
510 #define U_CNV_SAFECLONE_BUFFERSIZE 1024
511 
523 U_STABLE void U_EXPORT2
524 ucnv_close(UConverter * converter);
525 
543 U_STABLE void U_EXPORT2
544 ucnv_getSubstChars(const UConverter *converter,
545  char *subChars,
546  int8_t *len,
547  UErrorCode *err);
548 
568 U_STABLE void U_EXPORT2
569 ucnv_setSubstChars(UConverter *converter,
570  const char *subChars,
571  int8_t len,
572  UErrorCode *err);
573 
601 U_STABLE void U_EXPORT2
602 ucnv_setSubstString(UConverter *cnv,
603  const UChar *s,
604  int32_t length,
605  UErrorCode *err);
606 
620 U_STABLE void U_EXPORT2
621 ucnv_getInvalidChars(const UConverter *converter,
622  char *errBytes,
623  int8_t *len,
624  UErrorCode *err);
625 
639 U_STABLE void U_EXPORT2
640 ucnv_getInvalidUChars(const UConverter *converter,
641  UChar *errUChars,
642  int8_t *len,
643  UErrorCode *err);
644 
652 U_STABLE void U_EXPORT2
653 ucnv_reset(UConverter *converter);
654 
663 U_STABLE void U_EXPORT2
664 ucnv_resetToUnicode(UConverter *converter);
665 
674 U_STABLE void U_EXPORT2
675 ucnv_resetFromUnicode(UConverter *converter);
676 
726 ucnv_getMaxCharSize(const UConverter *converter);
727 
747 #define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
748  (((int32_t)(length)+10)*(int32_t)(maxCharSize))
749 
759 ucnv_getMinCharSize(const UConverter *converter);
760 
776 ucnv_getDisplayName(const UConverter *converter,
777  const char *displayLocale,
778  UChar *displayName,
779  int32_t displayNameCapacity,
780  UErrorCode *err);
781 
792 U_STABLE const char * U_EXPORT2
793 ucnv_getName(const UConverter *converter, UErrorCode *err);
794 
819 ucnv_getCCSID(const UConverter *converter,
820  UErrorCode *err);
821 
833 ucnv_getPlatform(const UConverter *converter,
834  UErrorCode *err);
835 
845 ucnv_getType(const UConverter * converter);
846 
862 U_STABLE void U_EXPORT2
863 ucnv_getStarters(const UConverter* converter,
864  UBool starters[256],
865  UErrorCode* err);
866 
867 
873 typedef enum UConverterUnicodeSet {
881 
882 
928 U_STABLE void U_EXPORT2
929 ucnv_getUnicodeSet(const UConverter *cnv,
930  USet *setFillIn,
931  UConverterUnicodeSet whichSet,
932  UErrorCode *pErrorCode);
933 
945 U_STABLE void U_EXPORT2
946 ucnv_getToUCallBack (const UConverter * converter,
947  UConverterToUCallback *action,
948  const void **context);
949 
961 U_STABLE void U_EXPORT2
962 ucnv_getFromUCallBack (const UConverter * converter,
963  UConverterFromUCallback *action,
964  const void **context);
965 
981 U_STABLE void U_EXPORT2
982 ucnv_setToUCallBack (UConverter * converter,
983  UConverterToUCallback newAction,
984  const void* newContext,
985  UConverterToUCallback *oldAction,
986  const void** oldContext,
987  UErrorCode * err);
988 
1004 U_STABLE void U_EXPORT2
1005 ucnv_setFromUCallBack (UConverter * converter,
1006  UConverterFromUCallback newAction,
1007  const void *newContext,
1008  UConverterFromUCallback *oldAction,
1009  const void **oldContext,
1010  UErrorCode * err);
1011 
1070 U_STABLE void U_EXPORT2
1071 ucnv_fromUnicode (UConverter * converter,
1072  char **target,
1073  const char *targetLimit,
1074  const UChar ** source,
1075  const UChar * sourceLimit,
1076  int32_t* offsets,
1077  UBool flush,
1078  UErrorCode * err);
1079 
1139 U_STABLE void U_EXPORT2
1140 ucnv_toUnicode(UConverter *converter,
1141  UChar **target,
1142  const UChar *targetLimit,
1143  const char **source,
1144  const char *sourceLimit,
1145  int32_t *offsets,
1146  UBool flush,
1147  UErrorCode *err);
1148 
1178  char *dest, int32_t destCapacity,
1179  const UChar *src, int32_t srcLength,
1180  UErrorCode *pErrorCode);
1181 
1210  UChar *dest, int32_t destCapacity,
1211  const char *src, int32_t srcLength,
1212  UErrorCode *pErrorCode);
1213 
1285 ucnv_getNextUChar(UConverter * converter,
1286  const char **source,
1287  const char * sourceLimit,
1288  UErrorCode * err);
1289 
1428 U_STABLE void U_EXPORT2
1429 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
1430  char **target, const char *targetLimit,
1431  const char **source, const char *sourceLimit,
1432  UChar *pivotStart, UChar **pivotSource,
1433  UChar **pivotTarget, const UChar *pivotLimit,
1434  UBool reset, UBool flush,
1435  UErrorCode *pErrorCode);
1436 
1493 ucnv_convert(const char *toConverterName,
1494  const char *fromConverterName,
1495  char *target,
1496  int32_t targetCapacity,
1497  const char *source,
1498  int32_t sourceLength,
1499  UErrorCode *pErrorCode);
1500 
1547 ucnv_toAlgorithmic(UConverterType algorithmicType,
1548  UConverter *cnv,
1549  char *target, int32_t targetCapacity,
1550  const char *source, int32_t sourceLength,
1551  UErrorCode *pErrorCode);
1552 
1600  UConverterType algorithmicType,
1601  char *target, int32_t targetCapacity,
1602  const char *source, int32_t sourceLength,
1603  UErrorCode *pErrorCode);
1604 
1613 ucnv_flushCache(void);
1614 
1623 ucnv_countAvailable(void);
1624 
1635 U_STABLE const char* U_EXPORT2
1636 ucnv_getAvailableName(int32_t n);
1637 
1651 ucnv_openAllNames(UErrorCode *pErrorCode);
1652 
1664 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
1665 
1678 U_STABLE const char * U_EXPORT2
1679 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
1680 
1694 U_STABLE void U_EXPORT2
1695 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
1696 
1721 ucnv_openStandardNames(const char *convName,
1722  const char *standard,
1723  UErrorCode *pErrorCode);
1724 
1731 ucnv_countStandards(void);
1732 
1740 U_STABLE const char * U_EXPORT2
1741 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
1742 
1762 U_STABLE const char * U_EXPORT2
1763 ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
1764 
1784 U_STABLE const char * U_EXPORT2
1785 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
1786 
1801 U_STABLE const char * U_EXPORT2
1802 ucnv_getDefaultName(void);
1803 
1820 U_STABLE void U_EXPORT2
1821 ucnv_setDefaultName(const char *name);
1822 
1840 U_STABLE void U_EXPORT2
1841 ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
1842 
1852 ucnv_isAmbiguous(const UConverter *cnv);
1853 
1869 U_STABLE void U_EXPORT2
1870 ucnv_setFallback(UConverter *cnv, UBool usesFallback);
1871 
1882 ucnv_usesFallback(const UConverter *cnv);
1883 
1940 U_STABLE const char* U_EXPORT2
1941 ucnv_detectUnicodeSignature(const char* source,
1942  int32_t sourceLength,
1943  int32_t *signatureLength,
1944  UErrorCode *pErrorCode);
1945 
1958 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
1959 
1972 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
1973 
1974 #endif
1975 
1976 #endif
1977 /*_UCNV*/
The structure for the toUnicode callback function parameter.
Definition: ucnv_err.h:208
void ucnv_fromUnicode(UConverter *converter, char **target, const char *targetLimit, const UChar **source, const UChar *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err)
Converts an array of unicode characters to an array of codepage characters.
UBool ucnv_usesFallback(const UConverter *cnv)
Determines if the converter uses fallback mappings or not.
void ucnv_setSubstString(UConverter *cnv, const UChar *s, int32_t length, UErrorCode *err)
Set a substitution string for converting from Unicode to a charset.
UConverterCallbackReason
The process condition code to be used with the callbacks.
Definition: ucnv_err.h:155
int32_t ucnv_fromUCountPending(const UConverter *cnv, UErrorCode *status)
Returns the number of UChars held in the converter's internal state because more input is needed for ...
int32_t ucnv_toUCountPending(const UConverter *cnv, UErrorCode *status)
Returns the number of chars held in the converter's internal state because more input is needed for c...
int ucnv_compareNames(const char *name1, const char *name2)
Do a fuzzy compare of two converter/alias names.
const char * ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode)
Gives the name of the standard at given index of standard list.
uint16_t ucnv_countStandards(void)
Gives the number of standards associated to converter names.
void(* UConverterFromUCallback)(const void *context, UConverterFromUnicodeArgs *args, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *pErrorCode)
Function pointer for error callback in the unicode to codepage direction.
Definition: ucnv.h:184
int8_t ucnv_getMaxCharSize(const UConverter *converter)
Returns the maximum number of bytes that are output per UChar in conversion from Unicode using this c...
void ucnv_resetFromUnicode(UConverter *converter)
Resets the from-Unicode part of a converter state to the default state.
UConverter * ucnv_open(const char *converterName, UErrorCode *err)
Creates a UConverter object with the name of a coded character set specified as a C string...
UConverterType ucnv_getType(const UConverter *converter)
Gets the type of the converter e.g.
void ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, char **target, const char *targetLimit, const char **source, const char *sourceLimit, UChar *pivotStart, UChar **pivotSource, UChar **pivotTarget, const UChar *pivotLimit, UBool reset, UBool flush, UErrorCode *pErrorCode)
Convert from one external charset to another using two existing UConverters.
void ucnv_getToUCallBack(const UConverter *converter, UConverterToUCallback *action, const void **context)
Gets the current callback function used by the converter when an illegal or invalid codepage sequence ...
struct UEnumeration UEnumeration
structure representing an enumeration object instance
Definition: uenum.h:38
Number of UConverterUnicodeSet selectors.
Definition: ucnv.h:879
int32_t ucnv_getCCSID(const UConverter *converter, UErrorCode *err)
Gets a codepage number associated with the converter.
void ucnv_getUnicodeSet(const UConverter *cnv, USet *setFillIn, UConverterUnicodeSet whichSet, UErrorCode *pErrorCode)
Returns the set of Unicode code points that can be converted by an ICU converter. ...
UConverter * ucnv_openCCSID(int32_t codepage, UConverterPlatform platform, UErrorCode *err)
Creates a UConverter object from a CCSID number and platform pair.
The structure for the fromUnicode callback function parameter.
Definition: ucnv_err.h:192
int32_t ucnv_toUChars(UConverter *cnv, UChar *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert the codepage string into a Unicode string using an existing UConverter.
void ucnv_getFromUCallBack(const UConverter *converter, UConverterFromUCallback *action, const void **context)
Gets the current callback function used by the converter when illegal or invalid Unicode sequence is ...
UConverter * ucnv_openU(const UChar *name, UErrorCode *err)
Creates a Unicode converter with the names specified as unicode string.
void ucnv_getStarters(const UConverter *converter, UBool starters[256], UErrorCode *err)
Gets the "starter" (lead) bytes for converters of type MBCS.
UConverter * ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err)
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:101
const char * ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
Gives the name of the alias at given index of alias list.
void ucnv_setFromUCallBack(UConverter *converter, UConverterFromUCallback newAction, const void *newContext, UConverterFromUCallback *oldAction, const void **oldContext, UErrorCode *err)
Changes the current callback function used by the converter when an illegal or invalid sequence is fo...
const char * ucnv_getAvailableName(int32_t n)
Gets the canonical converter name of the specified converter from a list of all available converters ...
int32_t ucnv_countAvailable(void)
Returns the number of available converters, as per the alias file.
UBool ucnv_isAmbiguous(const UConverter *cnv)
Determines if the converter contains ambiguous mappings of the same character or not.
UConverterUnicodeSet
Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
Definition: ucnv.h:873
const char * ucnv_detectUnicodeSignature(const char *source, int32_t sourceLength, int32_t *signatureLength, UErrorCode *pErrorCode)
Detects Unicode signature byte sequences at the start of the byte stream and returns the charset name...
void ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
Fill-up the list of alias names for the given alias.
uint16_t ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
Gives the number of aliases for a given converter or alias name.
void ucnv_setDefaultName(const char *name)
This function is not thread safe.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
UConverterPlatform ucnv_getPlatform(const UConverter *converter, UErrorCode *err)
Gets a codepage platform associated with the converter.
void ucnv_getInvalidChars(const UConverter *converter, char *errBytes, int8_t *len, UErrorCode *err)
Fills in the output parameter, errBytes, with the error characters from the last failing conversion...
void ucnv_reset(UConverter *converter)
Resets the state of a converter to the default state.
void ucnv_setFallback(UConverter *cnv, UBool usesFallback)
Sets the converter to use fallback mappings or not.
#define U_EXPORT2
Definition: platform.h:338
UConverter * ucnv_safeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe converter cloning operation.
Select the set of roundtrippable Unicode code points.
Definition: ucnv.h:875
Select the set of Unicode code points with roundtrip or fallback mappings.
Definition: ucnv.h:877
void ucnv_getInvalidUChars(const UConverter *converter, UChar *errUChars, int8_t *len, UErrorCode *err)
Fills in the output parameter, errChars, with the error characters from the last failing conversion...
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:102
UEnumeration * ucnv_openStandardNames(const char *convName, const char *standard, UErrorCode *pErrorCode)
Return a new UEnumeration object for enumerating all the alias names for a given converter that are r...
struct USet USet
Definition: ucnv.h:66
const char * ucnv_getName(const UConverter *converter, UErrorCode *err)
Gets the internal, canonical name of the converter (zero-terminated).
int32_t ucnv_getDisplayName(const UConverter *converter, const char *displayLocale, UChar *displayName, int32_t displayNameCapacity, UErrorCode *err)
Returns the display name of the converter passed in based on the Locale passed in.
void(* UConverterToUCallback)(const void *context, UConverterToUnicodeArgs *args, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *pErrorCode)
Function pointer for error callback in the codepage to unicode direction.
Definition: ucnv.h:161
signed char int8_t
Define 64 bit limits.
Definition: pwin32.h:127
void ucnv_resetToUnicode(UConverter *converter)
Resets the to-Unicode part of a converter state to the default state.
struct UConverter UConverter
Definition: ucnv_err.h:94
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
const char * ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode)
This function will return the internal canonical converter name of the tagged alias.
void ucnv_setSubstChars(UConverter *converter, const char *subChars, int8_t len, UErrorCode *err)
Sets the substitution chars when converting from unicode to a codepage.
int32_t ucnv_fromUChars(UConverter *cnv, char *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert the Unicode string into a codepage string using an existing UConverter.
void ucnv_toUnicode(UConverter *converter, UChar **target, const UChar *targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err)
Converts a buffer of codepage bytes into an array of unicode UChars characters.
C API: String Enumeration.
void ucnv_close(UConverter *converter)
Deletes the unicode converter and releases resources associated with just this instance.
unsigned short uint16_t
Define 64 bit limits.
Definition: pwin32.h:139
int32_t ucnv_fromAlgorithmic(UConverter *cnv, UConverterType algorithmicType, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *pErrorCode)
Convert from one external charset to another.
UConverterType
Enum for specifying basic types of converters.
Definition: ucnv.h:89
void ucnv_getSubstChars(const UConverter *converter, char *subChars, int8_t *len, UErrorCode *err)
Fills in the output parameter, subChars, with the substitution characters as multiple bytes...
int32_t ucnv_toAlgorithmic(UConverterType algorithmicType, UConverter *cnv, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *pErrorCode)
Convert from one external charset to another.
void ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen)
Fixes the backslash character mismapping.
C UConverter predefined error callbacks.
UChar32 ucnv_getNextUChar(UConverter *converter, const char **source, const char *sourceLimit, UErrorCode *err)
Convert a codepage buffer into Unicode one character at a time.
int32_t ucnv_flushCache(void)
Frees up memory occupied by unused, cached converter shared data.
const char * ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode)
Returns a standard name for a given converter name.
int8_t ucnv_getMinCharSize(const UConverter *converter)
Returns the minimum byte length for characters in this codepage.
UConverterPlatform
Enum for specifying which platform a converter ID refers to.
Definition: ucnv.h:141
const char * ucnv_getDefaultName(void)
Returns the current default converter name.
void ucnv_setToUCallBack(UConverter *converter, UConverterToUCallback newAction, const void *newContext, UConverterToUCallback *oldAction, const void **oldContext, UErrorCode *err)
Changes the callback function used by the converter when an illegal or invalid sequence is found...
UEnumeration * ucnv_openAllNames(UErrorCode *pErrorCode)
Returns a UEnumeration to enumerate all of the canonical converter names, as per the alias file...
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
int32_t ucnv_convert(const char *toConverterName, const char *fromConverterName, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *pErrorCode)
Convert from one external charset to another.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:117
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208