ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
utext.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2004-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: utext.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2004oct06
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __UTEXT_H__
18 #define __UTEXT_H__
19 
138 #include "unicode/utypes.h"
139 #ifdef XP_CPLUSPLUS
140 #include "unicode/rep.h"
141 #include "unicode/unistr.h"
142 #include "unicode/chariter.h"
143 #endif
144 
145 
147 
148 struct UText;
149 typedef struct UText UText;
152 /***************************************************************************************
153  *
154  * C Functions for creating UText wrappers around various kinds of text strings.
155  *
156  ****************************************************************************************/
157 
158 
180 utext_close(UText *ut);
181 
182 
205 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
206 
207 
223 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
224 
225 
226 #ifdef XP_CPLUSPLUS
227 
240 utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
241 
242 
256 utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
257 
258 
272 utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
273 
287 utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
288 
289 #endif
290 
291 
350 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
351 
352 
365 utext_equals(const UText *a, const UText *b);
366 
367 
368 /*****************************************************************************
369  *
370  * Functions to work with the text represented by a UText wrapper
371  *
372  *****************************************************************************/
373 
387 
402 utext_isLengthExpensive(const UText *ut);
403 
430 utext_char32At(UText *ut, int64_t nativeIndex);
431 
432 
445 
446 
466 utext_next32(UText *ut);
467 
468 
488 
489 
509 utext_next32From(UText *ut, int64_t nativeIndex);
510 
511 
512 
529 utext_previous32From(UText *ut, int64_t nativeIndex);
530 
544 utext_getNativeIndex(const UText *ut);
545 
569 U_STABLE void U_EXPORT2
570 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
571 
589 utext_moveIndex32(UText *ut, int32_t delta);
590 
615 
616 
652 utext_extract(UText *ut,
653  int64_t nativeStart, int64_t nativeLimit,
654  UChar *dest, int32_t destCapacity,
655  UErrorCode *status);
656 
657 
658 /************************************************************************************
659  *
660  * #define inline versions of selected performance-critical text access functions
661  * Caution: do not use auto increment++ or decrement-- expressions
662  * as parameters to these macros.
663  *
664  * For most use, where there is no extreme performance constraint, the
665  * normal, non-inline functions are a better choice. The resulting code
666  * will be smaller, and, if the need ever arises, easier to debug.
667  *
668  * These are implemented as #defines rather than real functions
669  * because there is no fully portable way to do inline functions in plain C.
670  *
671  ************************************************************************************/
672 
684 #define UTEXT_NEXT32(ut) \
685  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
686  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
687 
698 #define UTEXT_PREVIOUS32(ut) \
699  ((ut)->chunkOffset > 0 && \
700  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
701  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
702 
715 #define UTEXT_GETNATIVEINDEX(ut) \
716  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
717  (ut)->chunkNativeStart+(ut)->chunkOffset : \
718  (ut)->pFuncs->mapOffsetToNative(ut))
719 
731 #define UTEXT_SETNATIVEINDEX(ut, ix) \
732  { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
733  if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
734  (ut)->chunkOffset=(int32_t)__offset; \
735  } else { \
736  utext_setNativeIndex((ut), (ix)); } }
737 
738 
739 
740 /************************************************************************************
741  *
742  * Functions related to writing or modifying the text.
743  * These will work only with modifiable UTexts. Attempting to
744  * modify a read-only UText will return an error status.
745  *
746  ************************************************************************************/
747 
748 
768 utext_isWritable(const UText *ut);
769 
770 
780 utext_hasMetaData(const UText *ut);
781 
782 
811 utext_replace(UText *ut,
812  int64_t nativeStart, int64_t nativeLimit,
813  const UChar *replacementText, int32_t replacementLength,
814  UErrorCode *status);
815 
816 
817 
850 U_STABLE void U_EXPORT2
851 utext_copy(UText *ut,
852  int64_t nativeStart, int64_t nativeLimit,
853  int64_t destIndex,
854  UBool move,
855  UErrorCode *status);
856 
857 
879 U_STABLE void U_EXPORT2
880 utext_freeze(UText *ut);
881 
882 
889 enum {
923 };
924 
962 typedef UText * U_CALLCONV
963 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
964 
965 
974 typedef int64_t U_CALLCONV
976 
1002 typedef UBool U_CALLCONV
1003 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1004 
1032 typedef int32_t U_CALLCONV
1034  int64_t nativeStart, int64_t nativeLimit,
1035  UChar *dest, int32_t destCapacity,
1036  UErrorCode *status);
1037 
1067 typedef int32_t U_CALLCONV
1069  int64_t nativeStart, int64_t nativeLimit,
1070  const UChar *replacementText, int32_t replacmentLength,
1071  UErrorCode *status);
1072 
1101 typedef void U_CALLCONV
1103  int64_t nativeStart, int64_t nativeLimit,
1104  int64_t nativeDest,
1105  UBool move,
1106  UErrorCode *status);
1107 
1121 typedef int64_t U_CALLCONV
1123 
1139 typedef int32_t U_CALLCONV
1140 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1141 
1142 
1160 typedef void U_CALLCONV
1162 
1163 
1173 struct UTextFuncs {
1189 
1196 
1197 
1205 
1214 
1222 
1230 
1238 
1246 
1254 
1262 
1270 
1282 
1288 
1289 };
1294 typedef struct UTextFuncs UTextFuncs;
1295 
1307 struct UText {
1321 
1322 
1329 
1330 
1337 
1345 
1346  /* ------ 16 byte alignment boundary ----------- */
1347 
1348 
1355 
1361 
1370 
1371  /* ---- 16 byte alignment boundary------ */
1372 
1378 
1385 
1391 
1392  /* ---- 16 byte alignment boundary-- */
1393 
1394 
1402 
1408 
1414  void *pExtra;
1415 
1422  const void *context;
1423 
1424  /* --- 16 byte alignment boundary--- */
1425 
1431  const void *p;
1437  const void *q;
1443  const void *r;
1444 
1450  void *privP;
1451 
1452 
1453  /* --- 16 byte alignment boundary--- */
1454 
1455 
1462 
1469 
1476 
1477  /* ---- 16 byte alignment boundary---- */
1478 
1479 
1498 };
1499 
1500 
1518 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1519 
1525 enum {
1526  UTEXT_MAGIC = 0x345ad82c
1527 };
1528 
1536 #define UTEXT_INITIALIZER { \
1537  UTEXT_MAGIC, /* magic */ \
1538  0, /* flags */ \
1539  0, /* providerProps */ \
1540  sizeof(UText), /* sizeOfStruct */ \
1541  0, /* chunkNativeLimit */ \
1542  0, /* extraSize */ \
1543  0, /* nativeIndexingLimit */ \
1544  0, /* chunkNativeStart */ \
1545  0, /* chunkOffset */ \
1546  0, /* chunkLength */ \
1547  NULL, /* chunkContents */ \
1548  NULL, /* pFuncs */ \
1549  NULL, /* pExtra */ \
1550  NULL, /* context */ \
1551  NULL, NULL, NULL, /* p, q, r */ \
1552  NULL, /* privP */ \
1553  0, 0, 0, /* a, b, c */ \
1554  0, 0, 0 /* privA,B,C, */ \
1555  }
1556 
1557 
1559 
1560 
1561 
1562 #endif
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1033
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1475
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position...
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1287
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond...
Definition: utext.h:1369
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1377
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
void UTextClose(UText *ut)
Function type declaration for UText.close().
Definition: utext.h:1161
int32_t providerProperties
Text provider properties.
Definition: utext.h:1336
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1414
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1461
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index...
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1390
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: utypes.h:254
C++ API: Unicode String.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1102
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1261
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:963
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:894
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1195
void utext_freeze(UText *ut)
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1229
There is meta data associated with the text.
Definition: utext.h:914
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:975
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1450
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1269
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText...
Definition: utext.h:1328
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1497
unsigned int uint32_t
Define 64 bit limits.
Definition: pwin32.h:147
UTextClone * clone
(public) Function pointer for UTextClone
Definition: utext.h:1204
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1213
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1437
(public) Function dispatch table for UText.
Definition: utext.h:1173
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1221
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:101
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1431
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
#define U_NAMESPACE_QUALIFIER
This is used to qualify that a function or class is part of the public ICU C++ API namespace...
Definition: uversion.h:186
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1245
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1401
int32_t reserved3
Definition: utext.h:1195
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs). Intended for use should the table grow to accommodate...
Definition: utext.h:1188
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1491
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1003
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1384
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1360
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:187
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1122
#define U_EXPORT2
Definition: platform.h:338
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapNativeIndexToUTF16().
Definition: utext.h:1140
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:102
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1485
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1443
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1354
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText) Allows possible backward compatible extension.
Definition: utext.h:1344
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1281
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1468
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1407
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1276
Text provider owns the text storage.
Definition: utext.h:922
uint32_t magic
(private) Magic.
Definition: utext.h:1320
signed long long int64_t
Define 64 bit limits.
Definition: pwin32.h:152
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1068
UText struct.
Definition: utext.h:1307
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1237
Basic definitions for ICU, for both C and C++ APIs.
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1422
UText * utext_close(UText *ut)
Close function for UText instances.
C++ API: Character Iterator.
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:901
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1253
int32_t reserved2
Definition: utext.h:1195
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:908
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:117
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208
C++ API: Replaceable String.