ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
unistr.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2009, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
19 */
20 
21 #ifndef UNISTR_H
22 #define UNISTR_H
23 
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34 
35 struct UConverter; // unicode/ucnv.h
36 class StringThreadTest;
37 
38 #ifndef U_COMPARE_CODE_POINT_ORDER
39 /* see also ustring.h and unorm.h */
45 #define U_COMPARE_CODE_POINT_ORDER 0x8000
46 #endif
47 
48 #ifndef USTRING_H
49 
53 u_strlen(const UChar *s);
54 #endif
55 
57 
58 class Locale; // unicode/locid.h
60 class BreakIterator; // unicode/brkiter.h
61 
62 /* The <iostream> include has been moved to unicode/ustream.h */
63 
// US_INV: shorthand for the UnicodeString::kInvariant enumerator, spelled with
// U_NAMESPACE_QUALIFIER so it can be used outside the ICU namespace.
// It is the third argument used by the fallback UNICODE_STRING() expansion below.
74 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
75 
// UNICODE_STRING(cs, _length): expands to a UnicodeString constructor call for the
// string literal `cs` with the caller-supplied length `_length`.
// One of four expansions is selected at preprocessing time. The first three cast
// the literal to const UChar * and call the (UBool isTerminated, const UChar *text,
// int32_t textLength) constructor with isTerminated=TRUE; the last one falls back
// to the (const char *, int32_t, EInvariant) constructor via US_INV.
// Case 1: the platform provides U_DECLARE_UTF16 to produce the UChar data.
93 #if defined(U_DECLARE_UTF16)
94 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
// Case 2: wchar_t has the same size as UChar, so `cs` is token-pasted into a wide
// literal (L ## cs); this requires `cs` to be a literal token, not an expression.
95 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
96 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
// Case 3: UChar is one byte wide on an ASCII-family platform, so the narrow
// literal is cast directly.
97 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
98 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
// Fallback: pass the char literal through the EInvariant constructor overload.
99 #else
100 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
101 #endif
102 
// UNICODE_STRING_SIMPLE(cs): convenience form of UNICODE_STRING() that passes -1
// as the length argument.
// NOTE(review): -1 presumably means "length determined from the terminator" — confirm
// against the constructor documentation.
116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
117 
188 {
189 public:
190 
// Tag enum with a single enumerator. It is the type of the `inv` parameter of
// extract(int32_t, int32_t, char *, int32_t, enum EInvariant) and of the
// UnicodeString(const char *, int32_t, enum EInvariant) constructor, so passing
// kInvariant selects those overloads.
// NOTE(review): presumably this requests invariant-character conversion — confirm.
199  enum EInvariant {
204  kInvariant
205  };
206 
207  //========================================
208  // Read-only operations
209  //========================================
210 
211  /* Comparison - bitwise only - for international comparison use collation */
212 
220  inline UBool operator== (const UnicodeString& text) const;
221 
229  inline UBool operator!= (const UnicodeString& text) const;
230 
238  inline UBool operator> (const UnicodeString& text) const;
239 
247  inline UBool operator< (const UnicodeString& text) const;
248 
256  inline UBool operator>= (const UnicodeString& text) const;
257 
265  inline UBool operator<= (const UnicodeString& text) const;
266 
278  inline int8_t compare(const UnicodeString& text) const;
279 
294  inline int8_t compare(int32_t start,
295  int32_t length,
296  const UnicodeString& text) const;
297 
315  inline int8_t compare(int32_t start,
316  int32_t length,
317  const UnicodeString& srcText,
318  int32_t srcStart,
319  int32_t srcLength) const;
320 
333  inline int8_t compare(const UChar *srcChars,
334  int32_t srcLength) const;
335 
350  inline int8_t compare(int32_t start,
351  int32_t length,
352  const UChar *srcChars) const;
353 
371  inline int8_t compare(int32_t start,
372  int32_t length,
373  const UChar *srcChars,
374  int32_t srcStart,
375  int32_t srcLength) const;
376 
394  inline int8_t compareBetween(int32_t start,
395  int32_t limit,
396  const UnicodeString& srcText,
397  int32_t srcStart,
398  int32_t srcLimit) const;
399 
417  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
418 
438  inline int8_t compareCodePointOrder(int32_t start,
439  int32_t length,
440  const UnicodeString& srcText) const;
441 
463  inline int8_t compareCodePointOrder(int32_t start,
464  int32_t length,
465  const UnicodeString& srcText,
466  int32_t srcStart,
467  int32_t srcLength) const;
468 
487  inline int8_t compareCodePointOrder(const UChar *srcChars,
488  int32_t srcLength) const;
489 
509  inline int8_t compareCodePointOrder(int32_t start,
510  int32_t length,
511  const UChar *srcChars) const;
512 
534  inline int8_t compareCodePointOrder(int32_t start,
535  int32_t length,
536  const UChar *srcChars,
537  int32_t srcStart,
538  int32_t srcLength) const;
539 
561  inline int8_t compareCodePointOrderBetween(int32_t start,
562  int32_t limit,
563  const UnicodeString& srcText,
564  int32_t srcStart,
565  int32_t srcLimit) const;
566 
585  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
586 
607  inline int8_t caseCompare(int32_t start,
608  int32_t length,
609  const UnicodeString& srcText,
610  uint32_t options) const;
611 
634  inline int8_t caseCompare(int32_t start,
635  int32_t length,
636  const UnicodeString& srcText,
637  int32_t srcStart,
638  int32_t srcLength,
639  uint32_t options) const;
640 
660  inline int8_t caseCompare(const UChar *srcChars,
661  int32_t srcLength,
662  uint32_t options) const;
663 
684  inline int8_t caseCompare(int32_t start,
685  int32_t length,
686  const UChar *srcChars,
687  uint32_t options) const;
688 
711  inline int8_t caseCompare(int32_t start,
712  int32_t length,
713  const UChar *srcChars,
714  int32_t srcStart,
715  int32_t srcLength,
716  uint32_t options) const;
717 
740  inline int8_t caseCompareBetween(int32_t start,
741  int32_t limit,
742  const UnicodeString& srcText,
743  int32_t srcStart,
744  int32_t srcLimit,
745  uint32_t options) const;
746 
754  inline UBool startsWith(const UnicodeString& text) const;
755 
766  inline UBool startsWith(const UnicodeString& srcText,
767  int32_t srcStart,
768  int32_t srcLength) const;
769 
778  inline UBool startsWith(const UChar *srcChars,
779  int32_t srcLength) const;
780 
790  inline UBool startsWith(const UChar *srcChars,
791  int32_t srcStart,
792  int32_t srcLength) const;
793 
801  inline UBool endsWith(const UnicodeString& text) const;
802 
813  inline UBool endsWith(const UnicodeString& srcText,
814  int32_t srcStart,
815  int32_t srcLength) const;
816 
825  inline UBool endsWith(const UChar *srcChars,
826  int32_t srcLength) const;
827 
838  inline UBool endsWith(const UChar *srcChars,
839  int32_t srcStart,
840  int32_t srcLength) const;
841 
842 
843  /* Searching - bitwise only */
844 
853  inline int32_t indexOf(const UnicodeString& text) const;
854 
864  inline int32_t indexOf(const UnicodeString& text,
865  int32_t start) const;
866 
878  inline int32_t indexOf(const UnicodeString& text,
879  int32_t start,
880  int32_t length) const;
881 
898  inline int32_t indexOf(const UnicodeString& srcText,
899  int32_t srcStart,
900  int32_t srcLength,
901  int32_t start,
902  int32_t length) const;
903 
915  inline int32_t indexOf(const UChar *srcChars,
916  int32_t srcLength,
917  int32_t start) const;
918 
931  inline int32_t indexOf(const UChar *srcChars,
932  int32_t srcLength,
933  int32_t start,
934  int32_t length) const;
935 
952  int32_t indexOf(const UChar *srcChars,
953  int32_t srcStart,
954  int32_t srcLength,
955  int32_t start,
956  int32_t length) const;
957 
965  inline int32_t indexOf(UChar c) const;
966 
975  inline int32_t indexOf(UChar32 c) const;
976 
985  inline int32_t indexOf(UChar c,
986  int32_t start) const;
987 
997  inline int32_t indexOf(UChar32 c,
998  int32_t start) const;
999 
1010  inline int32_t indexOf(UChar c,
1011  int32_t start,
1012  int32_t length) const;
1013 
1025  inline int32_t indexOf(UChar32 c,
1026  int32_t start,
1027  int32_t length) const;
1028 
1037  inline int32_t lastIndexOf(const UnicodeString& text) const;
1038 
1048  inline int32_t lastIndexOf(const UnicodeString& text,
1049  int32_t start) const;
1050 
1062  inline int32_t lastIndexOf(const UnicodeString& text,
1063  int32_t start,
1064  int32_t length) const;
1065 
1082  inline int32_t lastIndexOf(const UnicodeString& srcText,
1083  int32_t srcStart,
1084  int32_t srcLength,
1085  int32_t start,
1086  int32_t length) const;
1087 
1098  inline int32_t lastIndexOf(const UChar *srcChars,
1099  int32_t srcLength,
1100  int32_t start) const;
1101 
1114  inline int32_t lastIndexOf(const UChar *srcChars,
1115  int32_t srcLength,
1116  int32_t start,
1117  int32_t length) const;
1118 
1135  int32_t lastIndexOf(const UChar *srcChars,
1136  int32_t srcStart,
1137  int32_t srcLength,
1138  int32_t start,
1139  int32_t length) const;
1140 
1148  inline int32_t lastIndexOf(UChar c) const;
1149 
1158  inline int32_t lastIndexOf(UChar32 c) const;
1159 
1168  inline int32_t lastIndexOf(UChar c,
1169  int32_t start) const;
1170 
1180  inline int32_t lastIndexOf(UChar32 c,
1181  int32_t start) const;
1182 
1193  inline int32_t lastIndexOf(UChar c,
1194  int32_t start,
1195  int32_t length) const;
1196 
1208  inline int32_t lastIndexOf(UChar32 c,
1209  int32_t start,
1210  int32_t length) const;
1211 
1212 
1213  /* Character access */
1214 
1223  inline UChar charAt(int32_t offset) const;
1224 
1232  inline UChar operator[] (int32_t offset) const;
1233 
1245  inline UChar32 char32At(int32_t offset) const;
1246 
1262  inline int32_t getChar32Start(int32_t offset) const;
1263 
1280  inline int32_t getChar32Limit(int32_t offset) const;
1281 
1332  int32_t moveIndex32(int32_t index, int32_t delta) const;
1333 
1334  /* Substring extraction */
1335 
1351  inline void extract(int32_t start,
1352  int32_t length,
1353  UChar *dst,
1354  int32_t dstStart = 0) const;
1355 
1377  int32_t
1378  extract(UChar *dest, int32_t destCapacity,
1379  UErrorCode &errorCode) const;
1380 
1391  inline void extract(int32_t start,
1392  int32_t length,
1393  UnicodeString& target) const;
1394 
1406  inline void extractBetween(int32_t start,
1407  int32_t limit,
1408  UChar *dst,
1409  int32_t dstStart = 0) const;
1410 
1420  virtual void extractBetween(int32_t start,
1421  int32_t limit,
1422  UnicodeString& target) const;
1423 
1445  int32_t extract(int32_t start,
1446  int32_t startLength,
1447  char *target,
1448  int32_t targetCapacity,
1449  enum EInvariant inv) const;
1450 
1451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1452 
1472  int32_t extract(int32_t start,
1473  int32_t startLength,
1474  char *target,
1475  uint32_t targetLength) const;
1476 
1477 #endif
1478 
1479 #if !UCONFIG_NO_CONVERSION
1480 
1506  inline int32_t extract(int32_t start,
1507  int32_t startLength,
1508  char *target,
1509  const char *codepage = 0) const;
1510 
1540  int32_t extract(int32_t start,
1541  int32_t startLength,
1542  char *target,
1543  uint32_t targetLength,
1544  const char *codepage) const;
1545 
1563  int32_t extract(char *dest, int32_t destCapacity,
1564  UConverter *cnv,
1565  UErrorCode &errorCode) const;
1566 
1567 #endif
1568 
1579  void toUTF8(ByteSink &sink) const;
1580 
1581 #if U_HAVE_STD_STRING
1582 
1595  template<typename StringClass>
1596  StringClass &toUTF8String(StringClass &result) const {
1597  StringByteSink<StringClass> sbs(&result);
1598  toUTF8(sbs);
1599  return result;
1600  }
1601 
1602 #endif
1603 
1619  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1620 
1621  /* Length operations */
1622 
1631  inline int32_t length(void) const;
1632 
1646  int32_t
1647  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1648 
1672  UBool
1673  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1674 
1680  inline UBool isEmpty(void) const;
1681 
1691  inline int32_t getCapacity(void) const;
1692 
1693  /* Other operations */
1694 
1700  inline int32_t hashCode(void) const;
1701 
1713  inline UBool isBogus(void) const;
1714 
1715 
1716  //========================================
1717  // Write operations
1718  //========================================
1719 
1720  /* Assignment operations */
1721 
1729  UnicodeString &operator=(const UnicodeString &srcText);
1730 
1751  UnicodeString &fastCopyFrom(const UnicodeString &src);
1752 
1760  inline UnicodeString& operator= (UChar ch);
1761 
1769  inline UnicodeString& operator= (UChar32 ch);
1770 
1782  inline UnicodeString& setTo(const UnicodeString& srcText,
1783  int32_t srcStart);
1784 
1798  inline UnicodeString& setTo(const UnicodeString& srcText,
1799  int32_t srcStart,
1800  int32_t srcLength);
1801 
1810  inline UnicodeString& setTo(const UnicodeString& srcText);
1811 
1820  inline UnicodeString& setTo(const UChar *srcChars,
1821  int32_t srcLength);
1822 
1831  UnicodeString& setTo(UChar srcChar);
1832 
1841  UnicodeString& setTo(UChar32 srcChar);
1842 
1863  UnicodeString &setTo(UBool isTerminated,
1864  const UChar *text,
1865  int32_t textLength);
1866 
1886  UnicodeString &setTo(UChar *buffer,
1887  int32_t buffLength,
1888  int32_t buffCapacity);
1889 
1930  void setToBogus();
1931 
1939  UnicodeString& setCharAt(int32_t offset,
1940  UChar ch);
1941 
1942 
1943  /* Append operations */
1944 
1952  inline UnicodeString& operator+= (UChar ch);
1953 
1961  inline UnicodeString& operator+= (UChar32 ch);
1962 
1971  inline UnicodeString& operator+= (const UnicodeString& srcText);
1972 
1987  inline UnicodeString& append(const UnicodeString& srcText,
1988  int32_t srcStart,
1989  int32_t srcLength);
1990 
1998  inline UnicodeString& append(const UnicodeString& srcText);
1999 
2013  inline UnicodeString& append(const UChar *srcChars,
2014  int32_t srcStart,
2015  int32_t srcLength);
2016 
2025  inline UnicodeString& append(const UChar *srcChars,
2026  int32_t srcLength);
2027 
2034  inline UnicodeString& append(UChar srcChar);
2035 
2042  inline UnicodeString& append(UChar32 srcChar);
2043 
2044 
2045  /* Insert operations */
2046 
2060  inline UnicodeString& insert(int32_t start,
2061  const UnicodeString& srcText,
2062  int32_t srcStart,
2063  int32_t srcLength);
2064 
2073  inline UnicodeString& insert(int32_t start,
2074  const UnicodeString& srcText);
2075 
2089  inline UnicodeString& insert(int32_t start,
2090  const UChar *srcChars,
2091  int32_t srcStart,
2092  int32_t srcLength);
2093 
2103  inline UnicodeString& insert(int32_t start,
2104  const UChar *srcChars,
2105  int32_t srcLength);
2106 
2115  inline UnicodeString& insert(int32_t start,
2116  UChar srcChar);
2117 
2126  inline UnicodeString& insert(int32_t start,
2127  UChar32 srcChar);
2128 
2129 
2130  /* Replace operations */
2131 
2149  UnicodeString& replace(int32_t start,
2150  int32_t length,
2151  const UnicodeString& srcText,
2152  int32_t srcStart,
2153  int32_t srcLength);
2154 
2167  UnicodeString& replace(int32_t start,
2168  int32_t length,
2169  const UnicodeString& srcText);
2170 
2188  UnicodeString& replace(int32_t start,
2189  int32_t length,
2190  const UChar *srcChars,
2191  int32_t srcStart,
2192  int32_t srcLength);
2193 
2206  inline UnicodeString& replace(int32_t start,
2207  int32_t length,
2208  const UChar *srcChars,
2209  int32_t srcLength);
2210 
2222  inline UnicodeString& replace(int32_t start,
2223  int32_t length,
2224  UChar srcChar);
2225 
2237  inline UnicodeString& replace(int32_t start,
2238  int32_t length,
2239  UChar32 srcChar);
2240 
2250  inline UnicodeString& replaceBetween(int32_t start,
2251  int32_t limit,
2252  const UnicodeString& srcText);
2253 
2268  inline UnicodeString& replaceBetween(int32_t start,
2269  int32_t limit,
2270  const UnicodeString& srcText,
2271  int32_t srcStart,
2272  int32_t srcLimit);
2273 
2284  virtual void handleReplaceBetween(int32_t start,
2285  int32_t limit,
2286  const UnicodeString& text);
2287 
2293  virtual UBool hasMetaData() const;
2294 
2310  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2311 
2312  /* Search and replace operations */
2313 
2322  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2323  const UnicodeString& newText);
2324 
2336  inline UnicodeString& findAndReplace(int32_t start,
2337  int32_t length,
2338  const UnicodeString& oldText,
2339  const UnicodeString& newText);
2340 
2358  UnicodeString& findAndReplace(int32_t start,
2359  int32_t length,
2360  const UnicodeString& oldText,
2361  int32_t oldStart,
2362  int32_t oldLength,
2363  const UnicodeString& newText,
2364  int32_t newStart,
2365  int32_t newLength);
2366 
2367 
2368  /* Remove operations */
2369 
2375  inline UnicodeString& remove(void);
2376 
2385  inline UnicodeString& remove(int32_t start,
2386  int32_t length = (int32_t)INT32_MAX);
2387 
2396  inline UnicodeString& removeBetween(int32_t start,
2397  int32_t limit = (int32_t)INT32_MAX);
2398 
2399 
2400  /* Length operations */
2401 
2413  UBool padLeading(int32_t targetLength,
2414  UChar padChar = 0x0020);
2415 
2427  UBool padTrailing(int32_t targetLength,
2428  UChar padChar = 0x0020);
2429 
2436  inline UBool truncate(int32_t targetLength);
2437 
2443  UnicodeString& trim(void);
2444 
2445 
2446  /* Miscellaneous operations */
2447 
2453  inline UnicodeString& reverse(void);
2454 
2463  inline UnicodeString& reverse(int32_t start,
2464  int32_t length);
2465 
2472  UnicodeString& toUpper(void);
2473 
2481  UnicodeString& toUpper(const Locale& locale);
2482 
2489  UnicodeString& toLower(void);
2490 
2498  UnicodeString& toLower(const Locale& locale);
2499 
2500 #if !UCONFIG_NO_BREAK_ITERATION
2501 
2528  UnicodeString &toTitle(BreakIterator *titleIter);
2529 
2557  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2558 
2590  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2591 
2592 #endif
2593 
2605  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2606 
2607  //========================================
2608  // Access to the internal buffer
2609  //========================================
2610 
2654  UChar *getBuffer(int32_t minCapacity);
2655 
2676  void releaseBuffer(int32_t newLength=-1);
2677 
2708  inline const UChar *getBuffer() const;
2709 
2743  inline const UChar *getTerminatedBuffer();
2744 
2745  //========================================
2746  // Constructors
2747  //========================================
2748 
2752  UnicodeString();
2753 
2765  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2766 
2772  UnicodeString(UChar ch);
2773 
2779  UnicodeString(UChar32 ch);
2780 
2787  UnicodeString(const UChar *text);
2788 
2796  UnicodeString(const UChar *text,
2797  int32_t textLength);
2798 
2818  UnicodeString(UBool isTerminated,
2819  const UChar *text,
2820  int32_t textLength);
2821 
2840  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2841 
2842 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2843 
2850  UnicodeString(const char *codepageData);
2851 
2858  UnicodeString(const char *codepageData, int32_t dataLength);
2859 
2860 #endif
2861 
2862 #if !UCONFIG_NO_CONVERSION
2863 
2881  UnicodeString(const char *codepageData, const char *codepage);
2882 
2900  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
2901 
2923  UnicodeString(
2924  const char *src, int32_t srcLength,
2925  UConverter *cnv,
2926  UErrorCode &errorCode);
2927 
2928 #endif
2929 
2954  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2955 
2956 
2962  UnicodeString(const UnicodeString& that);
2963 
2970  UnicodeString(const UnicodeString& src, int32_t srcStart);
2971 
2979  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
2980 
2997  virtual Replaceable *clone() const;
2998 
3002  virtual ~UnicodeString();
3003 
3017  static UnicodeString fromUTF8(const StringPiece &utf8);
3018 
3030  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3031 
3032  /* Miscellaneous operations */
3033 
3068  UnicodeString unescape() const;
3069 
3089  UChar32 unescapeAt(int32_t &offset) const;
3090 
3096  static UClassID U_EXPORT2 getStaticClassID();
3097 
3103  virtual UClassID getDynamicClassID() const;
3104 
3105  //========================================
3106  // Implementation methods
3107  //========================================
3108 
3109 protected:
3114  virtual int32_t getLength() const;
3115 
3121  virtual UChar getCharAt(int32_t offset) const;
3122 
3128  virtual UChar32 getChar32At(int32_t offset) const;
3129 
3130 private:
3131  // For char* constructors. Could be made public.
3132  UnicodeString &setToUTF8(const StringPiece &utf8);
3133  // For extract(char*).
3134  // We could make a toUTF8(target, capacity, errorCode) public but not
3135  // this version: New API will be cleaner if we make callers create substrings
3136  // rather than having start+length on every method,
3137  // and it should take a UErrorCode&.
3138  int32_t
3139  toUTF8(int32_t start, int32_t len,
3140  char *target, int32_t capacity) const;
3141 
3142 
3143  inline int8_t
3144  doCompare(int32_t start,
3145  int32_t length,
3146  const UnicodeString& srcText,
3147  int32_t srcStart,
3148  int32_t srcLength) const;
3149 
3150  int8_t doCompare(int32_t start,
3151  int32_t length,
3152  const UChar *srcChars,
3153  int32_t srcStart,
3154  int32_t srcLength) const;
3155 
3156  inline int8_t
3157  doCompareCodePointOrder(int32_t start,
3158  int32_t length,
3159  const UnicodeString& srcText,
3160  int32_t srcStart,
3161  int32_t srcLength) const;
3162 
3163  int8_t doCompareCodePointOrder(int32_t start,
3164  int32_t length,
3165  const UChar *srcChars,
3166  int32_t srcStart,
3167  int32_t srcLength) const;
3168 
3169  inline int8_t
3170  doCaseCompare(int32_t start,
3171  int32_t length,
3172  const UnicodeString &srcText,
3173  int32_t srcStart,
3174  int32_t srcLength,
3175  uint32_t options) const;
3176 
3177  int8_t
3178  doCaseCompare(int32_t start,
3179  int32_t length,
3180  const UChar *srcChars,
3181  int32_t srcStart,
3182  int32_t srcLength,
3183  uint32_t options) const;
3184 
3185  int32_t doIndexOf(UChar c,
3186  int32_t start,
3187  int32_t length) const;
3188 
3189  int32_t doIndexOf(UChar32 c,
3190  int32_t start,
3191  int32_t length) const;
3192 
3193  int32_t doLastIndexOf(UChar c,
3194  int32_t start,
3195  int32_t length) const;
3196 
3197  int32_t doLastIndexOf(UChar32 c,
3198  int32_t start,
3199  int32_t length) const;
3200 
3201  void doExtract(int32_t start,
3202  int32_t length,
3203  UChar *dst,
3204  int32_t dstStart) const;
3205 
3206  inline void doExtract(int32_t start,
3207  int32_t length,
3208  UnicodeString& target) const;
3209 
3210  inline UChar doCharAt(int32_t offset) const;
3211 
3212  UnicodeString& doReplace(int32_t start,
3213  int32_t length,
3214  const UnicodeString& srcText,
3215  int32_t srcStart,
3216  int32_t srcLength);
3217 
3218  UnicodeString& doReplace(int32_t start,
3219  int32_t length,
3220  const UChar *srcChars,
3221  int32_t srcStart,
3222  int32_t srcLength);
3223 
3224  UnicodeString& doReverse(int32_t start,
3225  int32_t length);
3226 
3227  // calculate hash code
3228  int32_t doHashCode(void) const;
3229 
3230  // get pointer to start of array
3231  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3232  inline UChar* getArrayStart(void);
3233  inline const UChar* getArrayStart(void) const;
3234 
3235  // A UnicodeString object (not necessarily its current buffer)
3236  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3237  inline UBool isWritable() const;
3238 
3239  // Is the current buffer writable?
3240  inline UBool isBufferWritable() const;
3241 
3242  // None of the following does releaseArray().
3243  inline void setLength(int32_t len); // sets only fShortLength and fLength
3244  inline void setToEmpty(); // sets fFlags=kShortString
3245  inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
3246  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3247 
3248  // allocate the array; result may be fStackBuffer
3249  // sets refCount to 1 if appropriate
3250  // sets fArray, fCapacity, and fFlags
3251  // returns boolean for success or failure
3252  UBool allocate(int32_t capacity);
3253 
3254  // release the array if owned
3255  void releaseArray(void);
3256 
3257  // turn a bogus string into an empty one
3258  void unBogus();
3259 
3260  // implements assignment operator, copy constructor, and fastCopyFrom()
3261  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3262 
3263  // Pin start and limit to acceptable values.
3264  inline void pinIndex(int32_t& start) const;
3265  inline void pinIndices(int32_t& start,
3266  int32_t& length) const;
3267 
3268 #if !UCONFIG_NO_CONVERSION
3269 
3270  /* Internal extract() using UConverter. */
3271  int32_t doExtract(int32_t start, int32_t length,
3272  char *dest, int32_t destCapacity,
3273  UConverter *cnv,
3274  UErrorCode &errorCode) const;
3275 
3276  /*
3277  * Real constructor for converting from codepage data.
3278  * It assumes that it is called with !fRefCounted.
3279  *
3280  * If <code>codepage==0</code>, then the default converter
3281  * is used for the platform encoding.
3282  * If <code>codepage</code> is an empty string (<code>""</code>),
3283  * then a simple conversion is performed on the codepage-invariant
3284  * subset ("invariant characters") of the platform encoding. See utypes.h.
3285  */
3286  void doCodepageCreate(const char *codepageData,
3287  int32_t dataLength,
3288  const char *codepage);
3289 
3290  /*
3291  * Worker function for creating a UnicodeString from
3292  * a codepage string using a UConverter.
3293  */
3294  void
3295  doCodepageCreate(const char *codepageData,
3296  int32_t dataLength,
3297  UConverter *converter,
3298  UErrorCode &status);
3299 
3300 #endif
3301 
3302  /*
3303  * This function is called when write access to the array
3304  * is necessary.
3305  *
3306  * We need to make a copy of the array if
3307  * the buffer is read-only, or
3308  * the buffer is refCounted (shared), and refCount>1, or
3309  * the buffer is too small.
3310  *
3311  * Return FALSE if memory could not be allocated.
3312  */
3313  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3314  int32_t growCapacity = -1,
3315  UBool doCopyArray = TRUE,
3316  int32_t **pBufferToDelete = 0,
3317  UBool forceClone = FALSE);
3318 
3319  // common function for case mappings
3320  UnicodeString &
3321  caseMap(BreakIterator *titleIter,
3322  const char *locale,
3323  uint32_t options,
3324  int32_t toWhichCase);
3325 
3326  // ref counting
3327  void addRef(void);
3328  int32_t removeRef(void);
3329  int32_t refCount(void) const;
3330 
3331  // constants
// Private implementation constants, in three groups:
//  1. sizes and sentinels (inline buffer size, growth step, hash-code markers),
//  2. the individual bit values that are OR-ed together in fFlags,
//  3. named fFlags combinations describing the storage mode of a string.
// The flag bits are tested with '&' by the inline accessors further down
// (e.g. getArrayStart(), isWritable(), isBufferWritable()).
3332  enum {
3333  // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
3334  // 32-bit pointers: 4+1+1+13*2 = 32 bytes
3335  // 64-bit pointers: 8+1+1+15*2 = 40 bytes
3336  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
3337  kInvalidUChar=0xffff, // invalid UChar index
3338  kGrowSize=128, // grow size for this buffer
3339  kInvalidHashCode=0, // invalid hash code
3340  kEmptyHashCode=1, // hash code for empty string
3341 
3342  // bit flag values for fFlags
3343  kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3344  kUsingStackBuffer=2,// fArray==fStackBuffer
3345  kRefCounted=4, // there is a refCount field before the characters in fArray
3346  kBufferIsReadonly=8,// do not write to this buffer
3347  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3348  // and releaseBuffer(newLength) must be called
3349 
3350  // combined values for convenience
3351  kShortString=kUsingStackBuffer,
3352  kLongString=kRefCounted,
3353  kReadonlyAlias=kBufferIsReadonly,
3354  kWritableAlias=0
3355  };
3356 
3357  friend class StringThreadTest;
3358 
3359  union StackBufferOrFields; // forward declaration necessary before friend declaration
3360  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3361 
3362  /*
3363  * The following are all the class fields that are stored
3364  * in each UnicodeString object.
3365  * Note that UnicodeString has virtual functions,
3366  * therefore there is an implicit vtable pointer
3367  * as the first real field.
3368  * The fields should be aligned such that no padding is
3369  * necessary, mostly by having larger types first.
3370  * On 32-bit machines, the size should be 32 bytes,
3371  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3372  */
3373  // (implicit) *vtable;
// Per-object storage: a one-byte short length, a one-byte flag set, and a union
// that holds either the inline character buffer or the length/pointer/capacity
// triple describing an external array. Which union member is live is decided by
// the kUsingStackBuffer bit in fFlags (see getArrayStart()).
3374  int8_t fShortLength; // 0..127: the length itself; <0: the real length is in fUnion.fFields.fLength
3375  uint8_t fFlags; // bit flags: see constants above
3376  union StackBufferOrFields {
3377  // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3378  // else fFields is used
3379  UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
3380  struct {
3381  uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b)
3382  int32_t fLength; // number of characters in fArray if >127; else undefined
3383  UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
// NOTE(review): "sizeof" below presumably means a capacity counted in UChars, not bytes:
// getCapacity() returns either US_STACKBUF_SIZE (an element count) or this field — confirm.
3384  int32_t fCapacity; // sizeof fArray
3385  } fFields;
3386  } fUnion;
3387 };
3388 
3398 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3399 
3400 //========================================
3401 // Inline members
3402 //========================================
3403 
3404 //========================================
3405 // Privates
3406 //========================================
3407 
3408 inline void
3409 UnicodeString::pinIndex(int32_t& start) const
3410 {
3411  // pin index
3412  if(start < 0) {
3413  start = 0;
3414  } else if(start > length()) {
3415  start = length();
3416  }
3417 }
3418 
3419 inline void
3420 UnicodeString::pinIndices(int32_t& start,
3421  int32_t& _length) const
3422 {
3423  // pin indices
3424  int32_t len = length();
3425  if(start < 0) {
3426  start = 0;
3427  } else if(start > len) {
3428  start = len;
3429  }
3430  if(_length < 0) {
3431  _length = 0;
3432  } else if(_length > (len - start)) {
3433  _length = (len - start);
3434  }
3435 }
3436 
3437 inline UChar*
3438 UnicodeString::getArrayStart()
3439 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3440 
3441 inline const UChar*
3442 UnicodeString::getArrayStart() const
3443 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3444 
3445 //========================================
3446 // Read-only implementation methods
3447 //========================================
// Returns fShortLength when it is non-negative; a negative fShortLength means
// the length is stored in fUnion.fFields.fLength instead.
// NOTE(review): the declarator line (listing line 3449) is missing from this
// extract; the body presumably belongs to UnicodeString::length() const — confirm.
3448 inline int32_t
3450 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3451 
// Returns US_STACKBUF_SIZE while the inline buffer is in use (kUsingStackBuffer
// set in fFlags), otherwise fUnion.fFields.fCapacity.
// NOTE(review): the declarator line (listing line 3453) is missing from this
// extract; the body presumably belongs to UnicodeString::getCapacity() const — confirm.
3452 inline int32_t
3454 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3455 
// Thin inline wrapper: returns the result of the private doHashCode().
// NOTE(review): the declarator line (listing line 3457) is missing from this
// extract; the body presumably belongs to UnicodeString::hashCode() const — confirm.
3456 inline int32_t
3458 { return doHashCode(); }
3459 
// Returns the kIsBogus bit of fFlags cast to UBool. Because kIsBogus is 1, the
// masked value is already 0 or 1.
// NOTE(review): the declarator line (listing line 3461) is missing from this
// extract; the body presumably belongs to UnicodeString::isBogus() const — confirm.
3460 inline UBool
3462 { return (UBool)(fFlags & kIsBogus); }
3463 
3464 inline UBool
3465 UnicodeString::isWritable() const
3466 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3467 
3468 inline UBool
3469 UnicodeString::isBufferWritable() const
3470 {
3471  return (UBool)(
3472  !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3473  (!(fFlags&kRefCounted) || refCount()==1));
3474 }
3475 
// Read-only buffer access: returns 0 when the string is bogus or has an open
// getBuffer(minCapacity) (kIsBogus or kOpenGetBuffer set); otherwise returns the
// inline fStackBuffer or fUnion.fFields.fArray depending on kUsingStackBuffer.
// This differs from getArrayStart(), which does not check those two flags.
// NOTE(review): the declarator line with the opening brace (listing line 3477)
// is missing from this extract; the body presumably belongs to
// UnicodeString::getBuffer() const — confirm.
3476 inline const UChar *
3478  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3479  return 0;
3480  } else if(fFlags&kUsingStackBuffer) {
3481  return fUnion.fStackBuffer;
3482  } else {
3483  return fUnion.fFields.fArray;
3484  }
3485 }
3486 
3487 //========================================
3488 // Read-only alias methods
3489 //========================================
3490 inline int8_t
3491 UnicodeString::doCompare(int32_t start,
3492  int32_t thisLength,
3493  const UnicodeString& srcText,
3494  int32_t srcStart,
3495  int32_t srcLength) const
3496 {
3497  if(srcText.isBogus()) {
3498  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3499  } else {
3500  srcText.pinIndices(srcStart, srcLength);
3501  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3502  }
3503 }
3504 
3505 inline UBool
3507 {
3508  if(isBogus()) {
3509  return text.isBogus();
3510  } else {
3511  int32_t len = length(), textLength = text.length();
3512  return
3513  !text.isBogus() &&
3514  len == textLength &&
3515  doCompare(0, len, text, 0, textLength) == 0;
3516  }
3517 }
3518 
3519 inline UBool
3521 { return (! operator==(text)); }
3522 
3523 inline UBool
3525 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3526 
3527 inline UBool
3529 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3530 
3531 inline UBool
3533 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3534 
3535 inline UBool
3537 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3538 
3539 inline int8_t
3541 { return doCompare(0, length(), text, 0, text.length()); }
3542 
3543 inline int8_t
3545  int32_t _length,
3546  const UnicodeString& srcText) const
3547 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3548 
3549 inline int8_t
3551  int32_t srcLength) const
3552 { return doCompare(0, length(), srcChars, 0, srcLength); }
3553 
3554 inline int8_t
3556  int32_t _length,
3557  const UnicodeString& srcText,
3558  int32_t srcStart,
3559  int32_t srcLength) const
3560 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3561 
3562 inline int8_t
3564  int32_t _length,
3565  const UChar *srcChars) const
3566 { return doCompare(start, _length, srcChars, 0, _length); }
3567 
3568 inline int8_t
3570  int32_t _length,
3571  const UChar *srcChars,
3572  int32_t srcStart,
3573  int32_t srcLength) const
3574 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3575 
3576 inline int8_t
3578  int32_t limit,
3579  const UnicodeString& srcText,
3580  int32_t srcStart,
3581  int32_t srcLimit) const
3582 { return doCompare(start, limit - start,
3583  srcText, srcStart, srcLimit - srcStart); }
3584 
3585 inline int8_t
3586 UnicodeString::doCompareCodePointOrder(int32_t start,
3587  int32_t thisLength,
3588  const UnicodeString& srcText,
3589  int32_t srcStart,
3590  int32_t srcLength) const
3591 {
3592  if(srcText.isBogus()) {
3593  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3594  } else {
3595  srcText.pinIndices(srcStart, srcLength);
3596  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3597  }
3598 }
3599 
3600 inline int8_t
3602 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3603 
3604 inline int8_t
3606  int32_t _length,
3607  const UnicodeString& srcText) const
3608 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3609 
3610 inline int8_t
3612  int32_t srcLength) const
3613 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3614 
3615 inline int8_t
3617  int32_t _length,
3618  const UnicodeString& srcText,
3619  int32_t srcStart,
3620  int32_t srcLength) const
3621 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3622 
3623 inline int8_t
3625  int32_t _length,
3626  const UChar *srcChars) const
3627 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3628 
3629 inline int8_t
3631  int32_t _length,
3632  const UChar *srcChars,
3633  int32_t srcStart,
3634  int32_t srcLength) const
3635 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3636 
3637 inline int8_t
3639  int32_t limit,
3640  const UnicodeString& srcText,
3641  int32_t srcStart,
3642  int32_t srcLimit) const
3643 { return doCompareCodePointOrder(start, limit - start,
3644  srcText, srcStart, srcLimit - srcStart); }
3645 
3646 inline int8_t
3647 UnicodeString::doCaseCompare(int32_t start,
3648  int32_t thisLength,
3649  const UnicodeString &srcText,
3650  int32_t srcStart,
3651  int32_t srcLength,
3652  uint32_t options) const
3653 {
3654  if(srcText.isBogus()) {
3655  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3656  } else {
3657  srcText.pinIndices(srcStart, srcLength);
3658  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3659  }
3660 }
3661 
3662 inline int8_t
3664  return doCaseCompare(0, length(), text, 0, text.length(), options);
3665 }
3666 
3667 inline int8_t
3669  int32_t _length,
3670  const UnicodeString &srcText,
3671  uint32_t options) const {
3672  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3673 }
3674 
3675 inline int8_t
3677  int32_t srcLength,
3678  uint32_t options) const {
3679  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3680 }
3681 
3682 inline int8_t
3684  int32_t _length,
3685  const UnicodeString &srcText,
3686  int32_t srcStart,
3687  int32_t srcLength,
3688  uint32_t options) const {
3689  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3690 }
3691 
3692 inline int8_t
3694  int32_t _length,
3695  const UChar *srcChars,
3696  uint32_t options) const {
3697  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3698 }
3699 
3700 inline int8_t
3702  int32_t _length,
3703  const UChar *srcChars,
3704  int32_t srcStart,
3705  int32_t srcLength,
3706  uint32_t options) const {
3707  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3708 }
3709 
3710 inline int8_t
3712  int32_t limit,
3713  const UnicodeString &srcText,
3714  int32_t srcStart,
3715  int32_t srcLimit,
3716  uint32_t options) const {
3717  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3718 }
3719 
3720 inline int32_t
3722  int32_t srcStart,
3723  int32_t srcLength,
3724  int32_t start,
3725  int32_t _length) const
3726 {
3727  if(!srcText.isBogus()) {
3728  srcText.pinIndices(srcStart, srcLength);
3729  if(srcLength > 0) {
3730  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3731  }
3732  }
3733  return -1;
3734 }
3735 
3736 inline int32_t
3738 { return indexOf(text, 0, text.length(), 0, length()); }
3739 
3740 inline int32_t
3742  int32_t start) const {
3743  pinIndex(start);
3744  return indexOf(text, 0, text.length(), start, length() - start);
3745 }
3746 
3747 inline int32_t
3749  int32_t start,
3750  int32_t _length) const
3751 { return indexOf(text, 0, text.length(), start, _length); }
3752 
3753 inline int32_t
3755  int32_t srcLength,
3756  int32_t start) const {
3757  pinIndex(start);
3758  return indexOf(srcChars, 0, srcLength, start, length() - start);
3759 }
3760 
3761 inline int32_t
3763  int32_t srcLength,
3764  int32_t start,
3765  int32_t _length) const
3766 { return indexOf(srcChars, 0, srcLength, start, _length); }
3767 
3768 inline int32_t
3770  int32_t start,
3771  int32_t _length) const
3772 { return doIndexOf(c, start, _length); }
3773 
3774 inline int32_t
3776  int32_t start,
3777  int32_t _length) const
3778 { return doIndexOf(c, start, _length); }
3779 
3780 inline int32_t
3782 { return doIndexOf(c, 0, length()); }
3783 
3784 inline int32_t
3786 { return indexOf(c, 0, length()); }
3787 
3788 inline int32_t
3790  int32_t start) const {
3791  pinIndex(start);
3792  return doIndexOf(c, start, length() - start);
3793 }
3794 
3795 inline int32_t
3797  int32_t start) const {
3798  pinIndex(start);
3799  return indexOf(c, start, length() - start);
3800 }
3801 
3802 inline int32_t
3804  int32_t srcLength,
3805  int32_t start,
3806  int32_t _length) const
3807 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3808 
3809 inline int32_t
3811  int32_t srcLength,
3812  int32_t start) const {
3813  pinIndex(start);
3814  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3815 }
3816 
3817 inline int32_t
3819  int32_t srcStart,
3820  int32_t srcLength,
3821  int32_t start,
3822  int32_t _length) const
3823 {
3824  if(!srcText.isBogus()) {
3825  srcText.pinIndices(srcStart, srcLength);
3826  if(srcLength > 0) {
3827  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3828  }
3829  }
3830  return -1;
3831 }
3832 
3833 inline int32_t
3835  int32_t start,
3836  int32_t _length) const
3837 { return lastIndexOf(text, 0, text.length(), start, _length); }
3838 
3839 inline int32_t
3841  int32_t start) const {
3842  pinIndex(start);
3843  return lastIndexOf(text, 0, text.length(), start, length() - start);
3844 }
3845 
3846 inline int32_t
3848 { return lastIndexOf(text, 0, text.length(), 0, length()); }
3849 
3850 inline int32_t
3852  int32_t start,
3853  int32_t _length) const
3854 { return doLastIndexOf(c, start, _length); }
3855 
3856 inline int32_t
3858  int32_t start,
3859  int32_t _length) const {
3860  return doLastIndexOf(c, start, _length);
3861 }
3862 
3863 inline int32_t
3865 { return doLastIndexOf(c, 0, length()); }
3866 
3867 inline int32_t
3869  return lastIndexOf(c, 0, length());
3870 }
3871 
3872 inline int32_t
3874  int32_t start) const {
3875  pinIndex(start);
3876  return doLastIndexOf(c, start, length() - start);
3877 }
3878 
3879 inline int32_t
3881  int32_t start) const {
3882  pinIndex(start);
3883  return lastIndexOf(c, start, length() - start);
3884 }
3885 
3886 inline UBool
3888 { return compare(0, text.length(), text, 0, text.length()) == 0; }
3889 
3890 inline UBool
3892  int32_t srcStart,
3893  int32_t srcLength) const
3894 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3895 
3896 inline UBool
3898  int32_t srcLength) const
3899 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
3900 
3901 inline UBool
3903  int32_t srcStart,
3904  int32_t srcLength) const
3905 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
3906 
3907 inline UBool
3909 { return doCompare(length() - text.length(), text.length(),
3910  text, 0, text.length()) == 0; }
3911 
3912 inline UBool
3914  int32_t srcStart,
3915  int32_t srcLength) const {
3916  srcText.pinIndices(srcStart, srcLength);
3917  return doCompare(length() - srcLength, srcLength,
3918  srcText, srcStart, srcLength) == 0;
3919 }
3920 
3921 inline UBool
3923  int32_t srcLength) const {
3924  if(srcLength < 0) {
3925  srcLength = u_strlen(srcChars);
3926  }
3927  return doCompare(length() - srcLength, srcLength,
3928  srcChars, 0, srcLength) == 0;
3929 }
3930 
3931 inline UBool
3933  int32_t srcStart,
3934  int32_t srcLength) const {
3935  if(srcLength < 0) {
3936  srcLength = u_strlen(srcChars + srcStart);
3937  }
3938  return doCompare(length() - srcLength, srcLength,
3939  srcChars, srcStart, srcLength) == 0;
3940 }
3941 
3942 //========================================
3943 // replace
3944 //========================================
3945 inline UnicodeString&
3947  int32_t _length,
3948  const UnicodeString& srcText)
3949 { return doReplace(start, _length, srcText, 0, srcText.length()); }
3950 
3951 inline UnicodeString&
3953  int32_t _length,
3954  const UnicodeString& srcText,
3955  int32_t srcStart,
3956  int32_t srcLength)
3957 { return doReplace(start, _length, srcText, srcStart, srcLength); }
3958 
3959 inline UnicodeString&
3961  int32_t _length,
3962  const UChar *srcChars,
3963  int32_t srcLength)
3964 { return doReplace(start, _length, srcChars, 0, srcLength); }
3965 
3966 inline UnicodeString&
3968  int32_t _length,
3969  const UChar *srcChars,
3970  int32_t srcStart,
3971  int32_t srcLength)
3972 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
3973 
3974 inline UnicodeString&
3976  int32_t _length,
3977  UChar srcChar)
3978 { return doReplace(start, _length, &srcChar, 0, 1); }
3979 
3980 inline UnicodeString&
3982  int32_t _length,
3983  UChar32 srcChar) {
3984  UChar buffer[U16_MAX_LENGTH];
3985  int32_t count = 0;
3986  UBool isError = FALSE;
3987  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
3988  return doReplace(start, _length, buffer, 0, count);
3989 }
3990 
3991 inline UnicodeString&
3993  int32_t limit,
3994  const UnicodeString& srcText)
3995 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
3996 
3997 inline UnicodeString&
3999  int32_t limit,
4000  const UnicodeString& srcText,
4001  int32_t srcStart,
4002  int32_t srcLimit)
4003 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4004 
4005 inline UnicodeString&
4007  const UnicodeString& newText)
4008 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4009  newText, 0, newText.length()); }
4010 
4011 inline UnicodeString&
4013  int32_t _length,
4014  const UnicodeString& oldText,
4015  const UnicodeString& newText)
4016 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4017  newText, 0, newText.length()); }
4018 
4019 // ============================
4020 // extract
4021 // ============================
4022 inline void
4023 UnicodeString::doExtract(int32_t start,
4024  int32_t _length,
4025  UnicodeString& target) const
4026 { target.replace(0, target.length(), *this, start, _length); }
4027 
4028 inline void
4030  int32_t _length,
4031  UChar *target,
4032  int32_t targetStart) const
4033 { doExtract(start, _length, target, targetStart); }
4034 
4035 inline void
4037  int32_t _length,
4038  UnicodeString& target) const
4039 { doExtract(start, _length, target); }
4040 
4041 #if !UCONFIG_NO_CONVERSION
4042 
4043 inline int32_t
4045  int32_t _length,
4046  char *dst,
4047  const char *codepage) const
4048 
4049 {
4050  // This dstSize value will be checked explicitly
4051  return extract(start, _length, dst, dst!=0 ? (((size_t)dst >= ((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage);
4052 }
4053 
4054 #endif
4055 
4056 inline void
4058  int32_t limit,
4059  UChar *dst,
4060  int32_t dstStart) const {
4061  pinIndex(start);
4062  pinIndex(limit);
4063  doExtract(start, limit - start, dst, dstStart);
4064 }
4065 
4066 inline UChar
4067 UnicodeString::doCharAt(int32_t offset) const
4068 {
4069  if((uint32_t)offset < (uint32_t)length()) {
4070  return getArrayStart()[offset];
4071  } else {
4072  return kInvalidUChar;
4073  }
4074 }
4075 
4076 inline UChar
4078 { return doCharAt(offset); }
4079 
4080 inline UChar
4082 { return doCharAt(offset); }
4083 
4084 inline UChar32
4086 {
4087  int32_t len = length();
4088  if((uint32_t)offset < (uint32_t)len) {
4089  const UChar *array = getArrayStart();
4090  UChar32 c;
4091  U16_GET(array, 0, offset, len, c);
4092  return c;
4093  } else {
4094  return kInvalidUChar;
4095  }
4096 }
4097 
4098 inline int32_t
4100  if((uint32_t)offset < (uint32_t)length()) {
4101  const UChar *array = getArrayStart();
4102  U16_SET_CP_START(array, 0, offset);
4103  return offset;
4104  } else {
4105  return 0;
4106  }
4107 }
4108 
4109 inline int32_t
4111  int32_t len = length();
4112  if((uint32_t)offset < (uint32_t)len) {
4113  const UChar *array = getArrayStart();
4114  U16_SET_CP_LIMIT(array, 0, offset, len);
4115  return offset;
4116  } else {
4117  return len;
4118  }
4119 }
4120 
4121 inline UBool
4123  return fShortLength == 0;
4124 }
4125 
4126 //========================================
4127 // Write implementation methods
4128 //========================================
4129 inline void
4130 UnicodeString::setLength(int32_t len) {
4131  if(len <= 127) {
4132  fShortLength = (int8_t)len;
4133  } else {
4134  fShortLength = (int8_t)-1;
4135  fUnion.fFields.fLength = len;
4136  }
4137 }
4138 
4139 inline void
4140 UnicodeString::setToEmpty() {
4141  fShortLength = 0;
4142  fFlags = kShortString;
4143 }
4144 
4145 inline void
4146 UnicodeString::setToStackBuffer(int32_t len) {
4147  fShortLength = (int8_t)len;
4148  fFlags = kShortString;
4149 }
4150 
4151 inline void
4152 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4153  setLength(len);
4154  fUnion.fFields.fArray = array;
4155  fUnion.fFields.fCapacity = capacity;
4156 }
4157 
4158 inline const UChar *
4160  if(!isWritable()) {
4161  return 0;
4162  } else {
4163  UChar *array = getArrayStart();
4164  int32_t len = length();
4165 #ifndef U_VALGRIND
4166  if(len < getCapacity() && array[len] == 0) {
4167  return array;
4168  }
4169 #endif
4170  if(cloneArrayIfNeeded(len+1)) {
4171  array = getArrayStart();
4172  array[len] = 0;
4173  return array;
4174  } else {
4175  return 0;
4176  }
4177  }
4178 }
4179 
4180 inline UnicodeString&
4182 { return doReplace(0, length(), &ch, 0, 1); }
4183 
4184 inline UnicodeString&
4186 { return replace(0, length(), ch); }
4187 
4188 inline UnicodeString&
4190  int32_t srcStart,
4191  int32_t srcLength)
4192 {
4193  unBogus();
4194  return doReplace(0, length(), srcText, srcStart, srcLength);
4195 }
4196 
4197 inline UnicodeString&
4199  int32_t srcStart)
4200 {
4201  unBogus();
4202  srcText.pinIndex(srcStart);
4203  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4204 }
4205 
4206 inline UnicodeString&
4208 {
4209  unBogus();
4210  return doReplace(0, length(), srcText, 0, srcText.length());
4211 }
4212 
4213 inline UnicodeString&
4214 UnicodeString::setTo(const UChar *srcChars,
4215  int32_t srcLength)
4216 {
4217  unBogus();
4218  return doReplace(0, length(), srcChars, 0, srcLength);
4219 }
4220 
4221 inline UnicodeString&
4223 {
4224  unBogus();
4225  return doReplace(0, length(), &srcChar, 0, 1);
4226 }
4227 
4228 inline UnicodeString&
4230 {
4231  unBogus();
4232  return replace(0, length(), srcChar);
4233 }
4234 
4235 inline UnicodeString&
4237  int32_t srcStart,
4238  int32_t srcLength)
4239 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4240 
4241 inline UnicodeString&
4243 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4244 
4245 inline UnicodeString&
4247  int32_t srcStart,
4248  int32_t srcLength)
4249 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4250 
4251 inline UnicodeString&
4253  int32_t srcLength)
4254 { return doReplace(length(), 0, srcChars, 0, srcLength); }
4255 
4256 inline UnicodeString&
4258 { return doReplace(length(), 0, &srcChar, 0, 1); }
4259 
4260 inline UnicodeString&
4262  UChar buffer[U16_MAX_LENGTH];
4263  int32_t _length = 0;
4264  UBool isError = FALSE;
4265  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4266  return doReplace(length(), 0, buffer, 0, _length);
4267 }
4268 
4269 inline UnicodeString&
4271 { return doReplace(length(), 0, &ch, 0, 1); }
4272 
4273 inline UnicodeString&
4275  return append(ch);
4276 }
4277 
4278 inline UnicodeString&
4280 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4281 
4282 inline UnicodeString&
4284  const UnicodeString& srcText,
4285  int32_t srcStart,
4286  int32_t srcLength)
4287 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4288 
4289 inline UnicodeString&
4291  const UnicodeString& srcText)
4292 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4293 
4294 inline UnicodeString&
4296  const UChar *srcChars,
4297  int32_t srcStart,
4298  int32_t srcLength)
4299 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4300 
4301 inline UnicodeString&
4303  const UChar *srcChars,
4304  int32_t srcLength)
4305 { return doReplace(start, 0, srcChars, 0, srcLength); }
4306 
4307 inline UnicodeString&
4309  UChar srcChar)
4310 { return doReplace(start, 0, &srcChar, 0, 1); }
4311 
4312 inline UnicodeString&
4314  UChar32 srcChar)
4315 { return replace(start, 0, srcChar); }
4316 
4317 
4318 inline UnicodeString&
4320 {
4321  // remove() of a bogus string makes the string empty and non-bogus
4322  if(isBogus()) {
4323  unBogus();
4324  } else {
4325  setLength(0);
4326  }
4327  return *this;
4328 }
4329 
4330 inline UnicodeString&
4332  int32_t _length)
4333 {
4334  if(start <= 0 && _length == INT32_MAX) {
4335  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4336  return remove();
4337  }
4338  return doReplace(start, _length, NULL, 0, 0);
4339 }
4340 
4341 inline UnicodeString&
4343  int32_t limit)
4344 { return doReplace(start, limit - start, NULL, 0, 0); }
4345 
4346 inline UBool
4348 {
4349  if(isBogus() && targetLength == 0) {
4350  // truncate(0) of a bogus string makes the string empty and non-bogus
4351  unBogus();
4352  return FALSE;
4353  } else if((uint32_t)targetLength < (uint32_t)length()) {
4354  setLength(targetLength);
4355  return TRUE;
4356  } else {
4357  return FALSE;
4358  }
4359 }
4360 
4361 inline UnicodeString&
4363 { return doReverse(0, length()); }
4364 
4365 inline UnicodeString&
4367  int32_t _length)
4368 { return doReverse(start, _length); }
4369 
4371 
4372 #endif
void extract(int32_t start, int32_t length, UChar *dst, int32_t dstStart=0) const
Copy the characters in the range [start, start + length) into the array dst, beginning at dstStart...
Definition: unistr.h:4029
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
Definition: unistr.h:4099
int8_t caseCompareBetween(int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit, uint32_t options) const
Compare two strings case-insensitively using full case folding.
Definition: unistr.h:3711
int8_t caseCompare(const UnicodeString &text, uint32_t options) const
Compare two strings case-insensitively using full case folding.
Definition: unistr.h:3663
The BreakIterator class implements methods for finding the location of boundaries in text...
Definition: brkiter.h:100
A ByteSink can be filled with bytes.
Definition: bytestream.h:48
#define U16_GET(s, start, i, length, c)
Get a code point from a string at a random-access offset, without changing the offset.
Definition: utf16.h:197
int32_t hashCode(void) const
Generate a hash code for this object.
Definition: unistr.h:3457
UBool operator>(const UnicodeString &text) const
Greater than operator.
Definition: unistr.h:3524
const UChar * getBuffer() const
Get a read-only pointer to the internal buffer.
Definition: unistr.h:3477
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition: rep.h:241
#define U16_SET_CP_START(s, start, i)
Adjust a random-access offset to a code point boundary at the start of a code point.
Definition: utf16.h:429
UBool operator!=(const UnicodeString &text) const
Inequality operator.
Definition: unistr.h:3520
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:52
virtual UChar32 getChar32At(int32_t offset) const =0
Virtual version of char32At().
UnicodeString & reverse(void)
Reverse this UnicodeString in place.
Definition: unistr.h:4362
int32_t getCapacity(void) const
Return the capacity of the internal buffer of the UnicodeString object.
Definition: unistr.h:3453
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
Definition: unistr.h:3952
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
int8_t compareCodePointOrder(const UnicodeString &text) const
Compare two Unicode strings in code point order.
Definition: unistr.h:3601
unsigned char uint8_t
Define 64 bit limits.
Definition: pwin32.h:131
unsigned int uint32_t
Define 64 bit limits.
Definition: pwin32.h:147
virtual int32_t getLength() const =0
Virtual version of length().
UChar operator[](int32_t offset) const
Return the code unit at offset offset.
Definition: unistr.h:4081
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4347
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
Definition: unistr.h:199
UChar charAt(int32_t offset) const
Return the code unit at offset offset.
Definition: unistr.h:4077
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4236
int8_t compareCodePointOrderBetween(int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const
Compare two Unicode strings in code point order.
Definition: unistr.h:3638
UnicodeString & findAndReplace(const UnicodeString &oldText, const UnicodeString &newText)
Replace all occurrences of characters in oldText with the characters in newText.
Definition: unistr.h:4006
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
virtual void copy(int32_t start, int32_t limit, int32_t dest)=0
Copies a substring of this object, retaining metadata.
C++ API: StringPiece: Read-only byte string wrapper class.
UnicodeString & remove(void)
Remove all characters from the UnicodeString object.
Definition: unistr.h:4319
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:183
C++ API: Interface for writing bytes, and implementation classes.
#define U16_MAX_LENGTH
The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
Definition: utf16.h:147
int8_t compare(const UnicodeString &text) const
Compare the characters bitwise in this UnicodeString to the characters in text.
Definition: unistr.h:3540
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
Definition: unistr.h:4110
#define U16_SET_CP_LIMIT(s, start, i, length)
Adjust a random-access offset to a code point boundary after a code point.
Definition: utf16.h:608
int32_t lastIndexOf(const UnicodeString &text) const
Locate in this the last occurrence of the characters in text, using bitwise comparison.
Definition: unistr.h:3847
int32_t indexOf(const UnicodeString &text) const
Locate in this the first occurrence of the characters in text, using bitwise comparison.
Definition: unistr.h:3737
virtual Replaceable * clone() const
Clone this object, an instance of a subclass of Replaceable.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
virtual UChar getCharAt(int32_t offset) const =0
Virtual version of charAt().
#define NULL
Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
Definition: utypes.h:266
UBool isBogus(void) const
Determine if this object contains a valid string.
Definition: unistr.h:3461
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
UBool startsWith(const UnicodeString &text) const
Determine if this starts with the characters in text
Definition: unistr.h:3887
UnicodeString & removeBetween(int32_t start, int32_t limit=(int32_t) INT32_MAX)
Remove the characters in the range [start, limit) from the UnicodeString object.
Definition: unistr.h:4342
UnicodeString & operator+=(UChar ch)
Append operator.
Definition: unistr.h:4270
virtual UBool hasMetaData() const
Returns true if this object contains metadata.
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3449
#define TRUE
The TRUE value of a UBool.
Definition: umachine.h:212
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:187
#define U_EXPORT2
Definition: platform.h:338
UBool endsWith(const UnicodeString &text) const
Determine if this ends with the characters in text
Definition: unistr.h:3908
U_COMMON_API UnicodeString operator+(const UnicodeString &s1, const UnicodeString &s2)
Create a new UnicodeString with the concatenation of two others.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
Definition: schriter.h:43
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
int8_t compareBetween(int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const
Compare the characters bitwise in the range [start, limit) with the characters in srcText in the rang...
Definition: unistr.h:3577
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API. If the compiler doesn't support namespaces...
Definition: uversion.h:184
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text)=0
Replaces a substring of this object with the given text.
UBool operator<=(const UnicodeString &text) const
Less than or equal operator.
Definition: unistr.h:3536
signed char int8_t
Define 64 bit limits.
Definition: pwin32.h:127
const UChar * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
Definition: unistr.h:4159
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
struct UConverter UConverter
Definition: ucnv_err.h:94
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition: umachine.h:154
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:339
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
Definition: unistr.h:4085
UChar charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition: rep.h:246
UnicodeString & insert(int32_t start, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Insert the characters in srcText in the range [srcStart, srcStart + srcLength) into the UnicodeString...
Definition: unistr.h:4283
UnicodeString & setTo(const UnicodeString &srcText, int32_t srcStart)
Set the text in the UnicodeString object to the characters in srcText in the range [srcStart...
Definition: unistr.h:4198
#define UINT32_MAX
The largest value a 32 bit unsigned integer can hold.
Definition: umachine.h:167
unsigned short uint16_t
Define 64 bit limits.
Definition: pwin32.h:139
UBool operator<(const UnicodeString &text) const
Less than operator.
Definition: unistr.h:3528
Basic definitions for ICU, for both C and C++ APIs.
UBool isEmpty(void) const
Determine if this string is empty.
Definition: unistr.h:4122
int32_t u_strlen(const UChar *s)
Determine the length of an array of UChar.
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:216
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:474
#define U16_APPEND(s, i, capacity, c, isError)
Append a code point to a string, overwriting 1 or 2 code units.
Definition: utf16.h:311
UChar32 char32At(int32_t offset) const
Returns the 32-bit code point at the given 16-bit offset into the text.
Definition: rep.h:251
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:181
UBool operator==(const UnicodeString &text) const
Equality operator.
Definition: unistr.h:3506
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
UBool operator>=(const UnicodeString &text) const
Greater than or equal operator.
Definition: unistr.h:3532
void extractBetween(int32_t start, int32_t limit, UChar *dst, int32_t dstStart=0) const
Copy the characters in the range [start, limit) into the array dst, beginning at dstStart.
Definition: unistr.h:4057
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:117
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208
UnicodeString & replaceBetween(int32_t start, int32_t limit, const UnicodeString &srcText)
Replace the characters in the range [start, limit) with the characters in srcText.
Definition: unistr.h:3992
C++ API: Replaceable String.