ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
uniset.h
Go to the documentation of this file.
1 /*
2 ***************************************************************************
3 * Copyright (C) 1999-2009, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ***************************************************************************
6 * Date Name Description
7 * 10/20/99 alan Creation.
8 ***************************************************************************
9 */
10 
11 #ifndef UNICODESET_H
12 #define UNICODESET_H
13 
14 #include "unicode/unifilt.h"
15 #include "unicode/unistr.h"
16 #include "unicode/uset.h"
17 
24 
25 class BMPSet;
26 class ParsePosition;
27 class SymbolTable;
28 class UnicodeSetStringSpan;
29 class UVector;
30 class RuleCharacterIterator;
31 
273 
274  int32_t len; // length of list used; 0 <= len <= capacity
275  int32_t capacity; // capacity of list
276  UChar32* list; // MUST be terminated with HIGH
277  BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
278  UChar32* buffer; // internal buffer, may be NULL
279  int32_t bufferCapacity; // capacity of buffer
280  int32_t patLen;
281 
291  UChar *pat;
292  UVector* strings; // maintained in sorted order
293  UnicodeSetStringSpan *stringSpan;
294 
295 private:
296  enum { // constants
297  kIsBogus = 1 // This set is bogus (i.e. not valid)
298  };
299  uint8_t fFlags; // Bit flag (see constants above)
300 public:
310  inline UBool isBogus(void) const;
311 
328  void setToBogus();
329 
330 public:
331 
332  enum {
337  MIN_VALUE = 0,
338 
343  MAX_VALUE = 0x10ffff
344  };
345 
346  //----------------------------------------------------------------
347  // Constructors &c
348  //----------------------------------------------------------------
349 
350 public:
351 
356  UnicodeSet();
357 
366  UnicodeSet(UChar32 start, UChar32 end);
367 
376  UnicodeSet(const UnicodeString& pattern,
377  UErrorCode& status);
378 
391  UnicodeSet(const UnicodeString& pattern,
392  uint32_t options,
393  const SymbolTable* symbols,
394  UErrorCode& status);
395 
409  UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
410  uint32_t options,
411  const SymbolTable* symbols,
412  UErrorCode& status);
413 
418  UnicodeSet(const UnicodeSet& o);
419 
424  virtual ~UnicodeSet();
425 
431  UnicodeSet& operator=(const UnicodeSet& o);
432 
444  virtual UBool operator==(const UnicodeSet& o) const;
445 
451  UBool operator!=(const UnicodeSet& o) const;
452 
462  virtual UnicodeFunctor* clone() const;
463 
471  virtual int32_t hashCode(void) const;
472 
481  inline static UnicodeSet *fromUSet(USet *uset);
482 
491  inline static const UnicodeSet *fromUSet(const USet *uset);
492 
500  inline USet *toUSet();
501 
502 
510  inline const USet * toUSet() const;
511 
512 
513  //----------------------------------------------------------------
514  // Freezable API
515  //----------------------------------------------------------------
516 
525  inline UBool isFrozen() const;
526 
540  UnicodeFunctor *freeze();
541 
550  UnicodeFunctor *cloneAsThawed() const;
551 
552  //----------------------------------------------------------------
553  // Public API
554  //----------------------------------------------------------------
555 
566  UnicodeSet& set(UChar32 start, UChar32 end);
567 
573  static UBool resemblesPattern(const UnicodeString& pattern,
574  int32_t pos);
575 
588  UnicodeSet& applyPattern(const UnicodeString& pattern,
589  UErrorCode& status);
590 
607  UnicodeSet& applyPattern(const UnicodeString& pattern,
608  uint32_t options,
609  const SymbolTable* symbols,
610  UErrorCode& status);
611 
643  UnicodeSet& applyPattern(const UnicodeString& pattern,
644  ParsePosition& pos,
645  uint32_t options,
646  const SymbolTable* symbols,
647  UErrorCode& status);
648 
662  virtual UnicodeString& toPattern(UnicodeString& result,
663  UBool escapeUnprintable = FALSE) const;
664 
687  UnicodeSet& applyIntPropertyValue(UProperty prop,
688  int32_t value,
689  UErrorCode& ec);
690 
720  UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
721  const UnicodeString& value,
722  UErrorCode& ec);
723 
732  virtual int32_t size(void) const;
733 
740  virtual UBool isEmpty(void) const;
741 
749  virtual UBool contains(UChar32 c) const;
750 
759  virtual UBool contains(UChar32 start, UChar32 end) const;
760 
768  UBool contains(const UnicodeString& s) const;
769 
777  virtual UBool containsAll(const UnicodeSet& c) const;
778 
786  UBool containsAll(const UnicodeString& s) const;
787 
796  UBool containsNone(UChar32 start, UChar32 end) const;
797 
805  UBool containsNone(const UnicodeSet& c) const;
806 
814  UBool containsNone(const UnicodeString& s) const;
815 
824  inline UBool containsSome(UChar32 start, UChar32 end) const;
825 
833  inline UBool containsSome(const UnicodeSet& s) const;
834 
842  inline UBool containsSome(const UnicodeString& s) const;
843 
862  int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
863 
881  int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
882 
901  int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
902 
920  int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
921 
926  virtual UMatchDegree matches(const Replaceable& text,
927  int32_t& offset,
928  int32_t limit,
929  UBool incremental);
930 
931 private:
953  static int32_t matchRest(const Replaceable& text,
954  int32_t start, int32_t limit,
955  const UnicodeString& s);
956 
966  int32_t findCodePoint(UChar32 c) const;
967 
968 public:
969 
977  virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
978 
987  int32_t indexOf(UChar32 c) const;
988 
998  UChar32 charAt(int32_t index) const;
999 
1014  virtual UnicodeSet& add(UChar32 start, UChar32 end);
1015 
1023  UnicodeSet& add(UChar32 c);
1024 
1036  UnicodeSet& add(const UnicodeString& s);
1037 
1038  private:
1044  static int32_t getSingleCP(const UnicodeString& s);
1045 
1046  void _add(const UnicodeString& s);
1047 
1048  public:
1057  UnicodeSet& addAll(const UnicodeString& s);
1058 
1067  UnicodeSet& retainAll(const UnicodeString& s);
1068 
1077  UnicodeSet& complementAll(const UnicodeString& s);
1078 
1087  UnicodeSet& removeAll(const UnicodeString& s);
1088 
1097  static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
1098 
1099 
1107  static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
1108 
1122  virtual UnicodeSet& retain(UChar32 start, UChar32 end);
1123 
1124 
1130  UnicodeSet& retain(UChar32 c);
1131 
1145  virtual UnicodeSet& remove(UChar32 start, UChar32 end);
1146 
1154  UnicodeSet& remove(UChar32 c);
1155 
1165  UnicodeSet& remove(const UnicodeString& s);
1166 
1174  virtual UnicodeSet& complement(void);
1175 
1190  virtual UnicodeSet& complement(UChar32 start, UChar32 end);
1191 
1199  UnicodeSet& complement(UChar32 c);
1200 
1211  UnicodeSet& complement(const UnicodeString& s);
1212 
1225  virtual UnicodeSet& addAll(const UnicodeSet& c);
1226 
1238  virtual UnicodeSet& retainAll(const UnicodeSet& c);
1239 
1251  virtual UnicodeSet& removeAll(const UnicodeSet& c);
1252 
1263  virtual UnicodeSet& complementAll(const UnicodeSet& c);
1264 
1271  virtual UnicodeSet& clear(void);
1272 
1298  UnicodeSet& closeOver(int32_t attribute);
1299 
1306  virtual UnicodeSet &removeAllStrings();
1307 
1315  virtual int32_t getRangeCount(void) const;
1316 
1324  virtual UChar32 getRangeStart(int32_t index) const;
1325 
1333  virtual UChar32 getRangeEnd(int32_t index) const;
1334 
1383  int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
1384 
1391  virtual UnicodeSet& compact();
1392 
1404  static UClassID U_EXPORT2 getStaticClassID(void);
1405 
1414  virtual UClassID getDynamicClassID(void) const;
1415 
1416 private:
1417 
1418  // Private API for the USet API
1419 
1420  friend class USetAccess;
1421 
1422  int32_t getStringCount() const;
1423 
1424  const UnicodeString* getString(int32_t index) const;
1425 
1426  //----------------------------------------------------------------
1427  // RuleBasedTransliterator support
1428  //----------------------------------------------------------------
1429 
1430 private:
1431 
1437  virtual UBool matchesIndexValue(uint8_t v) const;
1438 
1439 private:
1440 
1441  //----------------------------------------------------------------
1442  // Implementation: Clone as thawed (see ICU4J Freezable)
1443  //----------------------------------------------------------------
1444 
1445  UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
1446 
1447  //----------------------------------------------------------------
1448  // Implementation: Pattern parsing
1449  //----------------------------------------------------------------
1450 
1451  void applyPattern(RuleCharacterIterator& chars,
1452  const SymbolTable* symbols,
1453  UnicodeString& rebuiltPat,
1454  uint32_t options,
1455  UErrorCode& ec);
1456 
1457  //----------------------------------------------------------------
1458  // Implementation: Utility methods
1459  //----------------------------------------------------------------
1460 
1461  void ensureCapacity(int32_t newLen, UErrorCode& ec);
1462 
1463  void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
1464 
1465  void swapBuffers(void);
1466 
1467  UBool allocateStrings(UErrorCode &status);
1468 
1469  UnicodeString& _toPattern(UnicodeString& result,
1470  UBool escapeUnprintable) const;
1471 
1472  UnicodeString& _generatePattern(UnicodeString& result,
1473  UBool escapeUnprintable) const;
1474 
1475  static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
1476 
1477  static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
1478 
1479  //----------------------------------------------------------------
1480  // Implementation: Fundamental operators
1481  //----------------------------------------------------------------
1482 
1483  void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
1484 
1485  void add(const UChar32* other, int32_t otherLen, int8_t polarity);
1486 
1487  void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
1488 
1494  static UBool resemblesPropertyPattern(const UnicodeString& pattern,
1495  int32_t pos);
1496 
1497  static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
1498  int32_t iterOpts);
1499 
1538  UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
1539  ParsePosition& ppos,
1540  UErrorCode &ec);
1541 
1542  void applyPropertyPattern(RuleCharacterIterator& chars,
1543  UnicodeString& rebuiltPat,
1544  UErrorCode& ec);
1545 
1546  static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
1547 
1552  typedef UBool (*Filter)(UChar32 codePoint, void* context);
1553 
1563  void applyFilter(Filter filter,
1564  void* context,
1565  int32_t src,
1566  UErrorCode &status);
1567 
1571  void setPattern(const UnicodeString& newPat);
1575  void releasePattern();
1576 
1577  friend class UnicodeSetIterator;
1578 };
1579 
1580 
1581 
1582 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { // Inequality test against another UnicodeSet.
1583  return !operator==(o); // defined purely as the negation of operator==, so the two can never disagree
1584 }
1585 
1586 inline UBool UnicodeSet::isFrozen() const { // Reports whether this set has been frozen.
1587  return (UBool)(bmpSet!=NULL || stringSpan!=NULL); // the set is frozen iff either bmpSet or stringSpan is non-NULL
1588 }
1589 
1590 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { // Range overload: logical complement of containsNone(start, end).
1591  return !containsNone(start, end); // delegates to containsNone so both answers stay consistent
1592 }
1593 
1595  return !containsNone(s);
1596 }
1597 
1599  return !containsNone(s);
1600 }
1601 
1602 inline UBool UnicodeSet::isBogus() const { // Reports whether the kIsBogus flag bit is set in fFlags.
1603  return (UBool)(fFlags & kIsBogus); // kIsBogus is 1, so the masked value is already 0 or 1
1604 }
1605 
1607  return reinterpret_cast<UnicodeSet *>(uset);
1608 }
1609 
1610 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) { // Const overload: views a const USet pointer as a const UnicodeSet pointer.
1611  return reinterpret_cast<const UnicodeSet *>(uset); // pointer reinterpretation only; the pointer value is returned without any check
1612 }
1613 
1615  return reinterpret_cast<USet *>(this);
1616 }
1617 
1618 inline const USet *UnicodeSet::toUSet() const { // Const overload: exposes this object as a const USet pointer.
1619  return reinterpret_cast<const USet *>(this); // pointer reinterpretation only; no new object is created
1620 }
1621 
1623 
1624 #endif
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UBool matchesIndexValue(uint8_t v) const =0
Returns TRUE if this matcher will match a character c, where c & 0xFF == v, at offset, in the forward direction (with limit > offset).
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:272
UMatchDegree
Constants returned by UnicodeMatcher::matches() indicating the degree of match.
Definition: unimatch.h:30
UBool containsSome(UChar32 start, UChar32 end) const
Returns true if this set contains one or more of the characters in the given range.
Definition: uniset.h:1590
C++ API: Unicode String.
An interface that defines both lookup protocol and parsing of symbolic names.
Definition: symtable.h:54
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
virtual UBool operator==(const UnicodeSet &o) const
Compares the specified object with this set for equality.
virtual UMatchDegree matches(const Replaceable &text, int32_t &offset, int32_t limit, UBool incremental)
Implement UnicodeMatcher API.
unsigned char uint8_t
Define 64 bit limits.
Definition: pwin32.h:131
unsigned int uint32_t
Define 64 bit limits.
Definition: pwin32.h:147
C API: Unicode Set.
virtual UnicodeString & toPattern(UnicodeString &result, UBool escapeUnprintable=FALSE) const =0
Returns a string representation of this matcher.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:183
virtual void addMatchSetTo(UnicodeSet &toUnionTo) const =0
Union the set of all characters that may be matched by this object into the given set...
static UnicodeSet * fromUSet(USet *uset)
Get a UnicodeSet pointer from a USet.
Definition: uniset.h:1606
ParsePosition is a simple class used by Format and its subclasses to keep track of the current positi...
Definition: parsepos.h:47
UBool isBogus(void) const
Determine if this object contains a valid set.
Definition: uniset.h:1602
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:59
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
#define NULL
Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
Definition: utypes.h:266
USet * toUSet()
Produce a USet * pointer for this UnicodeSet.
Definition: uniset.h:1614
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:187
#define U_EXPORT2
Definition: platform.h:338
UBool isFrozen() const
Determines whether the set has been frozen (made immutable) or not.
Definition: uniset.h:1586
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:155
UnicodeFunctor is an abstract base class for objects that perform match and/or replace operations on ...
Definition: unifunct.h:33
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API. If the compiler doesn't support namespaces...
Definition: uversion.h:184
struct USet USet
Definition: ucnv.h:66
signed char int8_t
Define 64 bit limits.
Definition: pwin32.h:127
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:174
UnicodeSetIterator iterates over the contents of a UnicodeSet.
Definition: usetiter.h:61
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:339
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
UBool containsNone(UChar32 start, UChar32 end) const
Returns true if this set contains none of the characters of the given range.
virtual UClassID getDynamicClassID() const =0
ICU "poor man's RTTI", returns a UClassID for the actual class.
unsigned short uint16_t
Define 64 bit limits.
Definition: pwin32.h:139
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:216
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:474
UBool operator!=(const UnicodeSet &o) const
Compares the specified object with this set for inequality.
Definition: uniset.h:1582
C++ API: Unicode Filter.
virtual UnicodeFunctor * clone() const =0
Return a copy of this object.
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208
virtual UBool contains(UChar32 c) const =0
Returns true for characters that are in the selected subset.