ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tblcoll.h
Go to the documentation of this file.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
59 #ifndef TBLCOLL_H
60 #define TBLCOLL_H
61 
62 #include "unicode/utypes.h"
63 
64 
65 #if !UCONFIG_NO_COLLATION
66 
67 #include "unicode/coll.h"
68 #include "unicode/ucol.h"
69 #include "unicode/sortkey.h"
70 #include "unicode/normlzr.h"
71 
72 U_NAMESPACE_BEGIN
73 
77 class StringSearch;
82 
111 class U_I18N_API RuleBasedCollator : public Collator
112 {
113 public:
114 
115  // constructor -------------------------------------------------------------
116 
126  RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
127 
138  RuleBasedCollator(const UnicodeString& rules,
139  ECollationStrength collationStrength,
140  UErrorCode& status);
141 
152  RuleBasedCollator(const UnicodeString& rules,
153  UColAttributeValue decompositionMode,
154  UErrorCode& status);
155 
167  RuleBasedCollator(const UnicodeString& rules,
168  ECollationStrength collationStrength,
169  UColAttributeValue decompositionMode,
170  UErrorCode& status);
171 
178  RuleBasedCollator(const RuleBasedCollator& other);
179 
180 
198  RuleBasedCollator(const uint8_t *bin, int32_t length,
199  const RuleBasedCollator *base,
200  UErrorCode &status);
201  // destructor --------------------------------------------------------------
202 
207  virtual ~RuleBasedCollator();
208 
209  // public methods ----------------------------------------------------------
210 
216  RuleBasedCollator& operator=(const RuleBasedCollator& other);
217 
224  virtual UBool operator==(const Collator& other) const;
225 
232  virtual UBool operator!=(const Collator& other) const;
233 
240  virtual Collator* clone(void) const;
241 
252  virtual CollationElementIterator* createCollationElementIterator(
253  const UnicodeString& source) const;
254 
264  virtual CollationElementIterator* createCollationElementIterator(
265  const CharacterIterator& source) const;
266 
279  virtual EComparisonResult compare(const UnicodeString& source,
280  const UnicodeString& target) const;
281 
282 
295  virtual UCollationResult compare(const UnicodeString& source,
296  const UnicodeString& target,
297  UErrorCode &status) const;
298 
312  virtual EComparisonResult compare(const UnicodeString& source,
313  const UnicodeString& target,
314  int32_t length) const;
315 
329  virtual UCollationResult compare(const UnicodeString& source,
330  const UnicodeString& target,
331  int32_t length,
332  UErrorCode &status) const;
333 
367  virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
368  const UChar* target, int32_t targetLength)
369  const;
370 
387  virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
388  const UChar* target, int32_t targetLength,
389  UErrorCode &status) const;
390 
402  virtual UCollationResult compare(UCharIterator &sIter,
403  UCharIterator &tIter,
404  UErrorCode &status) const;
405 
418  virtual CollationKey& getCollationKey(const UnicodeString& source,
419  CollationKey& key,
420  UErrorCode& status) const;
421 
435  virtual CollationKey& getCollationKey(const UChar *source,
436  int32_t sourceLength,
437  CollationKey& key,
438  UErrorCode& status) const;
439 
445  virtual int32_t hashCode(void) const;
446 
457  virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
458 
465  const UnicodeString& getRules(void) const;
466 
472  virtual void getVersion(UVersionInfo info) const;
473 
484  int32_t getMaxExpansion(int32_t order) const;
485 
496  virtual UClassID getDynamicClassID(void) const;
497 
509  static UClassID U_EXPORT2 getStaticClassID(void);
510 
519  uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
520 
521 
532  int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
533 
541  void getRules(UColRuleOption delta, UnicodeString &buffer);
542 
550  virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
551  UErrorCode &status);
552 
560  virtual UColAttributeValue getAttribute(UColAttribute attr,
561  UErrorCode &status);
562 
573  virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
574 
584  virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
585 
593  virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
594 
601  virtual uint32_t getVariableTop(UErrorCode &status) const;
602 
612  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
613 
619  virtual Collator* safeClone(void);
620 
631  virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
632  int32_t resultLength) const;
633 
646  virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
647  uint8_t *result, int32_t resultLength) const;
648 
659  virtual ECollationStrength getStrength(void) const;
660 
667  virtual void setStrength(ECollationStrength newStrength);
668 
669 private:
670 
671  // private static constants -----------------------------------------------
672 
673  enum {
674  /* need look up in .commit() */
675  CHARINDEX = 0x70000000,
676  /* Expand index follows */
677  EXPANDCHARINDEX = 0x7E000000,
678  /* contract indexes follows */
679  CONTRACTCHARINDEX = 0x7F000000,
680  /* unmapped character values */
681  UNMAPPED = 0xFFFFFFFF,
682  /* primary strength increment */
683  PRIMARYORDERINCREMENT = 0x00010000,
684  /* secondary strength increment */
685  SECONDARYORDERINCREMENT = 0x00000100,
686  /* tertiary strength increment */
687  TERTIARYORDERINCREMENT = 0x00000001,
688  /* mask off anything but primary order */
689  PRIMARYORDERMASK = 0xffff0000,
690  /* mask off anything but secondary order */
691  SECONDARYORDERMASK = 0x0000ff00,
692  /* mask off anything but tertiary order */
693  TERTIARYORDERMASK = 0x000000ff,
694  /* mask off ignorable char order */
695  IGNORABLEMASK = 0x0000ffff,
696  /* use only the primary difference */
697  PRIMARYDIFFERENCEONLY = 0xffff0000,
698  /* use only the primary and secondary difference */
699  SECONDARYDIFFERENCEONLY = 0xffffff00,
700  /* primary order shift */
701  PRIMARYORDERSHIFT = 16,
702  /* secondary order shift */
703  SECONDARYORDERSHIFT = 8,
704  /* starting value for collation elements */
705  COLELEMENTSTART = 0x02020202,
706  /* testing mask for primary low element */
707  PRIMARYLOWZEROMASK = 0x00FF0000,
708  /* resetting value for secondaries and tertiaries */
709  RESETSECONDARYTERTIARY = 0x00000202,
710  /* resetting value for tertiaries */
711  RESETTERTIARY = 0x00000002,
712 
713  PRIMIGNORABLE = 0x0202
714  };
715 
716  // private data members ---------------------------------------------------
717 
718  UBool dataIsOwned;
719 
720  UBool isWriteThroughAlias;
721 
726  UCollator *ucollator;
727 
731  UnicodeString urulestring;
732 
733  // friend classes --------------------------------------------------------
734 
739 
744  friend class Collator;
745 
749  friend class StringSearch;
750 
751  // private constructors --------------------------------------------------
752 
757 
768  RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
769 
778  void
779  construct(const UnicodeString& rules,
780  UColAttributeValue collationStrength,
781  UColAttributeValue decompositionMode,
782  UErrorCode& status);
783 
784  // private methods -------------------------------------------------------
785 
791  void setUCollator(const Locale& locale, UErrorCode& status);
792 
798  void setUCollator(const char* locale, UErrorCode& status);
799 
807  void setUCollator(UCollator *collator);
808 
809 public:
815  const UCollator * getUCollator();
816 
817 protected:
825  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
826 
827 private:
828 
829  // if not owned and not a write through alias, copy the ucollator
830  void checkOwned(void);
831 
832  // utility to init rule string used by checkOwned and construct
833  void setRuleStringFromCollator();
834 
841  Collator::EComparisonResult getEComparisonResult(
842  const UCollationResult &result) const;
843 
849  Collator::ECollationStrength getECollationStrength(
850  const UCollationStrength &strength) const;
851 
857  UCollationStrength getUCollationStrength(
858  const Collator::ECollationStrength &strength) const;
859 };
860 
861 // inline method implementation ---------------------------------------------
862 // Convenience overload: forwards to setUCollator(const char*, UErrorCode&) with locale.getName().
863 inline void RuleBasedCollator::setUCollator(const Locale &locale,
864  UErrorCode &status)
865 {
866  setUCollator(locale.getName(), status);
867 }
868 
869 // Replaces the wrapped UCollator with `collator`, held as a non-owned write-through alias.
870 inline void RuleBasedCollator::setUCollator(UCollator *collator)
871 {
872 
873  if (ucollator && dataIsOwned) { // close the previous collator only if this object owns it
874  ucol_close(ucollator);
875  }
876  ucollator = collator;
877  dataIsOwned = FALSE; // mark the new collator as not owned, so the check above will not close it later
878  isWriteThroughAlias = TRUE;
879  setRuleStringFromCollator(); // re-initialise the rule string now that the collator has changed
880 }
881 
882 inline const UCollator * RuleBasedCollator::getUCollator()
883 {
884  return ucollator;
885 }
886 // Converts a UCollationResult into the corresponding Collator::EComparisonResult.
887 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
888  const UCollationResult &result) const
889 {
890  switch (result)
891  {
892  case UCOL_LESS :
893  return Collator::LESS;
894  case UCOL_EQUAL :
895  return Collator::EQUAL;
896  default : // every value other than UCOL_LESS / UCOL_EQUAL maps to GREATER
897  return Collator::GREATER;
898  }
899 }
900 // Converts a UCollationStrength into the corresponding Collator::ECollationStrength.
901 inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
902  const UCollationStrength &strength) const
903 {
904  switch (strength)
905  {
906  case UCOL_PRIMARY :
907  return Collator::PRIMARY;
908  case UCOL_SECONDARY :
909  return Collator::SECONDARY;
910  case UCOL_TERTIARY :
911  return Collator::TERTIARY;
912  case UCOL_QUATERNARY :
913  return Collator::QUATERNARY;
914  default : // any strength not listed above is reported as IDENTICAL
915  return Collator::IDENTICAL;
916  }
917 }
918 // Converts a Collator::ECollationStrength into the corresponding UCollationStrength (inverse of getECollationStrength).
919 inline UCollationStrength RuleBasedCollator::getUCollationStrength(
920  const Collator::ECollationStrength &strength) const
921 {
922  switch (strength)
923  {
924  case Collator::PRIMARY :
925  return UCOL_PRIMARY;
926  case Collator::SECONDARY :
927  return UCOL_SECONDARY;
928  case Collator::TERTIARY :
929  return UCOL_TERTIARY;
930  case Collator::QUATERNARY :
931  return UCOL_QUATERNARY;
932  default : // any strength not listed above is reported as UCOL_IDENTICAL
933  return UCOL_IDENTICAL;
934  }
935 }
936 
937 U_NAMESPACE_END
938 
939 #endif /* #if !UCONFIG_NO_COLLATION */
940 
941 #endif
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:124
string a < string b
Definition: ucol.h:81
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator...
virtual Collator * safeClone(void)=0
Thread safe cloning operation.
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables...
Definition: tblcoll.h:111
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a UnicodeString.
The CollationElementIterator class is used as an iterator to walk through each character of an intern...
Definition: coleitr.h:120
virtual UBool operator!=(const Collator &other) const
Returns true if "other" is not the same as "this".
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the u_strcoll() m...
Definition: ucol.h:75
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:272
C++ API: Unicode Normalization.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
C API for code unit iteration.
Definition: uiter.h:339
EComparisonResult
LESS is returned if source string is compared to be less than target string in the compare() method...
Definition: coll.h:226
const char * getName() const
Returns the programmatic name of the entire locale, with the language, country and variant separated ...
Definition: locid.h:752
string a == string b
Definition: ucol.h:77
virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to a collation element value of a string supplied.
unsigned char uint8_t
Define 64 bit limits.
Definition: pwin32.h:131
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
unsigned int uint32_t
Define 64 bit limits.
Definition: pwin32.h:147
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID POLYMORPHICALLY.
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:166
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:475
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:183
Collation keys are generated by the Collator class.
Definition: sortkey.h:96
C++ API: Collation Service.
Secondary collation strength.
Definition: ucol.h:98
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
C API: Collator.
C++ API: Keys for comparing strings multiple times.
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:177
void ucol_close(UCollator *coll)
Close a UCollator.
virtual ECollationStrength getStrength(void) const =0
Determines the minimum strength that will be used in comparison or transformation. ...
Tertiary collation strength.
Definition: ucol.h:100
#define TRUE
The TRUE value of a UBool.
Definition: umachine.h:212
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:187
virtual UBool operator==(const Collator &other) const
Returns true if "other" is the same as "this".
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status)=0
Universal attribute getter.
#define U_EXPORT2
Definition: platform.h:338
virtual int32_t hashCode(void) const =0
Generates the hash code for the collation object.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API. If the compiler doesn't support namespaces...
Definition: uversion.h:184
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:339
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested...
Definition: uloc.h:314
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:60
Basic definitions for ICU, for both C and C++ APIs.
Identical collation strength.
Definition: ucol.h:107
virtual Collator * clone(void) const =0
Makes a shallow copy of the current object.
Quaternary collation strength.
Definition: ucol.h:105
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:216
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:91
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:181
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:231
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:207
Primary collation strength.
Definition: ucol.h:96
StringSearch is a SearchIterator that provides language-sensitive text searching based on the compari...
Definition: stsearch.h:138
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
const UCollator * getUCollator()
Get UCollator data struct.
Definition: tblcoll.h:882
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208
virtual void setStrength(ECollationStrength newStrength)=0
Sets the minimum strength to be used in comparison or transformation.