ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
normlzr.h
Go to the documentation of this file.
1 /*
2  ********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 1996-2006, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************
7  */
8 
9 #ifndef NORMLZR_H
10 #define NORMLZR_H
11 
12 #include "unicode/utypes.h"
13 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/uobject.h"
22 #include "unicode/unistr.h"
23 #include "unicode/chariter.h"
24 #include "unicode/unorm.h"
25 
26 
27 struct UCharIterator;
28 typedef struct UCharIterator UCharIterator;
123 class U_COMMON_API Normalizer : public UObject {
124 public:
130  enum {
131  DONE=0xffff
132  };
133 
134  // Constructors
135 
146  Normalizer(const UnicodeString& str, UNormalizationMode mode);
147 
159  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
160 
172 
178  Normalizer(const Normalizer& copy);
179 
184  virtual ~Normalizer();
185 
186 
187  //-------------------------------------------------------------------------
188  // Static utility methods
189  //-------------------------------------------------------------------------
190 
205  static void U_EXPORT2 normalize(const UnicodeString& source,
206  UNormalizationMode mode, int32_t options,
207  UnicodeString& result,
208  UErrorCode &status);
209 
227  static void U_EXPORT2 compose(const UnicodeString& source,
228  UBool compat, int32_t options,
229  UnicodeString& result,
230  UErrorCode &status);
231 
249  static void U_EXPORT2 decompose(const UnicodeString& source,
250  UBool compat, int32_t options,
251  UnicodeString& result,
252  UErrorCode &status);
253 
274  static inline UNormalizationCheckResult
275  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
276 
290  static inline UNormalizationCheckResult
291  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
292 
313  static inline UBool
314  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
315 
331  static inline UBool
332  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
333 
363  static UnicodeString &
364  U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
365  UnicodeString &result,
366  UNormalizationMode mode, int32_t options,
367  UErrorCode &errorCode);
368 
433  static inline int32_t
434  compare(const UnicodeString &s1, const UnicodeString &s2,
435  uint32_t options,
436  UErrorCode &errorCode);
437 
438  //-------------------------------------------------------------------------
439  // Iteration API
440  //-------------------------------------------------------------------------
441 
450  UChar32 current(void);
451 
460  UChar32 first(void);
461 
470  UChar32 last(void);
471 
486  UChar32 next(void);
487 
502  UChar32 previous(void);
503 
513  void setIndexOnly(int32_t index);
514 
520  void reset(void);
521 
536  int32_t getIndex(void) const;
537 
546  int32_t startIndex(void) const;
547 
558  int32_t endIndex(void) const;
559 
568  UBool operator==(const Normalizer& that) const;
569 
578  inline UBool operator!=(const Normalizer& that) const;
579 
586  Normalizer* clone(void) const;
587 
594  int32_t hashCode(void) const;
595 
596  //-------------------------------------------------------------------------
597  // Property access methods
598  //-------------------------------------------------------------------------
599 
615  void setMode(UNormalizationMode newMode);
616 
627  UNormalizationMode getUMode(void) const;
628 
645  void setOption(int32_t option,
646  UBool value);
647 
658  UBool getOption(int32_t option) const;
659 
668  void setText(const UnicodeString& newText,
669  UErrorCode &status);
670 
679  void setText(const CharacterIterator& newText,
680  UErrorCode &status);
681 
691  void setText(const UChar* newText,
692  int32_t length,
693  UErrorCode &status);
700  void getText(UnicodeString& result);
701 
707  static UClassID U_EXPORT2 getStaticClassID();
708 
714  virtual UClassID getDynamicClassID() const;
715 
716 private:
717  //-------------------------------------------------------------------------
718  // Private functions
719  //-------------------------------------------------------------------------
720 
721  Normalizer(); // default constructor not implemented
722  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
723 
724  // Private utility methods for iteration
725  // For documentation, see the source code
726  UBool nextNormalize();
727  UBool previousNormalize();
728 
729  void init(CharacterIterator *iter);
730  void clearBuffer(void);
731 
732  //-------------------------------------------------------------------------
733  // Private data
734  //-------------------------------------------------------------------------
735 
736  UNormalizationMode fUMode;
737  int32_t fOptions;
738 
739  // The input text and our position in it
740  UCharIterator *text;
741 
742  // The normalization buffer is the result of normalization
743  // of the source in the half-open range [currentIndex..nextIndex).
744  int32_t currentIndex, nextIndex;
745 
746  // A buffer for holding intermediate results
747  UnicodeString buffer;
748  int32_t bufferPos;
749 
750 };
751 
752 //-------------------------------------------------------------------------
753 // Inline implementations
754 //-------------------------------------------------------------------------
755 
756 inline UBool
757 Normalizer::operator!= (const Normalizer& other) const
758 { return ! operator==(other); }
759 
760 inline UNormalizationCheckResult
761 Normalizer::quickCheck(const UnicodeString& source,
762  UNormalizationMode mode,
763  UErrorCode &status) {
764  if(U_FAILURE(status)) {
765  return UNORM_MAYBE;
766  }
767 
768  return unorm_quickCheck(source.getBuffer(), source.length(),
769  mode, &status);
770 }
771 
772 inline UNormalizationCheckResult
773 Normalizer::quickCheck(const UnicodeString& source,
774  UNormalizationMode mode, int32_t options,
775  UErrorCode &status) {
776  if(U_FAILURE(status)) {
777  return UNORM_MAYBE;
778  }
779 
780  return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
781  mode, options, &status);
782 }
783 
784 inline UBool
785 Normalizer::isNormalized(const UnicodeString& source,
786  UNormalizationMode mode,
787  UErrorCode &status) {
788  if(U_FAILURE(status)) {
789  return FALSE;
790  }
791 
792  return unorm_isNormalized(source.getBuffer(), source.length(),
793  mode, &status);
794 }
795 
796 inline UBool
797 Normalizer::isNormalized(const UnicodeString& source,
798  UNormalizationMode mode, int32_t options,
799  UErrorCode &status) {
800  if(U_FAILURE(status)) {
801  return FALSE;
802  }
803 
804  return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
805  mode, options, &status);
806 }
807 
808 inline int32_t
809 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
810  uint32_t options,
811  UErrorCode &errorCode) {
812  // all argument checking is done in unorm_compare
813  return unorm_compare(s1.getBuffer(), s1.length(),
814  s2.getBuffer(), s2.length(),
815  options,
816  &errorCode);
817 }
818 
819 U_NAMESPACE_END
820 
821 #endif /* #if !UCONFIG_NO_NORMALIZATION */
822 
823 #endif // NORMLZR_H
UBool operator==(const Normalizer &that) const
Returns TRUE when both iterators refer to the same character in the same input text.
UNormalizationCheckResult unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode)
Performing quick check on a string; same as unorm_quickCheck but takes an extra options parameter lik...
UBool operator!=(const Normalizer &that) const
Returns FALSE when both iterators refer to the same character in the same input text.
Definition: normlzr.h:757
The Normalizer class supports the standard normalization forms described in Unicode Standard Annex #1...
Definition: normlzr.h:123
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status)
Performing quick check on a string, to quickly determine if the string is in a particular normalizati...
Definition: normlzr.h:761
#define U_FAILURE(x)
Does the error code indicate a failure?
Definition: utypes.h:816
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:127
C++ API: Unicode String.
C API for code unit iteration.
Definition: uiter.h:339
unsigned int uint32_t
Define 64 bit limits.
Definition: pwin32.h:147
UNormalizationCheckResult unorm_quickCheck(const UChar *source, int32_t sourcelength, UNormalizationMode mode, UErrorCode *status)
Performing quick check on a string, to quickly determine if the string is in a particular normalizati...
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:183
static int32_t compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode)
Compare two strings for canonical equivalence.
Definition: normlzr.h:809
UBool unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode)
Test if a string is in a given normalization form; same as unorm_isNormalized but takes an extra opti...
C API: Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
UBool unorm_isNormalized(const UChar *src, int32_t srcLength, UNormalizationMode mode, UErrorCode *pErrorCode)
Test if a string is in a given normalization form.
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3449
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:187
#define U_EXPORT2
Definition: platform.h:338
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two strings for canonical equivalence.
C++ API: Common ICU base class UObject.
Indicates that it cannot be determined whether the string is in the normalized format without further thorough...
Definition: unorm.h:224
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API. If the compiler doesn't support namespaces...
Definition: uversion.h:184
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:212
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode)
Test if a string is in a given normalization form.
Definition: normlzr.h:785
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:339
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
UChar * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
Basic definitions for ICU, for both C and C++ APIs.
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:216
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:474
C++ API: Character Iterator.
UNormalizationCheckResult
Result values for unorm_quickCheck().
Definition: unorm.h:211
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208