ICU 57.1  57.1
normlzr.h
Go to the documentation of this file.
1 /*
2  ********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 1996-2015, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************
7  */
8 
9 #ifndef NORMLZR_H
10 #define NORMLZR_H
11 
12 #include "unicode/utypes.h"
13 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/chariter.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "unicode/uobject.h"
26 
133 public:
134 #ifndef U_HIDE_DEPRECATED_API
135 
140  enum {
141  DONE=0xffff  // NOTE(review): presumably the "nothing left" value returned by the UChar32 iteration methods of this class - confirm
142  };
143 
144  // Constructors
145 
156  Normalizer(const UnicodeString& str, UNormalizationMode mode);  // construct from a UnicodeString and a normalization-mode constant
157 
169  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);  // construct from a UChar buffer with an explicit length and a normalization-mode constant
170 
182 
188  Normalizer(const Normalizer& copy);  // copy constructor
189 #endif /* U_HIDE_DEPRECATED_API */
190 
195  virtual ~Normalizer();  // virtual destructor; unlike the constructors it is declared outside the U_HIDE_DEPRECATED_API guard
196 
197 
198  //-------------------------------------------------------------------------
199  // Static utility methods
200  //-------------------------------------------------------------------------
201 
202 #ifndef U_HIDE_DEPRECATED_API
203 
217  static void U_EXPORT2 normalize(const UnicodeString& source,  // static utility; source is read-only input
218  UNormalizationMode mode, int32_t options,  // normalization-mode constant plus an integer options value
219  UnicodeString& result,  // non-const reference written by the call (presumably the normalized text - confirm)
220  UErrorCode &status);  // ICU error code, used in place of exceptions
221 
239  static void U_EXPORT2 compose(const UnicodeString& source,  // static utility; source is read-only input
240  UBool compat, int32_t options,  // NOTE(review): compat presumably selects compatibility rather than canonical composition - confirm
241  UnicodeString& result,  // non-const reference written by the call
242  UErrorCode &status);  // ICU error code, used in place of exceptions
243 
261  static void U_EXPORT2 decompose(const UnicodeString& source,  // static utility; source is read-only input
262  UBool compat, int32_t options,  // NOTE(review): compat presumably selects compatibility rather than canonical decomposition - confirm
263  UnicodeString& result,  // non-const reference written by the call
264  UErrorCode &status);  // ICU error code, used in place of exceptions
265 
286  static inline UNormalizationCheckResult  // result type of the normalization quick-check functions
287  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);  // overload without options; its inline definition in this header calls the four-argument overload with options = 0
288 
302  static UNormalizationCheckResult  // same result type as the three-argument overload, which returns this call's value unchanged
303  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);  // overload with explicit options; the three-argument inline overload forwards here with options = 0
304 
325  static inline UBool
326  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);  // overload without options; its inline definition in this header calls the four-argument overload with options = 0
327 
343  static UBool
344  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);  // overload with explicit options; not defined in this header
345 
375  static UnicodeString &  // NOTE(review): presumably returns the result argument - confirm
376  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,  // two read-only input strings
377  UnicodeString &result,  // non-const reference written by the call
378  UNormalizationMode mode, int32_t options,  // normalization-mode constant plus an integer options value
379  UErrorCode &errorCode);  // ICU error code, used in place of exceptions
380 #endif /* U_HIDE_DEPRECATED_API */
381 
446  static inline int32_t  // returns whatever unorm_compare returns
447  compare(const UnicodeString &s1, const UnicodeString &s2,  // the two strings to compare; declared outside the U_HIDE_DEPRECATED_API guards
448  uint32_t options,  // passed through unchanged to unorm_compare
449  UErrorCode &errorCode);  // its address is passed to unorm_compare, which does all argument checking
450 
451 #ifndef U_HIDE_DEPRECATED_API
452  //-------------------------------------------------------------------------
453  // Iteration API
454  //-------------------------------------------------------------------------
455 
464  UChar32 current(void);  // returns one code point (UChar32); NOTE(review): presumably the one at the current position - confirm
465 
474  UChar32 first(void);  // returns one code point; NOTE(review): presumably moves to the start of the text first - confirm
475 
484  UChar32 last(void);  // returns one code point; NOTE(review): presumably moves to the end of the text first - confirm
485 
500  UChar32 next(void);  // returns one code point; NOTE(review): presumably advances the iteration position - confirm
501 
516  UChar32 previous(void);  // returns one code point; NOTE(review): presumably moves the iteration position backwards - confirm
517 
527  void setIndexOnly(int32_t index);  // NOTE(review): presumably sets the iteration position without normalizing at that position - confirm
528 
534  void reset(void);  // NOTE(review): presumably returns the iteration to the start of the text - confirm
535 
550  int32_t getIndex(void) const;  // const accessor; NOTE(review): presumably the current position in the input text - confirm
551 
560  int32_t startIndex(void) const;  // const accessor; NOTE(review): presumably the first valid index of the input text - confirm
561 
572  int32_t endIndex(void) const;  // const accessor; NOTE(review): presumably the end index of the input text - confirm
573 
582  UBool operator==(const Normalizer& that) const;  // equality with another Normalizer; not defined in this header
583 
592  inline UBool operator!=(const Normalizer& that) const;  // inline; its definition in this header returns the negation of operator==
593 
600  Normalizer* clone(void) const;  // returns a raw Normalizer pointer; NOTE(review): presumably a new copy owned by the caller - confirm
601 
608  int32_t hashCode(void) const;  // const; returns an int32_t hash value
609 
610  //-------------------------------------------------------------------------
611  // Property access methods
612  //-------------------------------------------------------------------------
613 
629  void setMode(UNormalizationMode newMode);  // takes a normalization-mode constant; NOTE(review): presumably stored in fUMode - confirm
630 
641  UNormalizationMode getUMode(void) const;  // const accessor returning a normalization-mode constant
642 
659  void setOption(int32_t option,  // option selector
660  UBool value);  // NOTE(review): presumably turns the given option on or off - confirm
661 
672  UBool getOption(int32_t option) const;  // const query for one option; NOTE(review): presumably reads fOptions - confirm
673 
682  void setText(const UnicodeString& newText,  // overload taking the new input as a UnicodeString
683  UErrorCode &status);  // ICU error code, used in place of exceptions
684 
693  void setText(const CharacterIterator& newText,  // overload taking the new input as a CharacterIterator (text-iteration API)
694  UErrorCode &status);  // ICU error code, used in place of exceptions
695 
705  void setText(const UChar* newText,  // overload taking the new input as a UChar buffer
706  int32_t length,  // explicit length for newText
707  UErrorCode &status);  // ICU error code, used in place of exceptions
714  void getText(UnicodeString& result);  // writes into result; NOTE(review): presumably a copy of the current input text - confirm
715 
721  static UClassID U_EXPORT2 getStaticClassID();  // static; UClassID identifies classes without using the compiler's RTTI
722 #endif /* U_HIDE_DEPRECATED_API */
723 
729  virtual UClassID getDynamicClassID() const;  // ICU4C "poor man's RTTI": returns a UClassID for the actual ICU class
730 
731 private:
732  //-------------------------------------------------------------------------
733  // Private functions
734  //-------------------------------------------------------------------------
735 
736  Normalizer(); // default constructor: declared private and not implemented
737  Normalizer &operator=(const Normalizer &that); // assignment operator: declared private and not implemented
738 
739  // Private utility methods for iteration.
740  // They are documented in the implementation source, not in this header.
741  UBool nextNormalize();  // NOTE(review): presumably normalizes the next piece of input into the buffer - confirm
742  UBool previousNormalize();  // NOTE(review): presumably the backwards counterpart of nextNormalize - confirm
743 
744  void init();  // private setup helper; NOTE(review): presumably shared by the constructors - confirm
745  void clearBuffer(void);  // NOTE(review): presumably resets buffer and bufferPos - confirm
746 
747  //-------------------------------------------------------------------------
748  // Private data
749  //-------------------------------------------------------------------------
750 
751  FilteredNormalizer2*fFilteredNorm2; // owned by this object if not NULL
752  const Normalizer2 *fNorm2; // not owned; may point to the same object as fFilteredNorm2
753 #ifndef U_HIDE_DEPRECATED_API
754  UNormalizationMode fUMode;  // normalization-mode constant; NOTE(review): presumably what setMode()/getUMode() access - confirm
755 #endif /* U_HIDE_DEPRECATED_API */
756  int32_t fOptions;  // NOTE(review): presumably the options accessed by setOption()/getOption() - confirm
757 
758  // The input text and our position in it
759  CharacterIterator *text;
760 
761  // The normalization buffer holds the result of normalizing
762  // the source text in the half-open range [currentIndex, nextIndex).
763  int32_t currentIndex, nextIndex;
764 
765  // A buffer for holding intermediate results
766  UnicodeString buffer;
767  int32_t bufferPos;  // NOTE(review): presumably the current read position within buffer - confirm
768 };
769 
770 //-------------------------------------------------------------------------
771 // Inline implementations
772 //-------------------------------------------------------------------------
773 
774 #ifndef U_HIDE_DEPRECATED_API
775 inline UBool
776 Normalizer::operator!= (const Normalizer& other) const  // inline definition of the operator!= declared in the class
777 { return ! operator==(other); }  // inequality is exactly the negation of operator==
778 
779 inline UNormalizationCheckResult
780 Normalizer::quickCheck(const UnicodeString& source,
781  UNormalizationMode mode,
782  UErrorCode &status) {
783  return quickCheck(source, mode, 0, status);  // forward to the four-argument overload with options = 0
784 }
785 
786 inline UBool
787 Normalizer::isNormalized(const UnicodeString& source, UNormalizationMode mode, UErrorCode &status) {
788  // Convenience overload: delegate to the four-argument overload with no options set.
789  const int32_t noOptions = 0;
790  return isNormalized(source, mode, noOptions, status);
791 }
792 #endif /* U_HIDE_DEPRECATED_API */
793 
794 inline int32_t
795 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
796  uint32_t options, UErrorCode &errorCode) {
797  // No validation here: unorm_compare performs all argument checking itself.
798  const UChar *firstChars = s1.getBuffer();
799  const int32_t firstLength = s1.length();
800  const UChar *secondChars = s2.getBuffer();
801  const int32_t secondLength = s2.length();
802  return unorm_compare(firstChars, firstLength, secondChars, secondLength, options, &errorCode);
803 }
804 
806 
807 #endif /* #if !UCONFIG_NO_NORMALIZATION */
808 
809 #endif // NORMLZR_H
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:136
C++ API: Unicode String.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
C++ API: New API for Unicode Normalization.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
UChar * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
C API: Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
Old Unicode normalization API.
Definition: normlzr.h:132
C++ API: Common ICU base class UObject.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:312
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition: utypes.h:476
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer classes.
Definition: unistr.h:293
C++ API: Character Iterator.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:443
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:91
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3794
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234