ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
translit.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/17/99 aliu Creation.
8 **********************************************************************
9 */
10 #ifndef TRANSLIT_H
11 #define TRANSLIT_H
12 
13 #include "unicode/utypes.h"
14 
20 #if !UCONFIG_NO_TRANSLITERATION
21 
22 #include "unicode/uobject.h"
23 #include "unicode/unistr.h"
24 #include "unicode/parseerr.h"
25 #include "unicode/utrans.h" // UTransPosition, UTransDirection
26 #include "unicode/strenum.h"
27 
29 
30 class UnicodeFilter;
31 class UnicodeSet;
32 class CompoundTransliterator;
33 class TransliteratorParser;
34 class NormalizationTransliterator;
35 class TransliteratorIDParser;
36 
242 
243 private:
244 
248  UnicodeString ID;
249 
256  UnicodeFilter* filter;
257 
258  int32_t maximumContextLength;
259 
260  public:
261 
267  union Token {
277  void* pointer;
278  };
279 
285  inline static Token integerToken(int32_t);
286 
292  inline static Token pointerToken(void*);
293 
309  typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
310 
311 protected:
312 
322  Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
323 
329 
334  Transliterator& operator=(const Transliterator&);
335 
347  static Transliterator* createBasicInstance(const UnicodeString& id,
348  const UnicodeString* canon);
349 
350  friend class TransliteratorParser; // for parseID()
351  friend class TransliteratorIDParser; // for createBasicInstance()
352  friend class TransliteratorAlias; // for setID()
353 
354 public:
355 
360  virtual ~Transliterator();
361 
376  virtual Transliterator* clone() const;
377 
393  virtual int32_t transliterate(Replaceable& text,
394  int32_t start, int32_t limit) const;
395 
401  virtual void transliterate(Replaceable& text) const;
402 
467  virtual void transliterate(Replaceable& text, UTransPosition& index,
468  const UnicodeString& insertion,
469  UErrorCode& status) const;
470 
486  virtual void transliterate(Replaceable& text, UTransPosition& index,
487  UChar32 insertion,
488  UErrorCode& status) const;
489 
504  virtual void transliterate(Replaceable& text, UTransPosition& index,
505  UErrorCode& status) const;
506 
518  virtual void finishTransliteration(Replaceable& text,
519  UTransPosition& index) const;
520 
521 private:
522 
538  void _transliterate(Replaceable& text,
539  UTransPosition& index,
540  const UnicodeString* insertion,
541  UErrorCode &status) const;
542 
543 protected:
544 
624  virtual void handleTransliterate(Replaceable& text,
625  UTransPosition& pos,
626  UBool incremental) const = 0;
627 
628 public:
640  virtual void filteredTransliterate(Replaceable& text,
641  UTransPosition& index,
642  UBool incremental) const;
643 
644 private:
645 
673  virtual void filteredTransliterate(Replaceable& text,
674  UTransPosition& index,
675  UBool incremental,
676  UBool rollback) const;
677 
678 public:
679 
693  int32_t getMaximumContextLength(void) const;
694 
695 protected:
696 
703  void setMaximumContextLength(int32_t maxContextLength);
704 
705 public:
706 
717  virtual const UnicodeString& getID(void) const;
718 
728  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
729  UnicodeString& result);
730 
752  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
753  const Locale& inLocale,
754  UnicodeString& result);
755 
763  const UnicodeFilter* getFilter(void) const;
764 
774  UnicodeFilter* orphanFilter(void);
775 
786  void adoptFilter(UnicodeFilter* adoptedFilter);
787 
807  Transliterator* createInverse(UErrorCode& status) const;
808 
825  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
826  UTransDirection dir,
827  UParseError& parseError,
828  UErrorCode& status);
829 
840  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
841  UTransDirection dir,
842  UErrorCode& status);
843 
859  static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
860  const UnicodeString& rules,
861  UTransDirection dir,
862  UParseError& parseError,
863  UErrorCode& status);
864 
876  virtual UnicodeString& toRules(UnicodeString& result,
877  UBool escapeUnprintable) const;
878 
891  int32_t countElements() const;
892 
912  const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
913 
929  UnicodeSet& getSourceSet(UnicodeSet& result) const;
930 
945  virtual void handleGetSourceSet(UnicodeSet& result) const;
946 
960  virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
961 
962 public:
963 
975  static void U_EXPORT2 registerFactory(const UnicodeString& id,
976  Factory factory,
977  Token context);
978 
996  static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
997 
1012  static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1013  const UnicodeString& realID);
1014 
1015 protected:
1016 
1026  static void _registerFactory(const UnicodeString& id,
1027  Factory factory,
1028  Token context);
1029 
1033  static void _registerInstance(Transliterator* adoptedObj);
1034 
1038  static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1039 
1073  static void _registerSpecialInverse(const UnicodeString& target,
1074  const UnicodeString& inverseTarget,
1075  UBool bidirectional);
1076 
1077 public:
1078 
1092  static void U_EXPORT2 unregister(const UnicodeString& ID);
1093 
1094 public:
1095 
1105  static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
1106 
1112  static int32_t U_EXPORT2 countAvailableSources(void);
1113 
1123  static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1124  UnicodeString& result);
1125 
1134  static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1135 
1147  static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1148  const UnicodeString& source,
1149  UnicodeString& result);
1150 
1158  static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1159  const UnicodeString& target);
1160 
1174  static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1175  const UnicodeString& source,
1176  const UnicodeString& target,
1177  UnicodeString& result);
1178 
1179 protected:
1180 
1185  static int32_t _countAvailableSources(void);
1186 
1191  static UnicodeString& _getAvailableSource(int32_t index,
1192  UnicodeString& result);
1193 
1198  static int32_t _countAvailableTargets(const UnicodeString& source);
1199 
1204  static UnicodeString& _getAvailableTarget(int32_t index,
1205  const UnicodeString& source,
1206  UnicodeString& result);
1207 
1212  static int32_t _countAvailableVariants(const UnicodeString& source,
1213  const UnicodeString& target);
1214 
1219  static UnicodeString& _getAvailableVariant(int32_t index,
1220  const UnicodeString& source,
1221  const UnicodeString& target,
1222  UnicodeString& result);
1223 
1224 protected:
1225 
1232  void setID(const UnicodeString& id);
1233 
1234 public:
1235 
1246  static UClassID U_EXPORT2 getStaticClassID(void);
1247 
1263  virtual UClassID getDynamicClassID(void) const = 0;
1264 
1265 private:
1266  static UBool initializeRegistry(UErrorCode &status);
1267 
1268 public:
1276  static int32_t U_EXPORT2 countAvailableIDs(void);
1277 
1290  static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1291 };
1292 
1294  return maximumContextLength;
1295 }
1296 
1297 inline void Transliterator::setID(const UnicodeString& id) {
1298  ID = id;
1299  // NUL-terminate the ID string, which is a non-aliased copy: append a NUL UChar, then truncate it off again so length() is unchanged.
1300  ID.append((UChar)0);
1301  ID.truncate(ID.length()-1);
1302 }
1303 
1305  Token t;
1306  t.integer = i;
1307  return t;
1308 }
1309 
1311  Token t;
1312  t.pointer = p;
1313  return t;
1314 }
1315 
1317 
1318 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1319 
1320 #endif
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:241
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:272
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:118
C++ API: Unicode String.
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:272
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:309
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4347
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4236
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:475
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:183
int32_t getMaximumContextLength(void) const
Returns the length of the longest context required by this transliterator.
Definition: translit.h:1293
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:59
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
C API: Transliterator.
void setID(const UnicodeString &id)
Set the ID of this transliterator.
Definition: translit.h:1297
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3449
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:267
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:187
#define U_EXPORT2
Definition: platform.h:338
C++ API: Common ICU base class UObject.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
static Token pointerToken(void *)
Return a token containing a pointer.
Definition: translit.h:1310
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:277
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API. If the compiler doesn't support namespaces...
Definition: uversion.h:184
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:212
C API: Parse Error Information.
static Token integerToken(int32_t)
Return a token containing an integer.
Definition: translit.h:1304
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:339
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:55
C++ API: String Enumeration.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:181
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules o...
Definition: utrans.h:76
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208