ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
uset.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2002-2009, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uset.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002mar07
14 * created by: Markus W. Scherer
15 *
16 * C version of UnicodeSet.
17 */
18 
19 
27 #ifndef __USET_H__
28 #define __USET_H__
29 
30 #include "unicode/utypes.h"
31 #include "unicode/uchar.h"
32 
33 #ifndef UCNV_H
34 struct USet;
40 typedef struct USet USet;
41 #endif
42 
48 enum {
54 
82 
92 
98 };
99 
155 typedef enum USetSpanCondition {
208 
214 typedef struct USerializedSet {
219  const uint16_t *array;
236 
237 /*********************************************************************
238  * USet API
239  *********************************************************************/
240 
250 
262 uset_open(UChar32 start, UChar32 end);
263 
274 uset_openPattern(const UChar* pattern, int32_t patternLength,
275  UErrorCode* ec);
276 
289 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
290  uint32_t options,
291  UErrorCode* ec);
292 
299 U_STABLE void U_EXPORT2
300 uset_close(USet* set);
301 
312 uset_clone(const USet *set);
313 
324 uset_isFrozen(const USet *set);
325 
340 U_DRAFT void U_EXPORT2
341 uset_freeze(USet *set);
342 
354 uset_cloneAsThawed(const USet *set);
355 
365 U_STABLE void U_EXPORT2
366 uset_set(USet* set,
367  UChar32 start, UChar32 end);
368 
392  const UChar *pattern, int32_t patternLength,
393  uint32_t options,
394  UErrorCode *status);
395 
418 U_STABLE void U_EXPORT2
420  UProperty prop, int32_t value, UErrorCode* ec);
421 
457 U_STABLE void U_EXPORT2
459  const UChar *prop, int32_t propLength,
460  const UChar *value, int32_t valueLength,
461  UErrorCode* ec);
462 
473 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
474  int32_t pos);
475 
492 uset_toPattern(const USet* set,
493  UChar* result, int32_t resultCapacity,
494  UBool escapeUnprintable,
495  UErrorCode* ec);
496 
505 U_STABLE void U_EXPORT2
506 uset_add(USet* set, UChar32 c);
507 
520 U_STABLE void U_EXPORT2
521 uset_addAll(USet* set, const USet *additionalSet);
522 
532 U_STABLE void U_EXPORT2
533 uset_addRange(USet* set, UChar32 start, UChar32 end);
534 
544 U_STABLE void U_EXPORT2
545 uset_addString(USet* set, const UChar* str, int32_t strLen);
546 
556 U_STABLE void U_EXPORT2
557 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
558 
567 U_STABLE void U_EXPORT2
568 uset_remove(USet* set, UChar32 c);
569 
579 U_STABLE void U_EXPORT2
580 uset_removeRange(USet* set, UChar32 start, UChar32 end);
581 
591 U_STABLE void U_EXPORT2
592 uset_removeString(USet* set, const UChar* str, int32_t strLen);
593 
605 U_STABLE void U_EXPORT2
606 uset_removeAll(USet* set, const USet* removeSet);
607 
622 U_STABLE void U_EXPORT2
623 uset_retain(USet* set, UChar32 start, UChar32 end);
624 
637 U_STABLE void U_EXPORT2
638 uset_retainAll(USet* set, const USet* retain);
639 
648 U_STABLE void U_EXPORT2
649 uset_compact(USet* set);
650 
659 U_STABLE void U_EXPORT2
660 uset_complement(USet* set);
661 
673 U_STABLE void U_EXPORT2
674 uset_complementAll(USet* set, const USet* complement);
675 
683 U_STABLE void U_EXPORT2
684 uset_clear(USet* set);
685 
712 U_DRAFT void U_EXPORT2
713 uset_closeOver(USet* set, int32_t attributes);
714 
721 U_DRAFT void U_EXPORT2
723 
732 uset_isEmpty(const USet* set);
733 
743 uset_contains(const USet* set, UChar32 c);
744 
755 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
756 
766 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
767 
779 uset_indexOf(const USet* set, UChar32 c);
780 
792 uset_charAt(const USet* set, int32_t charIndex);
793 
803 uset_size(const USet* set);
804 
814 uset_getItemCount(const USet* set);
815 
835 uset_getItem(const USet* set, int32_t itemIndex,
836  UChar32* start, UChar32* end,
837  UChar* str, int32_t strCapacity,
838  UErrorCode* ec);
839 
849 uset_containsAll(const USet* set1, const USet* set2);
850 
862 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
863 
873 uset_containsNone(const USet* set1, const USet* set2);
874 
884 uset_containsSome(const USet* set1, const USet* set2);
885 
906 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
907 
927 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
928 
949 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
950 
970 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
971 
981 uset_equals(const USet* set1, const USet* set2);
982 
983 /*********************************************************************
984  * Serialized set API
985  *********************************************************************/
986 
1037 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1038 
1048 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1049 
1057 U_STABLE void U_EXPORT2
1059 
1070 
1082 
1097 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1098  UChar32* pStart, UChar32* pEnd);
1099 
1100 #endif
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end...
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:234
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
void uset_clear(USet *set)
Removes all of the elements from this set.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
USet * uset_clone(const USet *set)
Returns a copy of this object.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Continue a span() while there is a set element at the current position.
Definition: uset.h:182
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
One more than the last span condition.
Definition: uset.h:206
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:219
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
Enable case insensitive matching.
Definition: uset.h:91
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
unsigned int uint32_t
Define 64 bit limits.
Definition: pwin32.h:147
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
void uset_freeze(USet *set)
Freeze the set (make it immutable).
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:319
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
Enable case insensitive matching.
Definition: uset.h:81
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:53
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
#define U_EXPORT2
Definition: platform.h:338
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:155
C API: Unicode Properties.
Enough for any single-code point set.
Definition: uset.h:97
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
void uset_complement(USet *set)
Inverts this set.
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
struct USet USet
Definition: ucnv.h:66
int32_t length
The total length of the array.
Definition: uset.h:229
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:174
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
unsigned short uint16_t
Define 64 bit limits.
Definition: pwin32.h:139
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:224
A serialized form of a Unicode set.
Definition: uset.h:214
Continue a span() while there is a set element at the current position.
Definition: uset.h:201
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
#define U_DRAFT
This is used to declare a function as a draft public ICU C API.
Definition: umachine.h:119
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
Continue a span() while there is no set element at the current position.
Definition: uset.h:167
USet * uset_openEmpty()
Create an empty USet object.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:117
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208
void uset_close(USet *set)
Disposes of the storage used by a USet object.