ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ubrk.h
Go to the documentation of this file.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2009, International Business Machines Corporation and others.
4 * All Rights Reserved.
5 ******************************************************************************
6 */
7 
8 #ifndef UBRK_H
9 #define UBRK_H
10 
11 #include "unicode/utypes.h"
12 #include "unicode/uloc.h"
13 #include "unicode/utext.h"
14 
19 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
20 # define UBRK_TYPEDEF_UBREAK_ITERATOR
21 
25  typedef void UBreakIterator;
26 #endif
27 
28 #if !UCONFIG_NO_BREAK_ITERATION
29 
30 #include "unicode/parseerr.h"
31 
86 typedef enum UBreakIteratorType {
90  UBRK_WORD = 1,
92  UBRK_LINE = 2,
95 
96 #ifndef U_HIDE_DEPRECATED_API
97 
106 #endif /* U_HIDE_DEPRECATED_API */
107  UBRK_COUNT = 5
109 
113 #define UBRK_DONE ((int32_t) -1)
114 
115 
124 typedef enum UWordBreak {
147 } UWordBreak;
148 
157 typedef enum ULineBreakTag {
167 } ULineBreakTag;
168 
169 
170 
179 typedef enum USentenceBreakTag {
196 
197 
214  const char *locale,
215  const UChar *text,
216  int32_t textLength,
217  UErrorCode *status);
218 
235 ubrk_openRules(const UChar *rules,
236  int32_t rulesLength,
237  const UChar *text,
238  int32_t textLength,
239  UParseError *parseErr,
240  UErrorCode *status);
241 
260  const UBreakIterator *bi,
261  void *stackBuffer,
262  int32_t *pBufferSize,
263  UErrorCode *status);
264 
269 #define U_BRK_SAFECLONE_BUFFERSIZE 512
270 
277 U_STABLE void U_EXPORT2
279 
288 U_STABLE void U_EXPORT2
290  const UChar* text,
291  int32_t textLength,
292  UErrorCode* status);
293 
294 
306 U_STABLE void U_EXPORT2
308  UText* text,
309  UErrorCode* status);
310 
311 
312 
322 ubrk_current(const UBreakIterator *bi);
323 
335 
347 
358 
371 
383  int32_t offset);
384 
396  int32_t offset);
397 
407 U_STABLE const char* U_EXPORT2
409 
419 ubrk_countAvailable(void);
420 
421 
433 
445 
464 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
465 
475 U_STABLE const char* U_EXPORT2
477 
478 
479 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
480 
481 #endif
Tag value for sentences that do not contain an ending sentence terminator ('. ...
Definition: ubrk.h:191
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
Tag value for words containing kana characters, upper limit.
Definition: ubrk.h:142
Tag value for words containing ideographic characters, lower limit.
Definition: ubrk.h:144
void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
Tag value for words containing letters, upper limit.
Definition: ubrk.h:138
int32_t ubrk_next(UBreakIterator *bi)
Determine the text boundary following the current text boundary.
Upper bound for hard line breaks.
Definition: ubrk.h:166
Tag value for words containing kana characters, lower limit.
Definition: ubrk.h:140
Character breaks.
Definition: ubrk.h:88
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
Definition: ubrk.h:157
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
Definition: ubrk.h:124
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
Definition: ubrk.h:179
Tag value for a hard, or mandatory line break.
Definition: ubrk.h:164
Upper bound for tags for sentences ended by sentence terminators.
Definition: ubrk.h:186
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:86
void UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:25
Tag value for sentences ending with a sentence terminator ('.
Definition: ubrk.h:184
UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
Word breaks.
Definition: ubrk.h:90
int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position...
int32_t ubrk_first(UBreakIterator *bi)
Determine the index of the first character in the text being scanned.
int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Determine the text boundary preceding the specified offset.
C API: Abstract Unicode Text API.
void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
Upper bound for tags for uncategorized words.
Definition: ubrk.h:129
Tag value for words that appear to be numbers, upper limit.
Definition: ubrk.h:133
int32_t ubrk_previous(UBreakIterator *bi)
Determine the text boundary preceding the current text boundary.
Tag value for soft line breaks, positions at which a line break is acceptable but not required...
Definition: ubrk.h:160
Upper bound for tags for sentences ended by a separator.
Definition: ubrk.h:193
#define U_EXPORT2
Definition: platform.h:338
UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
int32_t ubrk_last(UBreakIterator *bi)
Determine the index immediately beyond the last character in the text being scanned.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position...
const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
C API: Parse Error Information.
int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.
Tag value for "words" that do not fit into any of the other categories.
Definition: ubrk.h:127
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested...
Definition: uloc.h:314
Line breaks.
Definition: ubrk.h:92
Sentence breaks.
Definition: ubrk.h:94
UText struct.
Definition: utext.h:1307
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters...
Definition: ubrk.h:136
Tag value for words that appear to be numbers, lower limit.
Definition: ubrk.h:131
Upper bound for soft line breaks.
Definition: ubrk.h:162
Title Case breaks. The iterator created using this type locates title boundaries as described for Unic...
Definition: ubrk.h:105
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Determine the text boundary following the specified offset.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:117
Tag value for words containing ideographic characters, upper limit.
Definition: ubrk.h:146
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208
C API: Locale.