ICU 4.2.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ucsdet.h
Go to the documentation of this file.
1 /*
2  **********************************************************************
3  * Copyright (C) 2005-2007, International Business Machines
4  * Corporation and others. All Rights Reserved.
5  **********************************************************************
6  * file name: ucsdet.h
7  * encoding: US-ASCII
8  * indentation:4
9  *
10  * created on: 2005Aug04
11  * created by: Andy Heninger
12  *
13  * ICU Character Set Detection, API for C
14  *
15  * Draft version 18 Oct 2005
16  *
17  */
18 
19 #ifndef __UCSDET_H
20 #define __UCSDET_H
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_CONVERSION
25 #include "unicode/uenum.h"
26 
47 struct UCharsetDetector;
52 typedef struct UCharsetDetector UCharsetDetector;
53 
54 struct UCharsetMatch;
60 typedef struct UCharsetMatch UCharsetMatch;
61 
70 U_STABLE UCharsetDetector * U_EXPORT2
71 ucsdet_open(UErrorCode *status);
72 
84 
100 U_STABLE void U_EXPORT2
101 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
102 
103 
122 U_STABLE void U_EXPORT2
123 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
124 
125 
153 
154 
185 U_STABLE const UCharsetMatch ** U_EXPORT2
186 ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
187 
188 
189 
205 U_STABLE const char * U_EXPORT2
206 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
207 
231 U_STABLE int32_t U_EXPORT2
232 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
233 
263 U_STABLE const char * U_EXPORT2
264 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
265 
266 
289 U_STABLE int32_t U_EXPORT2
290 ucsdet_getUChars(const UCharsetMatch *ucsm,
291  UChar *buf, int32_t cap, UErrorCode *status);
292 
293 
294 
316 
317 
330 
331 
345 
346 #endif
347 #endif /* __UCSDET_H */
348 
349 
UBool ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
Enable filtering of input text.
struct UEnumeration UEnumeration
structure representing an enumeration object instance
Definition: uenum.h:38
const char * ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
Get the name of the charset represented by a UCharsetMatch.
void ucsdet_close(UCharsetDetector *ucsd)
Close a charset detector.
int32_t ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status)
Get the entire input text as a UChar string, placing it into a caller-supplied buffer.
const UCharsetMatch * ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
Return the charset that best matches the supplied input data.
void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
Set the input byte data whose charset is to be detected.
void ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
Set the declared encoding for charset detection.
struct UCharsetDetector UCharsetDetector
Structure representing a charset detector.
Definition: ucsdet.h:52
#define U_EXPORT2
Definition: platform.h:338
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:299
const UCharsetMatch ** ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status)
Find all charset matches that appear to be consistent with the input, returning an array of results...
UEnumeration * ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of all detectable charsets - over the charsets that are known to the cha...
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:593
UCharsetDetector * ucsdet_open(UErrorCode *status)
Open a charset detector.
C API: String Enumeration.
int32_t ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
Get a confidence number for the quality of the match of the byte data with the charset.
Basic definitions for ICU, for both C and C++ APIs.
UBool ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
Test whether input filtering is enabled for this charset detector.
const char * ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
Get the RFC 3066 code for the language of the input data.
signed int int32_t
Define 64 bit limits.
Definition: pwin32.h:143
struct UCharsetMatch UCharsetMatch
Opaque structure representing a match that was identified from a charset detection operation...
Definition: ucsdet.h:60
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:117
int8_t UBool
The ICU boolean type.
Definition: umachine.h:208