ICU 74.2  74.2
unorm2.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: unorm2.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009dec15
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UNORM2_H__
20 #define __UNORM2_H__
21 
33 #include "unicode/utypes.h"
34 #include "unicode/stringoptions.h"
35 #include "unicode/uset.h"
36 
37 #if U_SHOW_CPLUSPLUS_API
38 #include "unicode/localpointer.h"
39 #endif // U_SHOW_CPLUSPLUS_API
40 
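48 typedef enum {
57  UNORM2_COMPOSE,
66  UNORM2_DECOMPOSE,
79  UNORM2_FCD,
89  UNORM2_COMPOSE_CONTIGUOUS
90 } UNormalization2Mode;
91 
97 typedef enum UNormalizationCheckResult {
102  UNORM_NO,
107  UNORM_YES,
115  UNORM_MAYBE
116 } UNormalizationCheckResult;
117 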
122 struct UNormalizer2;
123 typedef struct UNormalizer2 UNormalizer2;
125 #if !UCONFIG_NO_NORMALIZATION
126 
138 U_CAPI const UNormalizer2 * U_EXPORT2
139 unorm2_getNFCInstance(UErrorCode *pErrorCode);
140 
152 U_CAPI const UNormalizer2 * U_EXPORT2
153 unorm2_getNFDInstance(UErrorCode *pErrorCode);
154 
166 U_CAPI const UNormalizer2 * U_EXPORT2
167 unorm2_getNFKCInstance(UErrorCode *pErrorCode);
168 
180 U_CAPI const UNormalizer2 * U_EXPORT2
181 unorm2_getNFKDInstance(UErrorCode *pErrorCode);
182 
197 U_CAPI const UNormalizer2 * U_EXPORT2
198 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
199 
200 #ifndef U_HIDE_DRAFT_API
201 
215 U_CAPI const UNormalizer2 * U_EXPORT2
216 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
217 #endif // U_HIDE_DRAFT_API
218 
240 U_CAPI const UNormalizer2 * U_EXPORT2
241 unorm2_getInstance(const char *packageName,
242  const char *name,
243  UNormalization2Mode mode,
244  UErrorCode *pErrorCode);
245 
261 U_CAPI UNormalizer2 * U_EXPORT2
262 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
263 
270 U_CAPI void U_EXPORT2
271 unorm2_close(UNormalizer2 *norm2);
272 
273 #if U_SHOW_CPLUSPLUS_API
274 
275 U_NAMESPACE_BEGIN
276 
287 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
288 U_NAMESPACE_END
289 
290 #endif
291 
308 U_CAPI int32_t U_EXPORT2
309 unorm2_normalize(const UNormalizer2 *norm2,
310  const UChar *src, int32_t length,
311  UChar *dest, int32_t capacity,
312  UErrorCode *pErrorCode);
331 U_CAPI int32_t U_EXPORT2
332 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
333  UChar *first, int32_t firstLength, int32_t firstCapacity,
334  const UChar *second, int32_t secondLength,
335  UErrorCode *pErrorCode);
354 U_CAPI int32_t U_EXPORT2
355 unorm2_append(const UNormalizer2 *norm2,
356  UChar *first, int32_t firstLength, int32_t firstCapacity,
357  const UChar *second, int32_t secondLength,
358  UErrorCode *pErrorCode);
359 
379 U_CAPI int32_t U_EXPORT2
380 unorm2_getDecomposition(const UNormalizer2 *norm2,
381  UChar32 c, UChar *decomposition, int32_t capacity,
382  UErrorCode *pErrorCode);
383 
413 U_CAPI int32_t U_EXPORT2
414 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
415  UChar32 c, UChar *decomposition, int32_t capacity,
416  UErrorCode *pErrorCode);
417 
433 U_CAPI UChar32 U_EXPORT2
434 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
435 
445 U_CAPI uint8_t U_EXPORT2
446 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
447 
464 U_CAPI UBool U_EXPORT2
465 unorm2_isNormalized(const UNormalizer2 *norm2,
466  const UChar *s, int32_t length,
467  UErrorCode *pErrorCode);
468 
486 U_CAPI UNormalizationCheckResult U_EXPORT2
487 unorm2_quickCheck(const UNormalizer2 *norm2,
488  const UChar *s, int32_t length,
489  UErrorCode *pErrorCode);
490 
515 U_CAPI int32_t U_EXPORT2
516 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
517  const UChar *s, int32_t length,
518  UErrorCode *pErrorCode);
519 
529 U_CAPI UBool U_EXPORT2
530 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
531 
541 U_CAPI UBool U_EXPORT2
542 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
543 
552 U_CAPI UBool U_EXPORT2
553 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
554 
621 U_CAPI int32_t U_EXPORT2
622 unorm_compare(const UChar *s1, int32_t length1,
623  const UChar *s2, int32_t length2,
624  uint32_t options,
625  UErrorCode *pErrorCode);
626 
627 #endif /* !UCONFIG_NO_NORMALIZATION */
628 #endif /* __UNORM2_H__ */
U_CAPI int32_t unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the raw decomposition mapping of c.
U_CAPI int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Returns the end of the normalized substring of the input string.
UNORM2_FCD
"Fast C or D" form.
Definition: unorm2.h:79
UNORM_NO
The input string is not in the normalization form.
Definition: unorm2.h:102
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
UNORM2_DECOMPOSE
Map, and reorder canonically.
Definition: unorm2.h:66
stringoptions.h
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI int32_t unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the decomposition mapping of c.
U_CAPI UNormalizationCheckResult unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI int32_t unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
Writes the normalized form of the source string to the destination string (replacing its contents) an...
U_CAPI void unorm2_close(UNormalizer2 *norm2)
Closes a UNormalizer2 instance from unorm2_openFiltered().
UNORM_YES
The input string is in the normalization form.
Definition: unorm2.h:107
U_CAPI int32_t unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the normalized form of the second string to the first string (merging them at the boundary) a...
UNORM_MAYBE
The input string may or may not be in the normalization form.
Definition: unorm2.h:115
U_CAPI UBool unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary after it, regardless of context...
UNORM2_COMPOSE_CONTIGUOUS
Compose only contiguously.
Definition: unorm2.h:89
U_CAPI const UNormalizer2 * unorm2_getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode *pErrorCode)
Returns a UNormalizer2 instance which uses the specified data file (packageName/name similar to ucnv_...
uset.h
C API: Unicode Set.
U_CAPI UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary before it, regardless of context...
U_CAPI UBool unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI const UNormalizer2 * unorm2_getNFKDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKD normalization.
U_CAPI const UNormalizer2 * unorm2_getNFDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFD normalization.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI const UNormalizer2 * unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization which is equ...
U_CAPI int32_t unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the second string to the first string (merging them at the boundary) and returns the length o...
U_CAPI const UNormalizer2 * unorm2_getNFCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFC normalization.
struct UNormalizer2 UNormalizer2
C typedef for struct UNormalizer2.
Definition: unorm2.h:123
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:435
U_CAPI UChar32 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
Performs pairwise composition of a & b and returns the composite if there is one. ...
U_CAPI uint8_t unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
Gets the combining class of c.
U_CAPI UNormalizer2 * unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode)
Constructs a filtered normalizer wrapping any UNormalizer2 instance and a filter set.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:386
U_CAPI const UNormalizer2 * unorm2_getNFKCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC normalization.
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:48
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
U_CAPI const UNormalizer2 * unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to ap...
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
U_CAPI int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
U_CAPI UBool unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
Tests if the character is normalization-inert.
UNORM2_COMPOSE
Decomposition followed by composition.
Definition: unorm2.h:57
LocalUNormalizer2Pointer
"Smart pointer" class, closes a UNormalizer2 via unorm2_close().
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:97
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247