ICU 74.2  74.2
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif // U_SHOW_CPLUSPLUS_API
38 
39 #ifndef USET_DEFINED
40 
41 #ifndef U_IN_DOXYGEN
42 #define USET_DEFINED
43 #endif
44 
50 typedef struct USet USet;
51 #endif
52 
64 enum {
70 
98 
111 
112 #ifndef U_HIDE_DRAFT_API
113 
126 #endif // U_HIDE_DRAFT_API
127 };
128 
184 typedef enum USetSpanCondition {
233 #ifndef U_HIDE_DEPRECATED_API
234 
239 #endif // U_HIDE_DEPRECATED_API
241 
242 enum {
250 };
251 
257 typedef struct USerializedSet {
262  const uint16_t *array;
267  int32_t bmpLength;
272  int32_t length;
277  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
278 } USerializedSet;
279 
280 /*********************************************************************
281  * USet API
282  *********************************************************************/
283 
291 U_CAPI USet* U_EXPORT2
292 uset_openEmpty(void);
293 
304 U_CAPI USet* U_EXPORT2
305 uset_open(UChar32 start, UChar32 end);
306 
316 U_CAPI USet* U_EXPORT2
317 uset_openPattern(const UChar* pattern, int32_t patternLength,
318  UErrorCode* ec);
319 
333 U_CAPI USet* U_EXPORT2
334 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
335  uint32_t options,
336  UErrorCode* ec);
337 
344 U_CAPI void U_EXPORT2
345 uset_close(USet* set);
346 
347 #if U_SHOW_CPLUSPLUS_API
348 
349 U_NAMESPACE_BEGIN
350 
361 
362 U_NAMESPACE_END
363 
364 #endif
365 
375 U_CAPI USet * U_EXPORT2
376 uset_clone(const USet *set);
377 
387 U_CAPI UBool U_EXPORT2
388 uset_isFrozen(const USet *set);
389 
404 U_CAPI void U_EXPORT2
405 uset_freeze(USet *set);
406 
417 U_CAPI USet * U_EXPORT2
418 uset_cloneAsThawed(const USet *set);
419 
429 U_CAPI void U_EXPORT2
430 uset_set(USet* set,
431  UChar32 start, UChar32 end);
432 
457 U_CAPI int32_t U_EXPORT2
458 uset_applyPattern(USet *set,
459  const UChar *pattern, int32_t patternLength,
460  uint32_t options,
461  UErrorCode *status);
462 
485 U_CAPI void U_EXPORT2
486 uset_applyIntPropertyValue(USet* set,
487  UProperty prop, int32_t value, UErrorCode* ec);
488 
524 U_CAPI void U_EXPORT2
525 uset_applyPropertyAlias(USet* set,
526  const UChar *prop, int32_t propLength,
527  const UChar *value, int32_t valueLength,
528  UErrorCode* ec);
529 
539 U_CAPI UBool U_EXPORT2
540 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
541  int32_t pos);
542 
558 U_CAPI int32_t U_EXPORT2
559 uset_toPattern(const USet* set,
560  UChar* result, int32_t resultCapacity,
561  UBool escapeUnprintable,
562  UErrorCode* ec);
563 
572 U_CAPI void U_EXPORT2
573 uset_add(USet* set, UChar32 c);
574 
587 U_CAPI void U_EXPORT2
588 uset_addAll(USet* set, const USet *additionalSet);
589 
599 U_CAPI void U_EXPORT2
600 uset_addRange(USet* set, UChar32 start, UChar32 end);
601 
611 U_CAPI void U_EXPORT2
612 uset_addString(USet* set, const UChar* str, int32_t strLen);
613 
623 U_CAPI void U_EXPORT2
624 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
625 
634 U_CAPI void U_EXPORT2
635 uset_remove(USet* set, UChar32 c);
636 
646 U_CAPI void U_EXPORT2
647 uset_removeRange(USet* set, UChar32 start, UChar32 end);
648 
658 U_CAPI void U_EXPORT2
659 uset_removeString(USet* set, const UChar* str, int32_t strLen);
660 
670 U_CAPI void U_EXPORT2
671 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
672 
684 U_CAPI void U_EXPORT2
685 uset_removeAll(USet* set, const USet* removeSet);
686 
699 U_CAPI void U_EXPORT2
700 uset_retain(USet* set, UChar32 start, UChar32 end);
701 
713 U_CAPI void U_EXPORT2
714 uset_retainString(USet *set, const UChar *str, int32_t length);
715 
725 U_CAPI void U_EXPORT2
726 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
727 
740 U_CAPI void U_EXPORT2
741 uset_retainAll(USet* set, const USet* retain);
742 
751 U_CAPI void U_EXPORT2
752 uset_compact(USet* set);
753 
767 U_CAPI void U_EXPORT2
768 uset_complement(USet* set);
769 
783 U_CAPI void U_EXPORT2
784 uset_complementRange(USet *set, UChar32 start, UChar32 end);
785 
796 U_CAPI void U_EXPORT2
797 uset_complementString(USet *set, const UChar *str, int32_t length);
798 
808 U_CAPI void U_EXPORT2
809 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
810 
822 U_CAPI void U_EXPORT2
823 uset_complementAll(USet* set, const USet* complement);
824 
832 U_CAPI void U_EXPORT2
833 uset_clear(USet* set);
834 
863 U_CAPI void U_EXPORT2
864 uset_closeOver(USet* set, int32_t attributes);
865 
872 U_CAPI void U_EXPORT2
873 uset_removeAllStrings(USet* set);
874 
882 U_CAPI UBool U_EXPORT2
883 uset_isEmpty(const USet* set);
884 
890 U_CAPI UBool U_EXPORT2
891 uset_hasStrings(const USet *set);
892 
901 U_CAPI UBool U_EXPORT2
902 uset_contains(const USet* set, UChar32 c);
903 
913 U_CAPI UBool U_EXPORT2
914 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
915 
924 U_CAPI UBool U_EXPORT2
925 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
926 
937 U_CAPI int32_t U_EXPORT2
938 uset_indexOf(const USet* set, UChar32 c);
939 
955 U_CAPI UChar32 U_EXPORT2
956 uset_charAt(const USet* set, int32_t charIndex);
957 
971 U_CAPI int32_t U_EXPORT2
972 uset_size(const USet* set);
973 
982 U_CAPI int32_t U_EXPORT2
983 uset_getRangeCount(const USet *set);
984 
993 U_CAPI int32_t U_EXPORT2
994 uset_getItemCount(const USet* set);
995 
1024 U_CAPI int32_t U_EXPORT2
1025 uset_getItem(const USet* set, int32_t itemIndex,
1026  UChar32* start, UChar32* end,
1027  UChar* str, int32_t strCapacity,
1028  UErrorCode* ec);
1029 
1038 U_CAPI UBool U_EXPORT2
1039 uset_containsAll(const USet* set1, const USet* set2);
1040 
1051 U_CAPI UBool U_EXPORT2
1052 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1053 
1062 U_CAPI UBool U_EXPORT2
1063 uset_containsNone(const USet* set1, const USet* set2);
1064 
1073 U_CAPI UBool U_EXPORT2
1074 uset_containsSome(const USet* set1, const USet* set2);
1075 
1095 U_CAPI int32_t U_EXPORT2
1096 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1097 
1116 U_CAPI int32_t U_EXPORT2
1117 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1118 
1138 U_CAPI int32_t U_EXPORT2
1139 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1140 
1159 U_CAPI int32_t U_EXPORT2
1160 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1161 
1170 U_CAPI UBool U_EXPORT2
1171 uset_equals(const USet* set1, const USet* set2);
1172 
1173 /*********************************************************************
1174  * Serialized set API
1175  *********************************************************************/
1176 
1226 U_CAPI int32_t U_EXPORT2
1227 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1228 
1237 U_CAPI UBool U_EXPORT2
1238 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1239 
1247 U_CAPI void U_EXPORT2
1248 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1249 
1258 U_CAPI UBool U_EXPORT2
1259 uset_serializedContains(const USerializedSet* set, UChar32 c);
1260 
1270 U_CAPI int32_t U_EXPORT2
1271 uset_getSerializedRangeCount(const USerializedSet* set);
1272 
1286 U_CAPI UBool U_EXPORT2
1287 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1288  UChar32* pStart, UChar32* pEnd);
1289 
1290 #endif
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
Enable case insensitive matching.
Definition: uset.h:125
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:277
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:212
One more than the last span condition.
Definition: uset.h:238
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:262
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
"Smart pointer" class, closes a USet via uset_close().
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:69
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end...
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:435
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:184
C API: Unicode Properties.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:386
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
int32_t length
The total length of the array.
Definition: uset.h:272
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:195
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
Capacity of USerializedSet::staticArray.
Definition: uset.h:249
Enable case insensitive matching.
Definition: uset.h:97
Adds all case mappings for each element in the set.
Definition: uset.h:110
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:267
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
A serialized form of a Unicode set.
Definition: uset.h:257
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
Continues a span() while there is a set element at the current position.
Definition: uset.h:232
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Continues a span() while there is no set element at the current position.
Definition: uset.h:197
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247