1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
diff -ur icu.orig/source/layout/IndicReordering.cpp icu/source/layout/IndicReordering.cpp
--- icu.orig/source/layout/IndicReordering.cpp 2006-11-10 09:42:44.000000000 +0000
+++ icu/source/layout/IndicReordering.cpp 2006-11-10 09:47:05.000000000 +0000
@@ -395,7 +395,7 @@
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12}, // 2 - consonant with nukta
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12}, // 3 - consonant
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1, -1, -1}, // 5 - dependent vowels
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
@@ -423,6 +423,48 @@
state = stateTable[state][charClass & CF_CLASS_MASK];
+ /*for the components of split matra*/
+ if ((charCount >= cursor + 3) &&
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF && chars[cursor + 2] == 0x0DCA)) { /*for 3 split matra of Sinhala*/
+ return cursor + 3;
+ }
+ else if ((charCount >= cursor + 3) &&
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2 && chars[cursor + 2] == 0x0CD5)) { /*for 3 split matra of Kannada*/
+ return cursor + 3;
+ }
+ /*for 2 split matra*/
+ else if (charCount >= cursor + 2) {
+ /*for Bengali*/
+ if ((chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09BE) ||
+ (chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09D7) ||
+ /*for Oriya*/
+ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B3E) ||
+ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B56) ||
+ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B57) ||
+ /*for Tamil*/
+ (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BBE) ||
+ (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BD7) ||
+ (chars[cursor] == 0x0BC7 && chars[cursor + 1] == 0x0BBE) ||
+ /*for Malayalam*/
+ (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D3E) ||
+ (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D57) ||
+ (chars[cursor] == 0x0D47 && chars[cursor + 1] == 0x0D3E) ||
+ /*for Sinhala*/
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCA) ||
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF) ||
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DDF) ||
+ (chars[cursor] == 0x0DDC && chars[cursor + 1] == 0x0DCA) ||
+ /*for Telugu*/
+ (chars[cursor] == 0x0C46 && chars[cursor + 1] == 0x0C56) ||
+ /*for Kannada*/
+ (chars[cursor] == 0x0CBF && chars[cursor + 1] == 0x0CD5) ||
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD5) ||
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD6) ||
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2) ||
+ (chars[cursor] == 0x0CCA && chars[cursor + 1] == 0x0CD5))
+ return cursor + 2;
+ }
+
if (state < 0) {
break;
}
|