compat-icu36: new package (for EL-5)HEAD master

author: Remi Collet <fedora@famillecollet.com> 2013-03-20 10:29:29 +0100
committer: Remi Collet <fedora@famillecollet.com> 2013-03-20 10:29:29 +0100
commit: 6deac027c98f5d99e1805f9ddc21ff2dbebe0fb7 (patch)
tree: 008990c48199f2d517fc9b1a4b47c6b162ec30ef
26 files changed, 5812 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1e65467
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+SRCDIR := $(shell pwd)
+NAME := $(shell basename $(SRCDIR))
+include ../common/Makefile
+
diff --git a/canonicalize.patch b/canonicalize.patch
new file mode 100644
index 0000000..3ff9c33
--- /dev/null
+++ b/canonicalize.patch
@@ -0,0 +1,11 @@
+--- source/common/uloc.c	2011-12-12 04:50:00.601092000 -0500
++++ source/common/uloc.c	2011-12-12 04:56:18.503570000 -0500
+@@ -1712,7 +1712,7 @@
+         /* Check for EURO variants. */
+         sawEuro = _deleteVariant(variant, variantSize, "EURO", 4);
+         len -= sawEuro;
+-        if (sawEuro > 0 && name[len-1] == '_') { /* delete trailing '_' */
++        if (sawEuro > 0 && len > 0 && name[len-1] == '_') { /* delete trailing '_' */
+             --len;
+         }
+ 
diff --git a/compat-icu36.spec b/compat-icu36.spec
new file mode 100644
index 0000000..d8a820e
--- /dev/null
+++ b/compat-icu36.spec
@@ -0,0 +1,189 @@
+Name:           compat-icu36
+Version:        3.6
+Release:        5.16.1
+Summary:        International Components for Unicode
+
+Group:          System Environment/Libraries
+License:        X License
+URL:            http://www.ibm.com/software/globalization/icu/
+Source0:        ftp://ftp.software.ibm.com/software/globalization/icu/icu4c-3_6-src.tgz
+BuildRoot:      %{_tmppath}/%{name}-%{version}-root
+
+BuildRequires:  doxygen, autoconf
+Patch1:  icu-3.4-multiarchdevel.patch
+Patch2:  icu-config
+Patch3:  icu.icu5365.dependantvowels.patch
+Patch4:  icu.icu5418.malayam.patch
+Patch5:  icu.icu5431.malayam.patch
+Patch6:  icu.icu5433.oriya.patch
+Patch7:  icu.icuXXXX.virama.prevnext.patch
+Patch8:  icu.icu5465.telegu.patch
+Patch9:  icu.icu5488.assamese.patch
+Patch10: icu.icu5500.devicetablecrash.patch
+Patch11: icu.icu5501.sinhala.biggerexpand.patch
+Patch12: icu.icu5557.safety.patch
+Patch13: icu.icu5594.gujarati.patch
+Patch14: icu.icu5506.multiplevowels.patch
+Patch15: icu.icuXXXX.malayalam.bysyllable.patch
+Patch16: icu.rh429023.regexp.patch
+Patch17: icu.icu5483.backport.patch
+Patch18: icu.icu5797.backport.patch
+Patch19: icu.icu6001.backport.patch
+Patch20: icu.icu6002.backport.patch
+Patch21: icu.icu6175.emptysegments.patch
+Patch22: icu.icu5691.backport.patch
+Patch23: icu.icuXXXX.rollbackabi.patch
+Patch24: canonicalize.patch
+Conflicts: icu
+
+%description
+The International Components for Unicode (ICU) libraries provide
+robust and full-featured Unicode services on a wide variety of
+platforms. ICU supports the most current version of the Unicode
+standard, and provides support for supplementary Unicode
+characters (needed for GB 18030 repertoire support).
+As computing environments become more heterogeneous, software
+portability becomes more important. ICU lets you produce the same
+results across all the various platforms you support, without
+sacrificing performance. It offers great flexibility to extend and
+customize the supplied services.
+
+
+%package     -n compat-libicu36
+Summary:        International Components for Unicode - libraries
+Group:          System Environment/Libraries
+
+%description -n compat-libicu36
+%{summary}.
+
+This package provides the ICU libraries for packages built
+against version %{version}.
+
+%package     -n compat-libicu36-devel
+Summary:        Development files for International Components for Unicode
+Group:          Development/Libraries
+Requires:       compat-libicu36 = %{version}-%{release}
+Requires:       pkgconfig
+Conflicts:      libicu-devel
+
+%description -n compat-libicu36-devel
+%{summary}.
+
+%package     -n compat-libicu36-doc
+Summary:        Documentation for International Components for Unicode
+Group:          Documentation
+
+%description -n compat-libicu36-doc
+%{summary}.
+
+
+%prep
+%setup -q -n icu
+%patch1  -p1 -b .multiarchdevel
+%patch3  -p1 -b .dependantvowels
+%patch4  -p1 -b .icu5418.malayam.patch
+%patch5  -p1 -b .icu5431.malayam.patch
+%patch6  -p1 -b .icu5433.oriya.patch
+%patch7  -p1 -b .icuXXXX.virama.prevnext.patch
+%patch8  -p1 -b .icu5465.telegu.patch
+%patch9  -p1 -b .icu5488.assamese.patch
+%patch10 -p1 -b .icu5500.devicetablecrash.patch
+%patch11 -p1 -b .icu5501.sinhala.biggerexpand.patch
+%patch12 -p1 -b .icu5557.safety.patch
+%patch13 -p1 -b .icu5594.gujarati.patch
+%patch14 -p1 -b .icu5506.multiplevowels.patch
+%patch15 -p1 -b .icuXXXX.malayalam.bysyllable.patch
+%patch16 -p1 -b .rh429023.regexp.patch
+%patch17 -p1 -b .icu5483.backport.patch
+%patch18 -p1 -b .icu5797.backport.patch
+%patch19 -p1 -b .icu6001.backport.patch
+%patch20 -p1 -b .icu6002.backport.patch
+%patch21 -p1 -b .icu6175.emptysegments.patch
+%patch22 -p1 -b .icu5691.backport.patch
+%patch23 -p1 -b .icuXXXX.rollbackabi.patch
+%patch24 -p0 -b .canonicalize.patch
+
+%build
+cd source
+export CFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing"
+export CXXFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing"
+autoconf
+%configure --with-data-packaging=library --disable-samples
+#rhbz#654590
+sed -i -- "s/-nodefaultlibs -nostdlib//" config/mh-linux
+make # %{?_smp_mflags} # -j(X>1) may "break" man pages as of 3.2, b.f.u #2357
+make doc
+
+%install
+rm -rf $RPM_BUILD_ROOT source/__docs
+make -C source install DESTDIR=$RPM_BUILD_ROOT
+make -C source install-doc docdir=__docs
+chmod +x $RPM_BUILD_ROOT%{_libdir}/*.so.*
+cp %{PATCH2} $RPM_BUILD_ROOT%{_bindir}/icu-config
+chmod a+x $RPM_BUILD_ROOT%{_bindir}/icu-config
+sed -i s/\\\$\(THREADSCXXFLAGS\)// $RPM_BUILD_ROOT/%{_libdir}/pkgconfig/icu.pc
+sed -i s/\\\$\(THREADSCPPFLAGS\)/-D_REENTRANT/ $RPM_BUILD_ROOT/%{_libdir}/pkgconfig/icu.pc
+
+%check
+make -C source check
+
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+
+%post -n compat-libicu36 -p /sbin/ldconfig
+
+%postun -n compat-libicu36 -p /sbin/ldconfig
+
+
+%files
+%defattr(-,root,root,-)
+%doc license.html readme.html
+%{_bindir}/derb
+%{_bindir}/genbrk
+%{_bindir}/gencnval
+%{_bindir}/genctd
+%{_bindir}/genrb
+%{_bindir}/makeconv
+%{_bindir}/pkgdata
+%{_bindir}/uconv
+%{_sbindir}/*
+%{_mandir}/man1/derb.1*
+%{_mandir}/man1/gencnval.1*
+%{_mandir}/man1/genrb.1*
+%{_mandir}/man1/genbrk.1*
+%{_mandir}/man1/genctd.1*
+%{_mandir}/man1/makeconv.1*
+%{_mandir}/man1/pkgdata.1*
+%{_mandir}/man1/uconv.1*
+%{_mandir}/man8/*.8*
+
+%files -n compat-libicu36
+%defattr(-,root,root,-)
+%{_libdir}/*.so.*
+
+%files -n compat-libicu36-devel
+%defattr(-,root,root,-)
+%{_bindir}/icu-config
+%{_mandir}/man1/icu-config.1*
+%{_includedir}/layout
+%{_includedir}/unicode
+%{_libdir}/*.so
+%{_libdir}/icu
+%{_libdir}/pkgconfig/icu.pc
+%dir %{_datadir}/icu
+%dir %{_datadir}/icu/3.6
+%{_datadir}/icu/3.6/mkinstalldirs
+%{_datadir}/icu/3.6/config
+%doc %{_datadir}/icu/3.6/license.html
+
+%files -n compat-libicu36-doc
+%defattr(-,root,root,-)
+%doc source/__docs/icu/html/*
+
+
+%changelog
+* Wed Mar 20 2013 Remi Collet <RPMS@famillecollet.com> - 3.6-5.16.1
+- new package from RHEL-5 spec of icu.
+
diff --git a/icu-3.4-multiarchdevel.patch b/icu-3.4-multiarchdevel.patch
new file mode 100644
index 0000000..a7839aa
--- /dev/null
+++ b/icu-3.4-multiarchdevel.patch
@@ -0,0 +1,70 @@
+--- icu/source/configure.in.orig	2006-05-02 12:10:31.000000000 +0100
++++ icu/source/configure.in	2006-05-02 15:06:07.000000000 +0100
+@@ -1011,6 +1011,7 @@
+ 		Makefile \
+ 		data/icupkg.inc \
+ 		config/Makefile.inc \
++		config/icu.pc \
+ 		data/Makefile \
+ 		stubdata/Makefile \
+ 		common/Makefile \
+--- /dev/null	2006-04-29 13:38:37.035974750 +0100
++++ icu/source/config/icu.pc.in	2006-05-02 15:03:14.000000000 +0100
+@@ -0,0 +1,46 @@
++prefix = @prefix@
++bindir = @bindir@
++exec_prefix = @exec_prefix@
++libdir = @libdir@
++includedir = @includedir@
++datadir = @datadir@
++sbindir = @sbindir@
++mandir = @mandir@
++sysconfdir = @sysconfdir@
++CFLAGS = @CFLAGS@ 
++CXXFLAGS = @CXXFLAGS@ 
++DEFS = @DEFS@ 
++UNICODE_VERSION=@UNICODE_VERSION@
++ICUPREFIX=icu
++ICULIBSUFFIX=@ICULIBSUFFIX@
++LIBICU=lib${ICUPREFIX}
++LIBCPPFLAGS=-D_REENTRANT
++CPPFLAGS=@CPPFLAGS@ ${LIBCPPFLAGS} -I${prefix}/include
++SHAREDLIBCPPFLAGS=-DPIC
++SHAREDLIBCXXFLAGS=-fPIC
++SHAREDLIBCFLAGS=-fPIC
++pkglibdir=${libdir}/@PACKAGE@${ICULIBSUFFIX}/@VERSION@
++pkgdatadir=${datadir}/@PACKAGE@${ICULIBSUFFIX}/@VERSION@
++ICUDATA_NAME = icudt@LIB_VERSION_MAJOR@@ICUDATA_CHAR@
++ICUPKGDATA_DIR=@libdir@
++ICUDATA_DIR=${pkgdatadir}
++SO=so
++ICULIBS_COMMON_LIB_NAME=${LIBICU}uc${ICULIBSUFFIX}.${SO}
++SHLIB_cc=cxx ${DEFS} ${CPPFLAGS} ${CXXFLAGS} @LDFLAGS@ -shared
++SHLIB_c=cc ${DEFS} ${CPPFLAGS} ${CFLAGS} @LDFLAGS@ -shared
++ICULIBS_LAYOUT = -l${ICUPREFIX}le${ICULIBSUFFIX} -l${ICUPREFIX}lx${ICULIBSUFFIX}
++ICULIBS_TOOLUTIL = -l${ICUPREFIX}tu${ICULIBSUFFIX}
++ICULIBS_OBSOLETE = -l${ICUPREFIX}obsolete${ICULIBSUFFIX}
++ICULIBS_ICUIO = -l${ICUPREFIX}io${ICULIBSUFFIX}
++ICULIBS_I18N = -l${ICUPREFIX}i18n${ICULIBSUFFIX}
++ICULIBS_COMMON = -l${ICUPREFIX}uc${ICULIBSUFFIX}
++ICULIBS_DATA = -l${ICUPREFIX}data${ICULIBSUFFIX}
++ICULIBS_LIBSONLY = ${ICULIBS_I18N} ${ICULIBS_COMMON} ${ICULIBS_DATA}
++ICULIBS_SYSTEMLIBS = @LIBS@
++ICULIBS_BASE = @LIBS@ -L${libdir}
++ICULIBS = ${ICULIBS_BASE} ${ICULIBS_I18N} ${ICULIBS_COMMON} ${ICULIBS_DATA}
++
++Name: @PACKAGE@
++Description: International Components for Unicode
++Version: @VERSION@
++Libs: @LDFLAGS@ ${ICULIBS} @LIBS@
+--- icu/source/Makefile.in.orig	2006-05-02 12:10:31.000000000 +0100
++++ icu/source/Makefile.in	2006-05-02 15:18:15.000000000 +0100
+@@ -125,6 +125,8 @@
+ 	@$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
+ 	$(INSTALL_DATA) @platform_make_fragment@ $(DESTDIR)$(pkgdatadir)/config/@platform_make_fragment_name@
+ 	$(INSTALL_SCRIPT) $(top_srcdir)/mkinstalldirs $(DESTDIR)$(pkgdatadir)/mkinstalldirs
++	@$(MKINSTALLDIRS) $(DESTDIR)$(libdir)/pkgconfig
++	$(INSTALL_DATA) $(top_srcdir)/config/icu.pc $(DESTDIR)$(libdir)/pkgconfig/icu.pc
+ 	$(INSTALL_DATA) $(top_srcdir)/../license.html $(DESTDIR)$(pkgdatadir)/license.html
+ 	$(INSTALL_SCRIPT) $(top_builddir)/config/icu-config $(DESTDIR)$(bindir)/icu-config
+ 	$(INSTALL_DATA) $(top_builddir)/config/Makefile.inc $(DESTDIR)$(pkglibdir)/Makefile.inc
diff --git a/icu-config b/icu-config
new file mode 100755
index 0000000..08f9ce8
--- /dev/null
+++ b/icu-config
@@ -0,0 +1,387 @@
+#!/bin/sh
+## -*-sh-*-
+#set -x
+# BEGIN of icu-config-top
+#******************************************************************************
+#   Copyright (C) 1999-2004, International Business Machines
+#   Corporation and others.  All Rights Reserved.
+#******************************************************************************
+# This script is designed to aid configuration of ICU.
+# rpath links a library search path right into the binaries.
+
+
+### END of icu-config-top
+
+## Read the install prefixes from the icu pkg-config file.
+exec_prefix=`pkg-config --variable=exec_prefix icu`
+execprefix=$exec_prefix
+prefix=`pkg-config --variable=prefix icu`
+
+
+loaddefs()
+{
+LDLIBRARYPATH_ENVVAR="LD_LIBRARY_PATH"
+bindir=`pkg-config --variable=bindir icu`
+sbindir=`pkg-config --variable=sbindir icu`
+libdir=`pkg-config --variable=libdir icu`
+sysconfdir=`pkg-config --variable=sysconfdir icu`
+mandir=`pkg-config --variable=mandir icu`
+datadir=`pkg-config --variable=datadir icu`
+pkglibdir=`pkg-config --variable=pkglibdir icu`
+ICULIBS_COMMON_LIB_NAME=`pkg-config --variable=ICULIBS_COMMON_LIB_NAME icu`
+UNICODE_VERSION=`pkg-config --variable=UNICODE_VERSION icu`
+VERSION=`pkg-config --modversion icu`
+SO=`pkg-config --variable=SO icu`
+
+## -*-sh-*-
+## BEGIN of icu-config-bottom.
+## Copyright (c) 2002-2004, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+ICUUC_FILE=${libdir}/${ICULIBS_COMMON_LIB_NAME}
+    
+#  echo ENABLE RPATH $ENABLE_RPATH and RPATHLDFLAGS=${RPATH_LDFLAGS}
+if [ "x$PKGDATA_MODE" = "x" ]; then
+    PKGDATA_MODE=dll
+fi
+
+}
+
+## The actual code of icu-config goes here.
+
+ME=`basename $0`
+
+allflags()
+{
+    echo "  --bindir               Print binary directory path (bin)"
+    echo "  --cc                   Print C compiler used [CC]"
+    echo "  --cflags               Print C compiler flags [CFLAGS]"
+    echo "  --cflags-dynamic       Print additional C flags for"
+    echo "                             building shared libraries."
+    echo "  --cppflags             Print C Preprocessor flags [CPPFLAGS]"
+    echo "  --cppflags-dynamic     Print additional C Preprocessor flags for"
+    echo "                             building shared libraries."
+    echo "  --cppflags-searchpath  Print only -I include directives  (-Iinclude)"
+    echo "  --cxx                  Print C++ compiler used [CXX]"
+    echo "  --cxxflags             Print C++ compiler flags [CXXFLAGS]"
+    echo "  --cxxflags-dynamic     Print additional C++ flags for"
+    echo "                             building shared libraries."
+    echo "  --detect-prefix        Attempt to detect prefix based on PATH"
+    echo "  --exec-prefix          Print prefix for executables (/bin)"
+    echo "  --exists               Return with 0 status if ICU exists else fail"
+    echo "  --help, -?, --usage    Print this message"
+    echo "  --icudata              Print shortname of ICU data file (icudt21l)"
+    echo "  --icudata-install-dir  Print path to install data to - use as --install option to pkgdata(1)"
+    echo "  --icudata-mode         Print default ICU pkgdata mode (dll) - use as --mode option to pkgdata(1)."
+    echo "  --icudatadir           Print path to packaged archive data. Can set as [ICU_DATA]"
+    echo "  --invoke               Print commands to invoke an ICU program"
+    echo "  --invoke=<prog>        Print commands to invoke an ICU program named <prog> (ex: genrb)" 
+    echo "  --ldflags              Print -L search path and -l libraries to link with ICU [LDFLAGS].  This is for the data, uc (common), and i18n libraries only.  "
+    echo "  --ldflags-layout       Print ICU layout engine link directive. Use in addition to --ldflags"
+    echo "  --ldflags-libsonly     Same as --ldflags, but only the -l directives"
+    echo "  --ldflags-searchpath   Print only -L (search path) directive"
+    echo "  --ldflags-system       Print only system libs ICU links with (-lpthread, -lm)"
+    echo "  --ldflags-icuio        Print ICU icuio link directive. Use in addition to --ldflags "
+    echo "  --ldflags-obsolete     Print ICU obsolete link directive. Use in addition to --ldflags. (requires icuapps/obsolete to be built and installed.) "
+    echo "  --mandir               Print manpage (man) path"
+    echo "  --prefix               Print PREFIX to icu install (/usr/local)"
+    echo "  --prefix=XXX           Set prefix to XXX for remainder of command"
+    echo "  --sbindir              Print system binary path (sbin) "
+    echo "  --shared-datadir       Print shared data (share) path. This is NOT the ICU data dir."
+    echo "  --shlib-c              Print the command to compile and build C shared libraries with ICU"
+    echo "  --shlib-cc             Print the command to compile and build C++ shared libraries with ICU"
+    echo "  --sysconfdir           Print system config (etc) path"
+    echo "  --unicode-version      Print version of Unicode data used in ICU ($UNICODE_VERSION)"
+    echo "  --version              Print ICU version ($VERSION)"
+    echo "  --incfile              Print path to Makefile.inc (for -O option of pkgdata)"
+}
+
+## Print the normal usage message
+shortusage()
+{
+    echo "usage: ${ME} " `allflags | cut -c-25 | sed -e 's%.*%[ & ]%'`
+}
+
+## Print the full help text: a short intro, a make example, then every option from allflags.
+usage()
+{
+    echo "${ME}: icu-config: ICU configuration helper script"
+    echo
+    echo "The most commonly used options will be --cflags, --cxxflags, --cppflags, and --ldflags."
+    echo 'Example (in make):   CPPFLAGS=$(shell icu-config --cppflags)'
+    echo '                     LDFLAGS=$(shell icu-config --ldflags)'
+    echo "                     (etc).."
+    echo
+    echo "Usage:"
+    allflags
+
+    echo 
+    echo " [Brackets] show MAKE variable equivalents,  (parenthesis) show example output"
+    echo
+    echo "Copyright (c) 2002, International Business Machines Corporation and others. All Rights Reserved."
+}
+
+## Verify that the ICU common library exists at ${ICUUC_FILE}; otherwise report on stderr and exit 2.
+sanity()
+{
+    if [ ! -f "${ICUUC_FILE}" ];
+    then
+	echo "### $ME: Can't find ${ICUUC_FILE} - ICU prefix is wrong."  1>&2
+	echo "###      Try the --prefix= or --exec-prefix= options " 1>&2
+	echo "###      or --detect-prefix" 1>&2
+	echo "### $ME: Exitting." 1>&2
+	exit 2
+    fi
+}
+
+## Main starts here.
+
+if [ $# -lt 1 ]; then
+    shortusage
+    exit 1
+fi
+
+
+# Load our variables from pkg-config (see loaddefs)
+# ALWAYS load twice because of dependencies
+loaddefs
+loaddefs
+sanity
+
+while [ $# -gt 0 ];
+do
+    arg="$1"
+    var=`echo $arg | sed -e 's/^[^=]*=//'`
+#    echo "### processing $arg" 1>&2
+    case "$arg" in
+
+        # undocumented.
+	--debug)
+	    set -x
+	    ;;
+
+        --so)
+            echo $SO
+            ;;
+
+	--bindir)
+	    echo $bindir
+	    ;;
+
+	--libdir)
+	    echo $libdir
+	    ;;
+
+	--exists)
+	    sanity
+	    ;;
+
+	--sbindir)
+	    echo $sbindir
+	    ;;
+
+	--invoke=*)
+	    QUOT="'"
+            CMD="${var}"
+
+            # If it's not a locally executable command (1st choice) then 
+            # search for it in the ICU directories. 
+            if [ ! -x ${CMD} ]; then
+                if [ -x ${bindir}/${var} ]; then
+                    CMD="${bindir}/${var}"
+                fi
+                if [ -x ${sbindir}/${var} ]; then
+                    CMD="${sbindir}/${var}"
+                fi
+            fi
+
+	    echo "env ${QUOT}${LDLIBRARYPATH_ENVVAR}=${libdir}:"'${'"${LDLIBRARYPATH_ENVVAR}"'}'${QUOT} ${CMD}
+	    ;;
+
+	--invoke)
+	    QUOT="'"
+	    echo "env ${QUOT}${LDLIBRARYPATH_ENVVAR}=${libdir}:"'${'"${LDLIBRARYPATH_ENVVAR}"'}'${QUOT}
+	    ;;
+
+	--cflags)
+            pkg-config --variable=CFLAGS icu
+	    ;;
+	    
+	--cc)
+	    echo cc
+	    ;;
+	    
+	--cxx)
+	    echo c++
+	    ;;
+
+	--cxxflags)
+            pkg-config --variable=CXXFLAGS icu
+	    ;;
+
+	--cppflags)
+	    # Don't echo the -I. - it's unneeded.
+            CPPFLAGS=`pkg-config --variable=CPPFLAGS icu`
+	    echo $CPPFLAGS | sed -e 's/-I. //'
+	    ;;
+
+	--cppflags-searchpath)
+	    echo -I${prefix}/include
+	    ;;
+
+	--cppflags-dynamic)
+            pkg-config --variable=SHAREDLIBCPPFLAGS icu
+	    ;;
+
+	--cxxflags-dynamic)
+            pkg-config --variable=SHAREDLIBCXXFLAGS icu
+	    ;;
+
+	--cflags-dynamic)
+            pkg-config --variable=SHAREDLIBCFLAGS icu
+	    ;;
+
+	--ldflags-system)
+            pkg-config --variable=ICULIBS_SYSTEMLIBS icu
+	    ;;
+
+	--ldflags)
+            pkg-config --libs icu
+# $RPATH_LDFLAGS
+	    ;;
+
+	--ldflags-libsonly)
+            pkg-config --variable=ICULIBS_LIBSONLY icu
+	    ;;
+
+	--ldflags-icuio)
+            pkg-config --variable=ICULIBS_ICUIO icu
+	    ;;
+
+	--ldflags-obsolete)
+            pkg-config --variable=ICULIBS_OBSOLETE icu
+	    ;;
+
+	--ldflags-toolutil)
+            pkg-config --variable=ICULIBS_TOOLUTIL icu
+	    ;;
+
+	--ldflags-layout)
+            pkg-config --variable=ICULIBS_LAYOUT icu
+	    ;;
+
+	--ldflags-searchpath)
+	    echo -L${libdir}
+	    ;;
+
+	--detect-prefix)
+	    HERE=`echo $0 | sed -e "s/$ME//g"`
+	    if [ -f $HERE/../lib/${ICULIBS_COMMON_LIB_NAME} ]; then
+		prefix=$HERE/..
+		echo "## Using --prefix=${prefix}" 1>&2
+	    fi
+	    loaddefs
+	    loaddefs
+	    sanity
+	    ;;
+
+	--exec-prefix)
+	    echo $exec_prefix
+	    ;;
+
+	--prefix)
+	    echo $prefix
+	    ;;
+
+	--prefix=*)
+	    prefix=$var
+	    loaddefs
+	    loaddefs
+	    sanity
+	    ;;
+
+	--sysconfdir)
+	    echo $sysconfdir
+	    ;;
+
+	--mandir)
+	    echo $mandir
+	    ;;
+
+	--shared-datadir)
+	    echo $datadir
+	    ;;
+
+        --incfile)
+	    echo $pkglibdir/Makefile.inc
+	    ;;
+
+	--icudata)
+            pkg-config --variable=ICUDATA_NAME icu
+	    ;;
+
+	--icudata-mode)
+	    echo $PKGDATA_MODE
+	    ;;
+
+	--icudata-install-dir)
+            pkg-config --variable=ICUPKGDATA_DIR icu
+	    ;;
+	    
+	--icudatadir)
+            pkg-config --variable=ICUDATA_DIR icu
+	    ;;
+
+	--shlib-c)
+            pkg-config --variable=SHLIB_c icu
+	    ;;
+
+	--shlib-cc)
+            pkg-config --variable=SHLIB_cc icu
+	    ;;
+
+	--version)
+            echo $VERSION
+            ;;
+
+        --unicode-version)
+            echo $UNICODE_VERSION
+            ;;
+
+	--help)
+	    usage
+	    exit 0
+	    ;;
+
+	--usage)
+	    usage
+	    exit 0
+	    ;;
+
+#	--enable-rpath=*)
+#	    ENABLE_RPATH=$var
+#	    loaddefs
+#	    ;;
+
+	-?)
+	    usage
+	    exit 0
+	    ;;
+
+        *)
+	    echo ${ME}: ERROR Unknown Option $arg 1>&2
+            echo 1>&2
+            shortusage 1>&2
+	    echo "### $ME: Exitting." 1>&2
+            exit 1;
+            ;;
+    esac
+    shift
+done
+
+# Check once before we quit (will check last used prefix)
+sanity
+## END of icu-config-bottom
+
+exit 0
+
diff --git a/icu.icu5365.dependantvowels.patch b/icu.icu5365.dependantvowels.patch
new file mode 100644
index 0000000..5708018
--- /dev/null
+++ b/icu.icu5365.dependantvowels.patch
@@ -0,0 +1,11 @@
+--- icu/source/layout/IndicReordering.cpp.orig	2006-09-05 17:01:15.000000000 +0100
++++ icu/source/layout/IndicReordering.cpp	2006-09-05 17:01:19.000000000 +0100
+@@ -377,7 +377,7 @@
+     {-1,  6,  1, -1, -1, -1, -1, -1, -1,  5,  9,  5,  5,  4, 12}, //  2 - consonant with nukta
+     {-1,  6,  1, -1, -1, -1, -1, -1,  2,  5,  9,  5,  5,  4, 12}, //  3 - consonant
+     {-1, -1, -1, -1, -1, -1,  3,  2, -1, -1, -1, -1, -1, -1,  7}, //  4 - consonant virama
+-    {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  5 - dependent vowels
++    {-1,  6,  1, -1, -1, -1, -1, -1, -1,  5, -1, -1, -1, -1, -1}, //  5 - dependent vowels
+     {-1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  6 - vowel mark
+     {-1, -1, -1, -1, -1, -1,  3,  2, -1, -1, -1, -1, -1, -1, -1}, //  7 - consonant virama ZWJ, consonant ZWJ virama
+     {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  4, -1}, //  8 - independent vowels that can take a virama
diff --git a/icu.icu5418.malayam.patch b/icu.icu5418.malayam.patch
new file mode 100644
index 0000000..03fbe63
--- /dev/null
+++ b/icu.icu5418.malayam.patch
@@ -0,0 +1,39 @@
+--- icu/source/layout/IndicClassTables.cpp.orig	2006-08-23 01:12:40.000000000 +0100
++++ icu/source/layout/IndicClassTables.cpp	2006-09-25 09:06:38.000000000 +0100
+@@ -173,6 +173,19 @@
+     _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  // 0CE0 - 0CEF
+ };
+ 
++#if 1
++//use the pango char class table here
++static const IndicClassTable::CharClass mlymCharClasses[] =
++{
++    _xx, _xx, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, /* 0D00 - 0D0F */
++    _iv, _xx, _iv, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0D10 - 0D1F */
++    _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _pb, /* 0D20 - 0D2F */
++    _pb, _cn, _ct, _ct, _ct, _pb, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _xx, _dr, _dr, /* 0D30 - 0D3F */
++    _dr, _dr, _dr, _dr, _xx, _xx, _dl, _dl, _dl, _xx, _s1, _s2, _s3, _vr, _xx, _xx, /* 0D40 - 0D4F */
++    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _dr, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0D50 - 0D5F */
++    _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  /* 0D60 - 0D6F */
++};
++#else
+ // FIXME: this is correct for old-style Malayalam (MAL) but not for reformed Malayalam (MLR)
+ // FIXME: should there be a REPH for old-style Malayalam?
+ static const IndicClassTable::CharClass mlymCharClasses[] =
+@@ -185,6 +198,7 @@
+     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _m2, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0D50 - 0D5F
+     _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  // 0D60 - 0D6F
+ };
++#endif
+  
+ static const IndicClassTable::CharClass sinhCharClasses[] =
+ {
+@@ -232,7 +246,7 @@
+ #define TAML_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
+ #define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
+ #define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
+-#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
++#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)
+ #define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)
+ 
+ //
diff --git a/icu.icu5431.malayam.patch b/icu.icu5431.malayam.patch
new file mode 100644
index 0000000..48a549d
--- /dev/null
+++ b/icu.icu5431.malayam.patch
@@ -0,0 +1,107 @@
+--- icu.orig/source/layout/IndicReordering.cpp	2006-12-21 09:24:42.000000000 +0000
++++ icu/source/layout/IndicReordering.cpp	2006-12-21 09:16:15.000000000 +0000
+@@ -50,6 +50,14 @@
+ #define distFeatureMask 0x00010000UL
+ #define initFeatureMask 0x00008000UL
+ 
++// TODO: Find better names for these!
++#define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask)
++#define tagArray3 (pstfFeatureMask | tagArray4)
++#define tagArray2 (halfFeatureMask | tagArray3)
++#define tagArray1 (blwfFeatureMask | tagArray2)
++#define tagArray0 (rphfFeatureMask | tagArray1)
++
++
+ class IndicReorderingOutput : public UMemory {
+ private:
+     le_int32   fOutIndex;
+@@ -154,6 +162,27 @@
+         fSMabove = fSMbelow = 0;
+     }
+ 
++    void swapChars(int a, int b)
++    {
++	LEErrorCode success = LE_NO_ERROR;
++        LEUnicode temp_char;
++        le_uint32 temp_index;
++        FeatureMask temp_tag;
++
++        temp_char = fOutChars[fOutIndex + b];
++	temp_index = fGlyphStorage.getCharIndex(fOutIndex + b, success);
++        temp_tag = fGlyphStorage.getAuxData(fOutIndex + b, success);
++
++        fOutChars[fOutIndex + b] = fOutChars[fOutIndex + a];
++        le_uint32 toswap = fGlyphStorage.getCharIndex(fOutIndex + a, success);
++        fGlyphStorage.setCharIndex(fOutIndex + b,  toswap, success);
++        fGlyphStorage.setAuxData(fOutIndex + b, tagArray3, success);
++
++        fOutChars[fOutIndex + a] = temp_char;
++        fGlyphStorage.setCharIndex(fOutIndex + a, temp_index, success);
++        fGlyphStorage.setAuxData(fOutIndex + a, temp_tag, success);
++    }
++
+     void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures)
+     {
+         LEErrorCode success = LE_NO_ERROR;
+@@ -335,13 +364,6 @@
+     C_DOTTED_CIRCLE = 0x25CC
+ };
+ 
+-// TODO: Find better names for these!
+-#define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask)
+-#define tagArray3 (pstfFeatureMask | tagArray4)
+-#define tagArray2 (halfFeatureMask | tagArray3)
+-#define tagArray1 (blwfFeatureMask | tagArray2)
+-#define tagArray0 (rphfFeatureMask | tagArray1)
+-
+ static const FeatureMap featureMap[] =
+ {
+     {loclFeatureTag, loclFeatureMask},
+@@ -629,6 +651,21 @@
+                 output.writeChar(chars[i], i, tagArray4);
+             }
+ 
++            /* for the special conjuction of Cons+0x0d4d+0x0d31 or Cons+0x0d4d+0x0d30 of Malayalam */
++            if ((baseConsonant - 2 >= 0) &&
++                (chars[baseConsonant - 1] == 0x0d4d) &&
++		((chars[baseConsonant] == 0x0d31) || 
++		 (chars[baseConsonant] == 0x0d30)) &&
++                ((chars[baseConsonant - 2] >= 0x0d15) &&
++                 (chars[baseConsonant - 2] <= 0x0d39)))  {
++               if (baseConsonant < 3 || chars[baseConsonant - 3] != 0x0d4d) {
++                    output.swapChars(-1, -3);
++
++		    if (mpreFixups)
++		        mpreFixups->reduce();
++		}
++            }
++
+             if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) {
+                 output.writeMbelow();
+                 output.writeSMbelow(); // FIXME: there are no SMs in these scripts...
+--- icu.orig/source/layout/MPreFixups.h	2006-11-10 09:42:47.000000000 +0000
++++ icu/source/layout/MPreFixups.h	2006-12-21 09:13:47.000000000 +0000
+@@ -31,6 +31,8 @@
+     
+     void apply(LEGlyphStorage &glyphStorage);
+ 
++    void reduce();
++
+ private:
+     FixupData *fFixupData;
+     le_int32   fFixupCount;
+--- icu.orig/source/layout/MPreFixups.cpp	2006-11-10 09:42:47.000000000 +0000
++++ icu/source/layout/MPreFixups.cpp	2006-12-21 09:16:33.000000000 +0000
+@@ -40,6 +40,12 @@
+     }
+ }
+ 
++void MPreFixups::reduce()
++{
++    if (fFixupCount > 0)
++        fFixupCount--;
++}
++
+ void MPreFixups::apply(LEGlyphStorage &glyphStorage)
+ {
+     for (le_int32 fixup = 0; fixup < fFixupCount; fixup += 1) {
diff --git a/icu.icu5433.oriya.patch b/icu.icu5433.oriya.patch
new file mode 100644
index 0000000..f35f5a2
--- /dev/null
+++ b/icu.icu5433.oriya.patch
@@ -0,0 +1,31 @@
+diff -ru icu.orig/source/layout/IndicClassTables.cpp icu/source/layout/IndicClassTables.cpp
+--- icu.orig/source/layout/IndicClassTables.cpp	2006-10-03 14:27:47.000000000 +0100
++++ icu/source/layout/IndicClassTables.cpp	2006-10-03 14:30:07.000000000 +0100
+@@ -120,6 +120,19 @@
+     _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  // 0AE0 - 0AEF
+ };
+ 
++#if 1
++static const IndicClassTable::CharClass oryaCharClasses[] =
++{
++    _xx, _ma, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _iv, /* 0B00 - 0B0F */
++    _iv, _xx, _xx, _iv, _iv, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _ct, _bb, /* 0B10 - 0B1F */
++    _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _pb, /* 0B20 - 0B2F */
++    _rb, _xx, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _xx, _xx, _nu, _xx, _dr, _da, /* 0B30 - 0B3F */
++    _dr, _db, _db, _db, _xx, _xx, _xx, _dl, _s1, _xx, _xx, _s2, _s3, _vr, _xx, _xx, /* 0B40 - 0B4F */
++    _xx, _xx, _xx, _xx, _xx, _xx, _da, _dr, _xx, _xx, _xx, _xx, _cn, _cn, _xx, _pb, /* 0B50 - 0B5F */
++    _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0B60 - 0B6F */
++    _xx, _bb                                                                        /* 0B70 - 0B71 */
++};
++#else
+ static const IndicClassTable::CharClass oryaCharClasses[] =
+ {
+     _xx, _ma, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _iv, // 0B00 - 0B0F
+@@ -131,6 +144,7 @@
+     _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0B60 - 0B6F
+     _xx, _ct                                                                        // 0B70 - 0B71
+ };
++#endif
+ 
+ static const IndicClassTable::CharClass tamlCharClasses[] =
+ {
diff --git a/icu.icu5465.telegu.patch b/icu.icu5465.telegu.patch
new file mode 100644
index 0000000..7e80103
--- /dev/null
+++ b/icu.icu5465.telegu.patch
@@ -0,0 +1,29 @@
+--- icu.orig/source/layout/IndicClassTables.cpp	2007-02-05 14:44:17.000000000 +0000
++++ icu/source/layout/IndicClassTables.cpp	2007-02-05 14:47:49.000000000 +0000
+@@ -145,6 +145,7 @@
+ };
+ 
+ // FIXME: Should some of the bb's be pb's? (KA, NA, MA, YA, VA, etc. (approx 13))
++#if 0
+ static const IndicClassTable::CharClass teluCharClasses[] =
+ {
+     _xx, _mp, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, // 0C00 - 0C0F
+@@ -155,6 +156,18 @@
+     _xx, _xx, _xx, _xx, _xx, _da, _m2, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0C50 - 0C5F
+     _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  // 0C60 - 0C6F
+ };
++#else
++static const IndicClassTable::CharClass teluCharClasses[] =
++{   
++    _xx, _mp, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, /* 0C00 - 0C0F */
++    _iv, _xx, _iv, _iv, _iv, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, /* 0C10 - 0C1F */
++    _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _bb, /* 0C20 - 0C2F */
++    _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _xx, _xx, _xx, _xx, _da, _da, /* 0C30 - 0C3F */
++    _da, _dr, _dr, _dr, _dr, _xx, _da, _da, _s1, _xx, _da, _da, _da, _vr, _xx, _xx, /* 0C40 - 0C4F */
++    _xx, _xx, _xx, _xx, _xx, _da, _db, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0C50 - 0C5F */
++    _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  /* 0C60 - 0C6F */
++};
++#endif
+ 
+ // U+CC3 and U+CC4 are _lm here not _dr since the Kannada rendering
+ // rules want them below and to the right of the entire cluster
diff --git a/icu.icu5483.backport.patch b/icu.icu5483.backport.patch
new file mode 100644
index 0000000..039dee2
--- /dev/null
+++ b/icu.icu5483.backport.patch
@@ -0,0 +1,874 @@
+diff -ru icu.orig/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.orig/source/common/ucnv2022.c	2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnv2022.c	2009-06-02 12:30:29.000000000 +0100
+@@ -84,6 +84,26 @@
+ #define V_TAB   0x0B
+ #define SPACE   0x20
+ 
++enum {
++    HWKANA_START=0xff61,
++    HWKANA_END=0xff9f
++};
++
++/*
++ * 94-character sets with native byte values A1..FE are encoded in ISO 2022
++ * as bytes 21..7E. (Subtract 0x80.)
++ * 96-character sets with native byte values A0..FF are encoded in ISO 2022
++ * as bytes 20..7F. (Subtract 0x80.)
++ * Do not encode C1 control codes with native bytes 80..9F
++ * as bytes 00..1F (C0 control codes).
++ */
++enum {
++    GR94_START=0xa1,
++    GR94_END=0xfe,
++    GR96_START=0xa0,
++    GR96_END=0xff
++};
++
+ /*
+  * ISO 2022 control codes must not be converted from Unicode
+  * because they would mess up the byte stream.
+@@ -981,22 +1001,27 @@
+ 
+ 
+ /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
+- * any future change in _MBCSFromUChar32() function should be reflected in 
+- * this macro
++ * any future change in _MBCSFromUChar32() function should be reflected here.
++ * @return number of bytes in *value; negative number if fallback; 0 if no mapping
+  */
+-static U_INLINE void 
++static U_INLINE int32_t
+ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
+                                          UChar32 c,  
+                                          uint32_t* value, 
+                                          UBool useFallback, 
+-                                         int32_t *length, 
+                                          int outputType)
+ {
+     const int32_t *cx;
+     const uint16_t *table;
+     uint32_t stage2Entry;
+     uint32_t myValue;
++    int32_t length;
+     const uint8_t *p;
++    /*
++     * TODO(markus): Use and require new, faster MBCS conversion table structures.
++     * Use internal version of ucnv_open() that verifies that the new structures are available,
++     * else U_INTERNAL_PROGRAM_ERROR.
++     */
+     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+         table=sharedData->mbcs.fromUnicodeTable;
+@@ -1005,51 +1030,60 @@
+         if(outputType==MBCS_OUTPUT_2){
+             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+             if(myValue<=0xff) {
+-                *length=1;
++                length=1;
+             } else {
+-                *length=2;
++                length=2;
+             }
+         } else /* outputType==MBCS_OUTPUT_3 */ {
+             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+             if(myValue<=0xff) {
+-                *length=1;
++                length=1;
+             } else if(myValue<=0xffff) {
+-                *length=2;
++                length=2;
+             } else {
+-                *length=3;
++                length=3;
+             }
+         }
++        /*
++         * TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.
++         * Pass in parameter for type of output bytes, for validation and shifting:
++         * - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?
++         *   (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)
++         * - A1-FE: Subtract 80 after range check.
++         * - SJIS: Shift DBCS result to 21-7E x 21-7E.
++         */
+         /* is this code point assigned, or do we use fallbacks? */
+-        if( (stage2Entry&(1<<(16+(c&0xf))))!=0 ||
+-            (FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0)
+-        ) {
++        if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
++            /* assigned */
++            *value=myValue;
++            return length;
++        } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
+             /*
+              * We allow a 0 byte output if the "assigned" bit is set for this entry.
+              * There is no way with this data structure for fallback output
+              * to be a zero byte.
+              */
+-            /* assigned */
+             *value=myValue;
+-            return;
++            return -length;
+         }
+     }
+ 
+     cx=sharedData->mbcs.extIndexes;
+     if(cx!=NULL) {
+-        *length=ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
+-        return;
++        return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
+     }
+ 
+     /* unassigned */
+-    *length=0;
++    return 0;
+ }
+ 
+ /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
+- * any future change in _MBCSSingleFromUChar32() function should be reflected in 
+- * this macro
++ * any future change in _MBCSSingleFromUChar32() function should be reflected here.
++ * @param retval pointer to output byte
++ * @return 1 for a roundtrip byte, 0 for no mapping, -1 for a fallback byte
+  */
+-static U_INLINE void 
++static U_INLINE int32_t
+ MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
+                                        UChar32 c, 
+                                        uint32_t* retval, 
+@@ -1059,20 +1093,21 @@
+     int32_t value;
+     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+-        *retval=(uint16_t)-1;
+-        return;
++        return 0;
+     }
+     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
+     table=sharedData->mbcs.fromUnicodeTable;
+     /* get the byte for the output */
+     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
+     /* is this code point assigned, or do we use fallbacks? */
+-    if(useFallback ? value>=0x800 : value>=0xc00) {
+-        value &=0xff;
++    *retval=(uint32_t)(value&0xff);
++    if(value>=0xf00) {
++        return 1;  /* roundtrip */
++    } else if(useFallback ? value>=0x800 : value>=0xc00) {
++        return -1;  /* fallback taken */
+     } else {
+-        value= -1;
++        return 0;  /* no mapping */
+     }
+-    *retval=(uint16_t) value;
+ }
+ 
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+@@ -1316,6 +1351,7 @@
+ 
+ static void 
+ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
++    UConverter *cnv = args->converter;
+     UConverterDataISO2022 *converterData;
+     ISO2022State *pFromU2022State;
+     uint8_t *target = (uint8_t *) args->target;
+@@ -1335,14 +1371,13 @@
+     int8_t cs, g;
+ 
+     /* set up the state */
+-    converterData     = (UConverterDataISO2022*)args->converter->extraInfo;
++    converterData     = (UConverterDataISO2022*)cnv->extraInfo;
+     pFromU2022State   = &converterData->fromU2022State;
+-    useFallback       = args->converter->useFallback;
+ 
+     choiceCount = 0;
+ 
+     /* check if the last codepoint of previous buffer was a lead surrogate*/
+-    if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
++    if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+         goto getTrail;
+     }
+ 
+@@ -1361,26 +1396,26 @@
+                         if(UTF_IS_SECOND_SURROGATE(trail)) {
+                             source++;
+                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+-                            args->converter->fromUChar32=0x00;
++                            cnv->fromUChar32=0x00;
+                             /* convert this supplementary code point */
+                             /* exit this condition tree */
+                         } else {
+                             /* this is an unmatched lead code unit (1st surrogate) */
+                             /* callback(illegal) */
+                             *err=U_ILLEGAL_CHAR_FOUND;
+-                            args->converter->fromUChar32=sourceChar;
++                            cnv->fromUChar32=sourceChar;
+                             break;
+                         }
+                     } else {
+                         /* no more input */
+-                        args->converter->fromUChar32=sourceChar;
++                        cnv->fromUChar32=sourceChar;
+                         break;
+                     }
+                 } else {
+                     /* this is an unmatched trail code unit (2nd surrogate) */
+                     /* callback(illegal) */
+                     *err=U_ILLEGAL_CHAR_FOUND;
+-                    args->converter->fromUChar32=sourceChar;
++                    cnv->fromUChar32=sourceChar;
+                     break;
+                 }
+             }
+@@ -1389,7 +1424,7 @@
+             if(IS_2022_CONTROL(sourceChar)) {
+                 /* callback(illegal) */
+                 *err=U_ILLEGAL_CHAR_FOUND;
+-                args->converter->fromUChar32=sourceChar;
++                cnv->fromUChar32=sourceChar;
+                 break;
+             }
+ 
+@@ -1407,9 +1442,10 @@
+ 
+                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
+                 if(converterData->version == 3 || converterData->version == 4) {
+-                    choices[choiceCount++] = cs = (int8_t)HWKANA_7BIT;
+-                    csm &= ~CSM(cs);
++                    choices[choiceCount++] = (int8_t)HWKANA_7BIT;
+                 }
++                /* Do not try single-byte half-width Katakana for other versions. */
++                csm &= ~CSM(HWKANA_7BIT);
+ 
+                 /* try the current G0 charset */
+                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
+@@ -1432,86 +1468,134 @@
+             }
+ 
+             cs = g = 0;
++            /*
++             * len==0: no mapping found yet
++             * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
++             * len>0: found a roundtrip result, done
++             */
+             len = 0;
++            /*
++             * We will turn off useFallback after finding a fallback,
++             * but we still get fallbacks from PUA code points as usual.
++             * Therefore, we will also need to check that we don't overwrite
++             * an early fallback with a later one.
++             */
++            useFallback = cnv->useFallback;
+ 
+-            for(i = 0; i < choiceCount && len == 0; ++i) {
+-                cs = choices[i];
+-                switch(cs) {
++            for(i = 0; i < choiceCount && len <= 0; ++i) {
++                uint32_t value;
++                int32_t len2;
++                int8_t cs0 = choices[i];
++                switch(cs0) {
+                 case ASCII:
+                     if(sourceChar <= 0x7f) {
+                         targetValue = (uint32_t)sourceChar;
+                         len = 1;
++                        cs = cs0;
++                        g = 0;
+                     }
+                     break;
+                 case ISO8859_1:
+-                    if(0x80 <= sourceChar && sourceChar <= 0xff) {
++                    if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
+                         targetValue = (uint32_t)sourceChar - 0x80;
+                         len = 1;
++                        cs = cs0;
+                         g = 2;
+                     }
+                     break;
+                 case HWKANA_7BIT:
+-                    if((uint32_t)(0xff9f-sourceChar)<=(0xff9f-0xff61)) {
+-                        targetValue = (uint32_t)(sourceChar - (0xff61 - 0x21));
+-                        len = 1;
+-
++                    if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {
+                         if(converterData->version==3) {
+                             /* JIS7: use G1 (SO) */
+-                            pFromU2022State->cs[1] = cs; /* do not output an escape sequence */
++                            /* Shift U+FF61..U+FF9F to bytes 21..5F. */
++                            targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
++                            len = 1;
++                            pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
+                             g = 1;
+                         } else if(converterData->version==4) {
+                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
+-                            int8_t cs0;
+-
+-                            targetValue += 0x80;
++                            /* Shift U+FF61..U+FF9F to bytes A1..DF. */
++                            targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
++                            len = 1;
+ 
+-                            cs0 = pFromU2022State->cs[0];
+-                            if(IS_JP_DBCS(cs0)) {
++                            cs = pFromU2022State->cs[0];
++                            if(IS_JP_DBCS(cs)) {
+                                 /* switch from a DBCS charset to JISX201 */
+                                 cs = (int8_t)JISX201;
+-                            } else {
+-                                /* stay in the current G0 charset */
+-                                cs = cs0;
+                             }
++                            /* else stay in the current G0 charset */
++                            g = 0;
+                         }
++                        /* else do not use HWKANA_7BIT with other versions */
+                     }
+                     break;
+                 case JISX201:
+                     /* G0 SBCS */
+-                    MBCS_SINGLE_FROM_UCHAR32(
+-                        converterData->myConverterArray[cs],
+-                        sourceChar, &targetValue,
+-                        useFallback);
+-                    if(targetValue <= 0x7f) {
+-                        len = 1;
++                    len2 = MBCS_SINGLE_FROM_UCHAR32(
++                                converterData->myConverterArray[cs0],
++                                sourceChar, &value,
++                                useFallback);
++                    if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {
++                        targetValue = value;
++                        len = len2;
++                        cs = cs0;
++                        g = 0;
++                        useFallback = FALSE;
+                     }
+                     break;
+                 case ISO8859_7:
+                     /* G0 SBCS forced to 7-bit output */
+-                    MBCS_SINGLE_FROM_UCHAR32(
+-                        converterData->myConverterArray[cs],
+-                        sourceChar, &targetValue,
+-                        useFallback);
+-                    if(0x80 <= targetValue && targetValue <= 0xff) {
+-                        targetValue -= 0x80;
+-                        len = 1;
++                    len2 = MBCS_SINGLE_FROM_UCHAR32(
++                                converterData->myConverterArray[cs0],
++                                sourceChar, &value,
++                                useFallback);
++                    if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
++                        targetValue = value - 0x80;
++                        len = len2;
++                        cs = cs0;
+                         g = 2;
++                        useFallback = FALSE;
+                     }
+                     break;
+                 default:
+                     /* G0 DBCS */
+-                    MBCS_FROM_UCHAR32_ISO2022(
+-                        converterData->myConverterArray[cs],
+-                        sourceChar, &targetValue,
+-                        useFallback, &len, MBCS_OUTPUT_2);
+-                    if(len != 2) {
+-                        len = 0;
++                    len2 = MBCS_FROM_UCHAR32_ISO2022(
++                                converterData->myConverterArray[cs0],
++                                sourceChar, &value,
++                                useFallback, MBCS_OUTPUT_2);
++                    if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
++                        if(cs0 == KSC5601) {
++                            /*
++                             * Check for valid bytes for the encoding scheme.
++                             * This is necessary because the sub-converter (windows-949)
++                             * has a broader encoding scheme than is valid for 2022.
++                             *
++                             * Check that the result is a 2-byte value with each byte in the range A1..FE
++                             * (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte
++                             * to move it to the ISO 2022 range 21..7E.
++                             */
++                            if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
++                                (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
++                            ) {
++                                value -= 0x8080;  /* shift down to 21..7e byte range */
++                            } else {
++                                break;  /* not valid for ISO 2022 */
++                            }
++                        }
++                        targetValue = value;
++                        len = len2;
++                        cs = cs0;
++                        g = 0;
++                        useFallback = FALSE;
+                     }
+                     break;
+                 }
+             }
+ 
+-            if(len > 0) {
++            if(len != 0) {
++                if(len < 0) {
++                    len = -len;  /* fallback */
++                }
+                 outLen = 0; /* count output bytes */
+ 
+                 /* write SI if necessary (only for JIS7) */
+@@ -1560,7 +1644,7 @@
+                  * then this is an error
+                  */
+                 *err = U_INVALID_CHAR_FOUND;
+-                args->converter->fromUChar32=sourceChar;
++                cnv->fromUChar32=sourceChar;
+                 break;
+             }
+ 
+@@ -1586,7 +1670,7 @@
+                 }
+             } else {
+                 fromUWriteUInt8(
+-                    args->converter,
++                    cnv,
+                     buffer, outLen,
+                     &target, (const char *)targetLimit,
+                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
+@@ -1615,7 +1699,7 @@
+      */
+     if( U_SUCCESS(*err) &&
+         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
+-        args->flush && source>=sourceLimit && args->converter->fromUChar32==0
++        args->flush && source>=sourceLimit && cnv->fromUChar32==0
+     ) {
+         int32_t sourceIndex;
+ 
+@@ -1654,7 +1738,7 @@
+         }
+ 
+         fromUWriteUInt8(
+-            args->converter,
++            cnv,
+             buffer, outLen,
+             &target, (const char *)targetLimit,
+             &offsets, sourceIndex,
+@@ -1777,7 +1861,7 @@
+                     !IS_JP_DBCS(cs)
+                 ) {
+                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
+-                    targetUniChar = mySourceChar + (0xff61 - 0xa1);
++                    targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
+ 
+                     /* return from a single-shift state to the previous one */
+                     if(pToU2022State->g >= 2) {
+@@ -1818,7 +1902,7 @@
+                 case HWKANA_7BIT:
+                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
+                         /* 7-bit halfwidth Katakana */
+-                        targetUniChar = mySourceChar + (0xff61 - 0x21);
++                        targetUniChar = mySourceChar + (HWKANA_START - 0x21);
+                     }
+                     break;
+                 default:
+@@ -1965,9 +2049,10 @@
+                 break;
+             }
+ 
+-           /* length= ucnv_MBCSFromUChar32(converterData->currentConverter->sharedData,
+-                sourceChar,&targetByteUnit,args->converter->useFallback);*/
+-            MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,&length,MBCS_OUTPUT_2);
++            length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
++            if(length < 0) {
++                length = -length;  /* fallback */
++            }
+             /* only DBCS or SBCS characters are expected*/
+             /* DB characters with high bit set to 1 are expected */
+             if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
+@@ -2449,7 +2534,7 @@
+ 
+ static void 
+ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
+-
++    UConverter *cnv = args->converter;
+     UConverterDataISO2022 *converterData;
+     ISO2022State *pFromU2022State;
+     uint8_t *target = (uint8_t *) args->target;
+@@ -2466,14 +2551,13 @@
+     UBool useFallback;
+ 
+     /* set up the state */
+-    converterData     = (UConverterDataISO2022*)args->converter->extraInfo;
++    converterData     = (UConverterDataISO2022*)cnv->extraInfo;
+     pFromU2022State   = &converterData->fromU2022State;
+-    useFallback       = args->converter->useFallback;
+ 
+     choiceCount = 0;
+ 
+     /* check if the last codepoint of previous buffer was a lead surrogate*/
+-    if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
++    if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+         goto getTrail;
+     }
+ 
+@@ -2492,26 +2576,26 @@
+                         if(UTF_IS_SECOND_SURROGATE(trail)) {
+                             source++;
+                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+-                            args->converter->fromUChar32=0x00;
++                            cnv->fromUChar32=0x00;
+                             /* convert this supplementary code point */
+                             /* exit this condition tree */
+                         } else {
+                             /* this is an unmatched lead code unit (1st surrogate) */
+                             /* callback(illegal) */
+                             *err=U_ILLEGAL_CHAR_FOUND;
+-                            args->converter->fromUChar32=sourceChar;
++                            cnv->fromUChar32=sourceChar;
+                             break;
+                         }
+                     } else {
+                         /* no more input */
+-                        args->converter->fromUChar32=sourceChar;
++                        cnv->fromUChar32=sourceChar;
+                         break;
+                     }
+                 } else {
+                     /* this is an unmatched trail code unit (2nd surrogate) */
+                     /* callback(illegal) */
+                     *err=U_ILLEGAL_CHAR_FOUND;
+-                    args->converter->fromUChar32=sourceChar;
++                    cnv->fromUChar32=sourceChar;
+                     break;
+                 }
+             }
+@@ -2522,7 +2606,7 @@
+                 if(IS_2022_CONTROL(sourceChar)) {
+                     /* callback(illegal) */
+                     *err=U_ILLEGAL_CHAR_FOUND;
+-                    args->converter->fromUChar32=sourceChar;
++                    cnv->fromUChar32=sourceChar;
+                     break;
+                 }
+ 
+@@ -2545,7 +2629,6 @@
+             }
+             else{
+                 /* convert U+0080..U+10ffff */
+-                UConverterSharedData *cnv;
+                 int32_t i;
+                 int8_t cs, g;
+ 
+@@ -2593,17 +2676,41 @@
+                 }
+ 
+                 cs = g = 0;
++                /*
++                 * len==0: no mapping found yet
++                 * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
++                 * len>0: found a roundtrip result, done
++                 */
+                 len = 0;
++                /*
++                 * We will turn off useFallback after finding a fallback,
++                 * but we still get fallbacks from PUA code points as usual.
++                 * Therefore, we will also need to check that we don't overwrite
++                 * an early fallback with a later one.
++                 */
++                useFallback = cnv->useFallback;
+ 
+-                for(i = 0; i < choiceCount && len == 0; ++i) {
+-                    cs = choices[i];
+-                    if(cs > 0) {
+-                        if(cs > CNS_11643_0) {
+-                            cnv = converterData->myConverterArray[CNS_11643];
+-                            MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3);
+-                            if(len==3) {
+-                                cs = (int8_t)(CNS_11643_0 + (targetValue >> 16) - 0x80);
+-                                len = 2;
++                for(i = 0; i < choiceCount && len <= 0; ++i) {
++                    int8_t cs0 = choices[i];
++                    if(cs0 > 0) {
++                        uint32_t value;
++                        int32_t len2;
++                        if(cs0 > CNS_11643_0) {
++                            len2 = MBCS_FROM_UCHAR32_ISO2022(
++                                        converterData->myConverterArray[CNS_11643],
++                                        sourceChar,
++                                        &value,
++                                        useFallback,
++                                        MBCS_OUTPUT_3);
++                            if(len2 == 3 || (len2 == -3 && len == 0)) {
++                                targetValue = value;
++                                cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
++                                if(len2 >= 0) {
++                                    len = 2;
++                                } else {
++                                    len = -2;
++                                    useFallback = FALSE;
++                                }
+                                 if(cs == CNS_11643_1) {
+                                     g = 1;
+                                 } else if(cs == CNS_11643_2) {
+@@ -2617,15 +2724,25 @@
+                             }
+                         } else {
+                             /* GB2312_1 or ISO-IR-165 */
+-                            cnv = converterData->myConverterArray[cs];
+-                            MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2);
+-                            g = 1; /* used if len == 2 */
++                            len2 = MBCS_FROM_UCHAR32_ISO2022(
++                                        converterData->myConverterArray[cs0],
++                                        sourceChar,
++                                        &value,
++                                        useFallback,
++                                        MBCS_OUTPUT_2);
++                            if(len2 == 2 || (len2 == -2 && len == 0)) {
++                                targetValue = value;
++                                len = len2;
++                                cs = cs0;
++                                g = 1;
++                                useFallback = FALSE;
++                            }
+                         }
+                     }
+                 }
+ 
+-                if(len > 0) {
+-                    len = 0; /* count output bytes; it must have been len == 2 */
++                if(len != 0) {
++                    len = 0; /* count output bytes; it must have been abs(len) == 2 */
+ 
+                     /* write the designation sequence if necessary */
+                     if(cs != pFromU2022State->cs[g]) {
+@@ -2670,7 +2787,7 @@
+                      * then this is an error
+                      */
+                     *err = U_INVALID_CHAR_FOUND;
+-                    args->converter->fromUChar32=sourceChar;
++                    cnv->fromUChar32=sourceChar;
+                     break;
+                 }
+             }
+@@ -2691,7 +2808,7 @@
+                 }
+             } else {
+                 fromUWriteUInt8(
+-                    args->converter,
++                    cnv,
+                     buffer, len,
+                     &target, (const char *)targetLimit,
+                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
+@@ -2720,7 +2837,7 @@
+      */
+     if( U_SUCCESS(*err) &&
+         pFromU2022State->g!=0 &&
+-        args->flush && source>=sourceLimit && args->converter->fromUChar32==0
++        args->flush && source>=sourceLimit && cnv->fromUChar32==0
+     ) {
+         int32_t sourceIndex;
+ 
+@@ -2748,7 +2865,7 @@
+         }
+ 
+         fromUWriteUInt8(
+-            args->converter,
++            cnv,
+             SHIFT_IN_STR, 1,
+             &target, (const char *)targetLimit,
+             &offsets, sourceIndex,
+@@ -3146,7 +3263,7 @@
+         }
+         if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
+             /* include half-width Katakana for JP */
+-            sa->addRange(sa->set, 0xff61, 0xff9f);
++            sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+         }
+         break;
+     case 'c':
+diff -ru icu.orig/source/common/ucnv_ext.c icu/source/common/ucnv_ext.c
+--- icu.orig/source/common/ucnv_ext.c	2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnv_ext.c	2009-06-02 12:14:20.000000000 +0100
+@@ -551,6 +551,12 @@
+         return 0;
+     }
+ 
++    /*
++     * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
++     * Do not interpret values with reserved bits used, for forward compatibility,
++     * and do not even remember intermediate results with reserved bits used.
++     */
++
+     if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
+         /* partial match, enter the loop below */
+         index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+@@ -575,7 +581,8 @@
+             value=*fromUSectionValues++;
+             if( value!=0 &&
+                 (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+-                 FROM_U_USE_FALLBACK(useFallback, firstCP))
++                 FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
++                (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
+             ) {
+                 /* remember longest match so far */
+                 matchValue=value;
+@@ -613,8 +620,9 @@
+                     /* partial match, continue */
+                     index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+                 } else {
+-                    if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+-                         FROM_U_USE_FALLBACK(useFallback, firstCP)
++                    if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
++                         FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
++                        (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
+                     ) {
+                         /* full match, stop with result */
+                         matchValue=value;
+@@ -632,8 +640,9 @@
+             return 0;
+         }
+     } else /* result from firstCP trie lookup */ {
+-        if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+-             FROM_U_USE_FALLBACK(useFallback, firstCP)
++        if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
++             FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
++            (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
+         ) {
+             /* full match, stop with result */
+             matchValue=value;
+@@ -644,20 +653,18 @@
+         }
+     }
+ 
+-    if(matchValue&UCNV_EXT_FROM_U_RESERVED_MASK) {
+-        /* do not interpret values with reserved bits used, for forward compatibility */
+-        return 0;
+-    }
+-
+     /* return result */
+     if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
+         return 1; /* assert matchLength==2 */
+     }
+ 
+-    *pMatchValue=UCNV_EXT_FROM_U_MASK_ROUNDTRIP(matchValue);
++    *pMatchValue=matchValue;
+     return matchLength;
+ }
+ 
++/*
++ * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
++ */
+ static U_INLINE void
+ ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
+                    uint32_t value,
+@@ -792,6 +799,10 @@
+     }
+ }
+ 
++/*
++ * Used by ISO 2022 implementation.
++ * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
++ */
+ U_CFUNC int32_t
+ ucnv_extSimpleMatchFromU(const int32_t *cx,
+                          UChar32 cp, uint32_t *pValue,
+@@ -809,13 +820,15 @@
+     if(match>=2) {
+         /* write result for simple, single-character conversion */
+         int32_t length;
+-        
++        int isRoundtrip;
++
++        isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
+         length=UCNV_EXT_FROM_U_GET_LENGTH(value);
+         value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
+ 
+         if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
+             *pValue=value;
+-            return length;
++            return isRoundtrip ? length : -length;
+ #if 0 /* not currently used */
+         } else if(length==4) {
+             /* de-serialize a 4-byte result */
+@@ -825,7 +838,7 @@
+                 ((uint32_t)result[1]<<16)|
+                 ((uint32_t)result[2]<<8)|
+                 result[3];
+-            return 4;
++            return isRoundtrip ? 4 : -4;
+ #endif
+         }
+     }
+diff -ru icu.orig/source/common/ucnv_ext.h icu/source/common/ucnv_ext.h
+--- icu.orig/source/common/ucnv_ext.h	2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnv_ext.h	2009-06-02 12:14:20.000000000 +0100
+@@ -452,7 +452,7 @@
+ #define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0)
+ #define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
+ 
+-/* use after masking off the roundtrip flag */
++/* get length; masks away all other bits */
+ #define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
+ 
+ /* get bytes or bytes index */
+diff -ru icu.orig/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.orig/source/common/ucnvmbcs.c	2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnvmbcs.c	2009-06-02 12:14:20.000000000 +0100
+@@ -3785,7 +3785,8 @@
+ 
+     cx=sharedData->mbcs.extIndexes;
+     if(cx!=NULL) {
+-        return ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
++        length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
++        return length>=0 ? length : -length;  /* return abs(length); */
+     }
+ 
+     /* unassigned */
+diff -ru icu.orig/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.orig/source/test/testdata/conversion.txt	2009-06-02 11:48:26.000000000 +0100
++++ icu/source/test/testdata/conversion.txt	2009-06-02 12:14:20.000000000 +0100
+@@ -495,6 +495,46 @@
+         }
+         { "UTF-16BE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
+         { "UTF-16BE", :bin{ d800dc }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ d800dc } }
++        // Verify that mappings that would result in byte values outside 20..7F (for SBCS)
++        // or 21..7E (for DBCS) are not used.
++        // ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
++        //   <U009F> \x9F |0 (also in ISO 8859-1)
++        //   <U0387> \xB7 |1
++        // windows-949-2000 (KSC_5601, <ESC>$(C=1b242843):
++        //   <UC829> \xA0\xA1 |0
++        //   <UD4FE> \xC0\x41 |0
++        //   <UD79D> \xC8\xFE |0
++        {
++          "JIS8",  // =ISO_2022,locale=ja,version=4
++          "\u009f\u0387\uc829\ud4fe\ud79d",
++          :bin{       1a1b2e461b4e371a1a1b242843487e1b2842 },
++          :intvector{ 0,1,1,1,1,1,1,2,3,4,4,4,4,4,4,4,4,4 },
++          :int{1}, :int{1}, "", "?", ""
++        }
++        // Ticket 5483: ISO 2022 converter incorrectly using fallback mapping
++        // Verify that a roundtrip mapping is used even when a fallback mapping is
++        // available in the current state.
++        //   U+FF61 is handled in code
++        // jisx-208.ucm (<ESC>$B=1b2442):
++        //   <U30FE> \x21\x34 |0
++        //   <UFF5D> \x21\x51 |0  and
++        // ibm-897_P100-1995.ucm (JIS X 0201, <ESC>(J=1b284a):
++        //   <UFF5D> \x7D |1
++        // ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
++        //   <U03D5> \xF6 |1
++        //   <U2015> \xAF |0
++        //   <UFF5D> \x7D |1 (not legal for ISO 2022)
++        // windows-949-2000 (KSC_5601, <ESC>$(C=1b242843):
++        //   <UAC00> \xB0\xA1 |0
++        //   <UFF5D> \xA3\xFD |0
++        //   <U223C> \xA1\xAD |0 (in extension table)
++        {
++          "JIS8",  // =ISO_2022,locale=ja,version=4
++          "a\uff61\u03d5\uff5d\uac00\u223c\uff5d\u30fe\uff5d",  // Make it switch to ISO-8859-7, KSC 5601 and JIS X 0208.
++          :bin{       61a11b2e461b4e761b244221511b2428433021212d237d1b2442213421511b2842 },
++          :intvector{ 0,1,2,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,4,5,5,6,6,7,7,7,7,7,8,8,8,8,8 },
++          :int{1}, :int{1}, "", "?", ""
++        }
+ 
+         // e4b8 is a partial sequence
+         { "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c", :intvector{ 0, 1 }, :int{1}, :int{0}, "truncated", ".", :bin{ e4b8 } }
diff --git a/icu.icu5488.assamese.patch b/icu.icu5488.assamese.patch
new file mode 100644
index 0000000..8b5d773
--- /dev/null
+++ b/icu.icu5488.assamese.patch
@@ -0,0 +1,11 @@
+--- icu.orig/source/layout/IndicClassTables.cpp	2006-10-18 09:05:20.000000000 +0100
++++ icu/source/layout/IndicClassTables.cpp	2006-11-01 09:26:58.000000000 +0000
+@@ -94,7 +94,7 @@
+     _dr, _db, _db, _db, _db, _xx, _xx, _l1, _dl, _xx, _xx, _s1, _s2, _vr, _xx, _xx, // 09C0 - 09CF
+     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _m2, _xx, _xx, _xx, _xx, _cn, _cn, _xx, _cn, // 09D0 - 09DF
+     _iv, _iv, _dv, _dv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 09E0 - 09EF
+-    _ct, _ct, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx                           // 09F0 - 09FA
++    _rv, _ct, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx                           /* 09F0 - 09FA */
+ };
+ 
+ static const IndicClassTable::CharClass punjCharClasses[] =
diff --git a/icu.icu5500.devicetablecrash.patch b/icu.icu5500.devicetablecrash.patch
new file mode 100644
index 0000000..16ea5b7
--- /dev/null
+++ b/icu.icu5500.devicetablecrash.patch
@@ -0,0 +1,11 @@
+--- icu.orig/source/layout/DeviceTables.cpp	2006-10-18 09:05:20.000000000 +0100
++++ icu/source/layout/DeviceTables.cpp	2006-11-08 09:08:09.000000000 +0000
+@@ -22,7 +22,7 @@
+     le_uint16 format = SWAPW(deltaFormat) - 1;
+     le_int16 result = 0;
+     
+-    if (ppem >= start && ppem <= SWAPW(endSize)) {
++    if (ppem >= start && ppem <= SWAPW(endSize) && format < sizeof(fieldBits)/sizeof(fieldBits[0])) {
+         le_uint16 sizeIndex = ppem - start;
+         le_uint16 bits = fieldBits[format];
+         le_uint16 count = 16 / bits;
diff --git a/icu.icu5501.sinhala.biggerexpand.patch b/icu.icu5501.sinhala.biggerexpand.patch
new file mode 100644
index 0000000..6013780
--- /dev/null
+++ b/icu.icu5501.sinhala.biggerexpand.patch
@@ -0,0 +1,11 @@
+--- icu.orig/source/layout/IndicClassTables.cpp	2006-10-18 09:05:20.000000000 +0100
++++ icu/source/layout/IndicClassTables.cpp	2006-11-08 11:20:55.000000000 +0000
+@@ -284,7 +284,7 @@
+ 
+ static const IndicClassTable mlymClassTable = {0x0D00, 0x0D6F, 3, MLYM_SCRIPT_FLAGS, mlymCharClasses, mlymSplitTable};
+ 
+-static const IndicClassTable sinhClassTable = {0x0D80, 0x0DF4, 3, SINH_SCRIPT_FLAGS, sinhCharClasses, sinhSplitTable};
++static const IndicClassTable sinhClassTable = {0x0D80, 0x0DF4, 4, SINH_SCRIPT_FLAGS, sinhCharClasses, sinhSplitTable};
+ 
+ //
+ // IndicClassTable addresses
diff --git a/icu.icu5506.multiplevowels.patch b/icu.icu5506.multiplevowels.patch
new file mode 100644
index 0000000..a58ec64
--- /dev/null
+++ b/icu.icu5506.multiplevowels.patch
@@ -0,0 +1,61 @@
+diff -ur icu.orig/source/layout/IndicReordering.cpp icu/source/layout/IndicReordering.cpp
+--- icu.orig/source/layout/IndicReordering.cpp	2006-11-10 09:42:44.000000000 +0000
++++ icu/source/layout/IndicReordering.cpp	2006-11-10 09:47:05.000000000 +0000
+@@ -395,7 +395,7 @@
+     {-1,  6,  1, -1, -1, -1, -1, -1, -1,  5,  9,  5,  5,  4, 12}, //  2 - consonant with nukta
+     {-1,  6,  1, -1, -1, -1, -1, -1,  2,  5,  9,  5,  5,  4, 12}, //  3 - consonant
+     {-1, -1, -1, -1, -1, -1,  3,  2, -1, -1, -1, -1, -1, -1,  7}, //  4 - consonant virama
+-    {-1,  6,  1, -1, -1, -1, -1, -1, -1,  5, -1, -1, -1, -1, -1}, //  5 - dependent vowels
++    {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  5 - dependent vowels
+     {-1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  6 - vowel mark
+     {-1, -1, -1, -1, -1, -1,  3,  2, -1, -1, -1, -1, -1, -1, -1}, //  7 - consonant virama ZWJ, consonant ZWJ virama
+     {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  4, -1}, //  8 - independent vowels that can take a virama
+@@ -423,6 +423,48 @@
+ 
+         state = stateTable[state][charClass & CF_CLASS_MASK];
+ 
++	/*for the components of split matra*/	
++	if ((charCount >= cursor + 3) &&
++	    (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF && chars[cursor + 2] == 0x0DCA)) {  /*for 3 split matra of Sinhala*/
++	    return cursor + 3;
++	}        
++	else if ((charCount >= cursor + 3) &&
++	         (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2 && chars[cursor + 2] == 0x0CD5)) {  /*for 3 split matra of Kannada*/
++	    return cursor + 3;
++	}
++        /*for 2 split matra*/	
++	else if (charCount >= cursor + 2) {
++	        /*for Bengali*/
++            if ((chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09BE) ||	       
++	        (chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09D7) ||		
++	        /*for Oriya*/
++	        (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B3E) ||		
++	        (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B56) ||		
++	        (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B57) ||
++	        /*for Tamil*/
++	        (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BBE) ||		
++	        (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BD7) ||		
++	        (chars[cursor] == 0x0BC7 && chars[cursor + 1] == 0x0BBE) ||		
++	        /*for Malayalam*/
++	        (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D3E) ||	
++	        (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D57) ||		
++	        (chars[cursor] == 0x0D47 && chars[cursor + 1] == 0x0D3E) ||	
++	        /*for Sinhala*/
++	        (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCA) ||		
++	        (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF) ||		
++	        (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DDF) ||		
++	        (chars[cursor] == 0x0DDC && chars[cursor + 1] == 0x0DCA) ||		
++	        /*for Telugu*/
++	        (chars[cursor] == 0x0C46 && chars[cursor + 1] == 0x0C56) ||	
++	        /*for Kannada*/
++	        (chars[cursor] == 0x0CBF && chars[cursor + 1] == 0x0CD5) ||		    
++	        (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD5) ||		
++	        (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD6) ||		
++	        (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2) ||		
++	        (chars[cursor] == 0x0CCA && chars[cursor + 1] == 0x0CD5))
++		    return cursor + 2;
++	}
++
+         if (state < 0) {
+             break;
+         }
diff --git a/icu.icu5557.safety.patch b/icu.icu5557.safety.patch
new file mode 100644
index 0000000..682caa1
--- /dev/null
+++ b/icu.icu5557.safety.patch
@@ -0,0 +1,14 @@
+--- icu.orig/source/layout/CoverageTables.cpp	2007-01-09 12:57:41.000000000 +0000
++++ icu/source/layout/CoverageTables.cpp	2007-01-09 12:59:09.000000000 +0000
+@@ -44,6 +44,11 @@
+     le_uint16 count = SWAPW(glyphCount);
+     le_uint8 bit = OpenTypeUtilities::highBit(count);
+     le_uint16 power = 1 << bit;
++
++    if (count == 0) {
++        return -1;
++    }
++
+     le_uint16 extra = count - power;
+     le_uint16 probe = power;
+     le_uint16 index = 0;
diff --git a/icu.icu5594.gujarati.patch b/icu.icu5594.gujarati.patch
new file mode 100644
index 0000000..b21418d
--- /dev/null
+++ b/icu.icu5594.gujarati.patch
@@ -0,0 +1,14 @@
+--- icu.orig/source/layout/IndicClassTables.cpp	2007-02-09 14:26:04.000000000 +0000
++++ icu/source/layout/IndicClassTables.cpp	2007-02-13 15:41:52.000000000 +0000
+@@ -117,7 +117,11 @@
+     _rv, _xx, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _nu, _xx, _dr, _dl, // 0AB0 - 0ABF
+     _dr, _db, _db, _db, _db, _da, _xx, _da, _da, _dr, _xx, _dr, _dr, _vr, _xx, _xx, // 0AC0 - 0ACF
+     _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0AD0 - 0ADF
++#if 1
++    _iv, _xx, _db, _db, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  // 0AE0 - 0AEF
++#else
+     _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx  // 0AE0 - 0AEF
++#endif
+ };
+ 
+ #if 1
diff --git a/icu.icu5691.backport.patch b/icu.icu5691.backport.patch
new file mode 100644
index 0000000..906ecd3
--- /dev/null
+++ b/icu.icu5691.backport.patch
@@ -0,0 +1,730 @@
+diff -ru icu.6175/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.6175/source/common/ucnv2022.c	2009-06-02 15:47:31.000000000 +0100
++++ icu/source/common/ucnv2022.c	2009-06-02 16:03:15.000000000 +0100
+@@ -754,6 +754,7 @@
+     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
+     uint32_t key = myData2022->key;
+     int32_t offset = 0;
++    int8_t initialToULength = _this->toULength;
+     char c;
+ 
+     value = VALID_NON_TERMINAL_2022;
+@@ -806,7 +807,6 @@
+         return;
+     } else if (value == INVALID_2022 ) {
+         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+-        return;
+     } else /* value == VALID_TERMINAL_2022 */ {
+         switch(var){
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+@@ -938,6 +938,35 @@
+     }
+     if(U_SUCCESS(*err)) {
+         _this->toULength = 0;
++    } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
++        if(_this->toULength>1) {
++            /*
++             * Ticket 5691: consistent illegal sequences:
++             * - We include at least the first byte (ESC) in the illegal sequence.
++             * - If any of the non-initial bytes could be the start of a character,
++             *   we stop the illegal sequence before the first one of those.
++             *   In escape sequences, all following bytes are "printable", that is,
++             *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
++             *   they are valid single/lead bytes.
++             *   For simplicity, we always only report the initial ESC byte as the
++             *   illegal sequence and back out all other bytes we looked at.
++             */
++            /* Back out some bytes. */
++            int8_t backOutDistance=_this->toULength-1;
++            int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
++            if(backOutDistance<=bytesFromThisBuffer) {
++                /* same as initialToULength<=1 */
++                *source-=backOutDistance;
++            } else {
++                /* Back out bytes from the previous buffer: Need to replay them. */
++                _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
++                /* same as -(initialToULength-1) */
++                /* preToULength is negative! */
++                uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
++                *source-=bytesFromThisBuffer;
++            }
++            _this->toULength=1;
++        }
+     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
+         _this->toUCallbackReason = UCNV_UNASSIGNED;
+     }
+@@ -1973,6 +2002,7 @@
+         mySourceChar = args->converter->toUBytes[0];
+         args->converter->toULength = 0;
+         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
++        targetUniChar = missingCharMarker;
+         goto getTrailByte;
+     }
+ 
+@@ -2102,17 +2132,44 @@
+                 default:
+                     /* G0 DBCS */
+                     if(mySource < mySourceLimit) {
+-                        char trailByte;
++                        int leadIsOk, trailIsOk;
++                        uint8_t trailByte;
+ getTrailByte:
+-                        trailByte = *mySource++;
+-                        if(cs == JISX208) {
+-                            _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf);
+-                        } else {
+-                            tempBuf[0] = (char)mySourceChar;
+-                            tempBuf[1] = trailByte;
++                        trailByte = (uint8_t)*mySource;
++                        /*
++                         * Ticket 5691: consistent illegal sequences:
++                         * - We include at least the first byte in the illegal sequence.
++                         * - If any of the non-initial bytes could be the start of a character,
++                         *   we stop the illegal sequence before the first one of those.
++                         *
++                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
++                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
++                         * Otherwise we convert or report the pair of bytes.
++                         */
++                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
++                        if (leadIsOk && trailIsOk) {
++                            ++mySource;
++                            uint32_t tmpSourceChar = (mySourceChar << 8) | trailByte;
++                            if(cs == JISX208) {
++                                _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
++                                mySourceChar = tmpSourceChar;
++                            } else {
++                                /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
++                                mySourceChar = tmpSourceChar;
++                                if (cs == KSC5601) {
++                                    tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
++                                }
++                                tempBuf[0] = (char)(tmpSourceChar >> 8);
++                                tempBuf[1] = (char)(tmpSourceChar);
++                            }
++                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
++                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
++                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
++                            ++mySource;
++                            /* add another bit so that the code below writes 2 bytes in case of error */
++                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+                         }
+-                        mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+                     } else {
+                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+                         args->converter->toULength = 1;
+@@ -2254,7 +2311,12 @@
+             }
+             /* only DBCS or SBCS characters are expected*/
+             /* DB characters with high bit set to 1 are expected */
+-            if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
++            if( length > 2 || length==0 ||
++                (length == 1 && targetByteUnit > 0x7f) ||
++                (length == 2 &&
++                    ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
++                    (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
++            ) {
+                 targetByteUnit=missingCharMarker;
+             }
+             if (targetByteUnit != missingCharMarker){
+@@ -2583,17 +2645,34 @@
+             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
+             if(myData->toU2022State.g == 1) {
+                 if(mySource < mySourceLimit) {
+-                    char trailByte;
++                    int leadIsOk, trailIsOk;
++                    uint8_t trailByte;
+ getTrailByte:
+-                    trailByte = *mySource++;
+-                    tempBuf[0] = (char)(mySourceChar + 0x80);
+-                    tempBuf[1] = (char)(trailByte + 0x80);
+-                    mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+-                    if((mySourceChar & 0x8080) == 0) {
++                    targetUniChar = missingCharMarker;
++                    trailByte = (uint8_t)*mySource;
++                    /*
++                     * Ticket 5691: consistent illegal sequences:
++                     * - We include at least the first byte in the illegal sequence.
++                     * - If any of the non-initial bytes could be the start of a character,
++                     *   we stop the illegal sequence before the first one of those.
++                     *
++                     * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
++                     * an ESC/SO/SI, we report only the first byte as the illegal sequence.
++                     * Otherwise we convert or report the pair of bytes.
++                     */
++                    leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                    trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
++                    if (leadIsOk && trailIsOk) {
++                        ++mySource;
++                        tempBuf[0] = (char)(mySourceChar + 0x80);
++                        tempBuf[1] = (char)(trailByte + 0x80);
+                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
+-                    } else {
+-                        /* illegal bytes > 0x7f */
+-                        targetUniChar = missingCharMarker;
++                        mySourceChar = (mySourceChar << 8) | trailByte;
++                    } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
++                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
++                        ++mySource;
++                        /* add another bit so that the code below writes 2 bytes in case of error */
++                        mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+                     }
+                 } else {
+                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+@@ -2601,8 +2680,10 @@
+                     break;
+                 }
+             }
+-            else{
++            else if(mySourceChar <= 0x7f) {
+                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
++            } else {
++                targetUniChar = 0xffff;
+             }
+             if(targetUniChar < 0xfffe){
+                 if(args->offsets) {
+@@ -3099,6 +3180,7 @@
+         /* continue with a partial double-byte character */
+         mySourceChar = args->converter->toUBytes[0];
+         args->converter->toULength = 0;
++        targetUniChar = missingCharMarker;
+         goto getTrailByte;
+     }
+ 
+@@ -3178,29 +3260,50 @@
+                         UConverterSharedData *cnv;
+                         StateEnum tempState;
+                         int32_t tempBufLen;
+-                        char trailByte;
++                        int leadIsOk, trailIsOk;
++                        uint8_t trailByte;
+ getTrailByte:
+-                        trailByte = *mySource++;
+-                        tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
+-                        if(tempState > CNS_11643_0) {
+-                            cnv = myData->myConverterArray[CNS_11643];
+-                            tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
+-                            tempBuf[1] = (char) (mySourceChar);
+-                            tempBuf[2] = trailByte;
+-                            tempBufLen = 3;
+-
+-                        }else{
+-                            cnv = myData->myConverterArray[tempState];
+-                            tempBuf[0] = (char) (mySourceChar);
+-                            tempBuf[1] = trailByte;
+-                            tempBufLen = 2;
++                        trailByte = (uint8_t)*mySource;
++                        /*
++                         * Ticket 5691: consistent illegal sequences:
++                         * - We include at least the first byte in the illegal sequence.
++                         * - If any of the non-initial bytes could be the start of a character,
++                         *   we stop the illegal sequence before the first one of those.
++                         *
++                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
++                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
++                         * Otherwise we convert or report the pair of bytes.
++                         */
++                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
++                        if (leadIsOk && trailIsOk) {
++                            ++mySource;
++                            tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
++                            if(tempState >= CNS_11643_0) {
++                                cnv = myData->myConverterArray[CNS_11643];
++                                tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
++                                tempBuf[1] = (char) (mySourceChar);
++                                tempBuf[2] = (char) trailByte;
++                                tempBufLen = 3;
++
++                            }else{
++                                cnv = myData->myConverterArray[tempState];
++                                tempBuf[0] = (char) (mySourceChar);
++                                tempBuf[1] = (char) trailByte;
++                                tempBufLen = 2;
++                            }
++                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
++                            mySourceChar = (mySourceChar << 8) | trailByte;
++                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
++                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
++                            ++mySource;
++                            /* add another bit so that the code below writes 2 bytes in case of error */
++                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+                         }
+-                        mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+                         if(pToU2022State->g>=2) {
+                             /* return from a single-shift state to the previous one */
+                             pToU2022State->g=pToU2022State->prevG;
+                         }
+-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
+                     } else {
+                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+                         args->converter->toULength = 1;
+diff -ru icu.6175/source/common/ucnvhz.c icu/source/common/ucnvhz.c
+--- icu.6175/source/common/ucnvhz.c	2009-06-02 15:47:31.000000000 +0100
++++ icu/source/common/ucnvhz.c	2009-06-02 15:57:18.000000000 +0100
+@@ -196,10 +196,30 @@
+                      /* if the first byte is equal to TILDE and the trail byte
+                      * is not a valid byte then it is an error condition
+                      */
+-                    mySourceChar = 0x7e00 | mySourceChar;
+-                    targetUniChar = 0xffff;
++                    /*
++                     * Ticket 5691: consistent illegal sequences:
++                     * - We include at least the first byte in the illegal sequence.
++                     * - If any of the non-initial bytes could be the start of a character,
++                     *   we stop the illegal sequence before the first one of those.
++                     */
+                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+-                    break;
++                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                    args->converter->toUBytes[0] = UCNV_TILDE;
++                    if( myData->isStateDBCS ?
++                            (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
++                            mySourceChar <= 0x7f
++                    ) {
++                        /* The current byte could be the start of a character: Back it out. */
++                        args->converter->toULength = 1;
++                        --mySource;
++                    } else {
++                        /* Include the current byte in the illegal sequence. */
++                        args->converter->toUBytes[1] = mySourceChar;
++                        args->converter->toULength = 2;
++                    }
++                    args->target = myTarget;
++                    args->source = mySource;
++                    return;
+                 }
+             } else if(myData->isStateDBCS) {
+                 if(args->converter->toUnicodeStatus == 0x00){
+@@ -215,19 +235,36 @@
+                 }
+                 else{
+                     /* trail byte */
++                    int leadIsOk, trailIsOk;
+                     uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
+-                    if( (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21) &&
+-                        (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21)
+-                    ) {
++                    targetUniChar = 0xffff;
++                    /*
++                     * Ticket 5691: consistent illegal sequences:
++                     * - We include at least the first byte in the illegal sequence.
++                     * - If any of the non-initial bytes could be the start of a character,
++                     *   we stop the illegal sequence before the first one of those.
++                     *
++                     * In HZ DBCS, if the second byte is in the 21..7e range,
++                     * we report only the first byte as the illegal sequence.
++                     * Otherwise we convert or report the pair of bytes.
++                     */
++                    leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
++                    trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                    if (leadIsOk && trailIsOk) {
+                         tempBuf[0] = (char) (leadByte+0x80) ;
+                         tempBuf[1] = (char) (mySourceChar+0x80);
+                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+                             tempBuf, 2, args->converter->useFallback);
++                        mySourceChar= (leadByte << 8) | mySourceChar;
++                    } else if (trailIsOk) {
++                        /* report a single illegal byte and continue with the following DBCS starter byte */
++                        --mySource;
++                        mySourceChar = (int32_t)leadByte;
+                     } else {
+-                        targetUniChar = 0xffff;
++                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
++                        /* add another bit so that the code below writes 2 bytes in case of error */
++                        mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                     }
+-                    /* add another bit so that the code below writes 2 bytes in case of error */
+-                    mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                     args->converter->toUnicodeStatus =0x00;
+                 }
+             }
+diff -ru icu.6175/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.6175/source/common/ucnvmbcs.c	2009-06-02 15:47:31.000000000 +0100
++++ icu/source/common/ucnvmbcs.c	2009-06-02 15:56:07.000000000 +0100
+@@ -1697,6 +1697,65 @@
+     pArgs->offsets=offsets;
+ }
+ 
++static UBool  /* TRUE if a final entry whose action is not MBCS_STATE_ILLEGAL is reachable from this state, else FALSE */
++hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
++    const int32_t *row=stateTable[state];  /* the 256 per-byte entries of the given state */
++    int32_t b, entry;
++    /* First test for final entries in this state for some commonly valid byte values (0xa1 and 0x41): a shortcut before the full scan. */
++    entry=row[0xa1];
++    if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
++        MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
++    ) {
++        return TRUE;
++    }
++    entry=row[0x41];
++    if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
++        MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
++    ) {
++        return TRUE;
++    }
++    /* Then test for final entries in this state: any byte value with a non-illegal final action is enough. */
++    for(b=0; b<=0xff; ++b) {
++        entry=row[b];
++        if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
++            MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
++        ) {
++            return TRUE;
++        }
++    }
++    /* Then recurse for transition entries. NOTE(review): no visited-state guard; presumably the state table has no transition cycles -- confirm. */
++    for(b=0; b<=0xff; ++b) {
++        entry=row[b];
++        if( MBCS_ENTRY_IS_TRANSITION(entry) &&
++            hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))
++        ) {
++            return TRUE;
++        }
++    }
++    return FALSE;  /* every entry in this state, and in all states reachable from it, is illegal */
++}
++
++/*
++ * Is byte b a single/lead byte in this state, i.e. could b be the start of a character?
++ * Recurse for transition states (via hasValidTrailBytes()), because here we don't want to say that
++ * b is a lead byte if all byte sequences that start with b are illegal. Returns TRUE or FALSE.
++ */
++static UBool
++isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
++    const int32_t *row=stateTable[state];
++    int32_t entry=row[b];  /* state-table entry for byte b in the given state */
++    if(MBCS_ENTRY_IS_TRANSITION(entry)) {   /* lead byte */
++        return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));
++    } else {
++        uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));  /* final entry: b ends a sequence by itself */
++        if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
++            return FALSE;   /* SI/SO are illegal for DBCS-only conversion */
++        } else {
++            return action!=MBCS_STATE_ILLEGAL;  /* any other final action counts as a single byte */
++        }
++    }
++}
++
+ U_CFUNC void
+ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode) {
+@@ -2052,6 +2111,34 @@
+             sourceIndex=nextSourceIndex;
+         } else if(U_FAILURE(*pErrorCode)) {
+             /* callback(illegal) */
++            if(byteIndex>1) {
++                /*
++                 * Ticket 5691: consistent illegal sequences:
++                 * - We include at least the first byte in the illegal sequence.
++                 * - If any of the non-initial bytes could be the start of a character,
++                 *   we stop the illegal sequence before the first one of those.
++                 */
++                UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
++                int8_t i;
++                for(i=1;
++                    i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
++                    ++i) {}
++                if(i<byteIndex) {
++                    /* Back out some bytes. */
++                    int8_t backOutDistance=byteIndex-i;
++                    int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
++                    byteIndex=i;  /* length of reported illegal byte sequence */
++                    if(backOutDistance<=bytesFromThisBuffer) {
++                        source-=backOutDistance;
++                    } else {
++                        /* Back out bytes from the previous buffer: Need to replay them. */
++                        cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
++                        /* preToULength is negative! */
++                        uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
++                        source=(const uint8_t *)pArgs->source;
++                    }
++                }
++            }
+             break;
+         } else /* unassigned sequences indicated with byteIndex>0 */ {
+             /* try an extension mapping */
+@@ -2062,7 +2149,7 @@
+                               &offsets, sourceIndex,
+                               pArgs->flush,
+                               pErrorCode);
+-            sourceIndex=nextSourceIndex+(int32_t)(source-(const uint8_t *)pArgs->source);
++            sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
+ 
+             if(U_FAILURE(*pErrorCode)) {
+                 /* not mappable or buffer overflow */
+@@ -2353,15 +2440,37 @@
+ 
+     if(c<0) {
+         if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
+-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+-        }
+-        if(U_FAILURE(*pErrorCode)) {
+             /* incomplete character byte sequence */
+             uint8_t *bytes=cnv->toUBytes;
+             cnv->toULength=(int8_t)(source-lastSource);
+             do {
+                 *bytes++=*lastSource++;
+             } while(lastSource<source);
++            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
++        } else if(U_FAILURE(*pErrorCode)) {
++            /* callback(illegal) */
++            /*
++             * Ticket 5691: consistent illegal sequences:
++             * - We include at least the first byte in the illegal sequence.
++             * - If any of the non-initial bytes could be the start of a character,
++             *   we stop the illegal sequence before the first one of those.
++             */
++            UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
++            uint8_t *bytes=cnv->toUBytes;
++            *bytes++=*lastSource++;     /* first byte */
++            if(lastSource==source) {
++                cnv->toULength=1;
++            } else /* lastSource<source: multi-byte character */ {
++                int8_t i;
++                for(i=1;
++                    lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
++                    ++i
++                ) {
++                    *bytes++=*lastSource++;
++                }
++                cnv->toULength=i;
++                source=lastSource;
++            }
+         } else {
+             /* no output because of empty input or only state changes */
+             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+diff -ru icu.6175/source/test/cintltst/nccbtst.c icu/source/test/cintltst/nccbtst.c
+--- icu.6175/source/test/cintltst/nccbtst.c	2009-06-02 15:47:18.000000000 +0100
++++ icu/source/test/cintltst/nccbtst.c	2009-06-02 15:47:38.000000000 +0100
+@@ -2497,13 +2497,13 @@
+ 
+ 
+     static const uint8_t text943[] = {
+-        0x82, 0xa9, 0x82, 0x20, /*0xc8,*/  0x61, 0x8a, 0xbf, 0x8e, 0x9a };
+-    static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22,  0x5b57};
+-    static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22,  0x5b57};
++        0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
++    static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
++    static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
+     static const UChar toUnicode943stop[]= { 0x304b};
+ 
+-    static const int32_t  fromIBM943Offssub[]  = {0, 2, 4, 5, 7};
+-    static const int32_t  fromIBM943Offsskip[] = { 0, 4, 5, 7};
++    static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
++    static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
+     static const int32_t  fromIBM943Offsstop[] = { 0};
+ 
+     gInBufferSize = inputsize;
+@@ -2537,9 +2537,9 @@
+ {
+     static const uint8_t sampleText[] = {
+         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
+-        0xff, /*0x82, 0xa9,*/ 0x32, 0x33};
+-    static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063,  0xfffd,/*0x304b,*/ 0x0032, 0x0033};
+-    static const int32_t  fromIBM943Offssub[]  = {0, 2, 3, 4, 5, 7, 8};
++        0xff, 0x32, 0x33};
++    static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
++    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
+     /*checking illegal value for ibm-943 with substitute*/ 
+     gInBufferSize = inputsize;
+     gOutBufferSize = outputsize;
+diff -ru icu.6175/source/test/cintltst/nucnvtst.c icu/source/test/cintltst/nucnvtst.c
+--- icu.6175/source/test/cintltst/nucnvtst.c	2009-06-02 15:47:18.000000000 +0100
++++ icu/source/test/cintltst/nucnvtst.c	2009-06-02 15:47:38.000000000 +0100
+@@ -2606,7 +2606,7 @@
+     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
+     /*Test for the condition where there is an invalid character*/
+     {
+-        static const uint8_t source2[]={0xa1, 0x01};
++        static const uint8_t source2[]={0xa1, 0x80};
+         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+     }
+     /*Test for the condition where we have a truncated char*/
+@@ -3899,11 +3899,11 @@
+ TestISO_2022_KR() {
+     /* test input */
+     static const uint16_t in[]={
+-                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
+-                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
++                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
++                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
+                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
+                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
+-                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
++                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
+                    ,0x53E3,0x53E4,0x000A,0x000D};
+     const UChar* uSource;
+     const UChar* uSourceLimit;
+diff -ru icu.6175/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.6175/source/test/testdata/conversion.txt	2009-06-02 15:47:18.000000000 +0100
++++ icu/source/test/testdata/conversion.txt	2009-06-02 15:57:41.000000000 +0100
+@@ -48,12 +48,144 @@
+     toUnicode {
+       Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
+       Cases {
++        // Test ticket 5691: consistent illegal sequences
++        // The following test cases are for illegal character byte sequences.
++        //
++        // Unfortunately, we cannot use the Shift-JIS examples from the ticket
++        // comments because our Shift-JIS table is Windows-compatible and
++        // therefore has no illegal single bytes. Same for GBK.
++        // Instead, we use the stricter GB 18030 also for 2-byte examples.
++        // The byte sequences are generally slightly different from the ticket
++        // comment, simply using assigned characters rather than just
++        // theoretically valid sequences.
++        {
++          "gb18030",
++          :bin{ 618140813c81ff7a },
++          "a\u4e02\\x81<\\x81\\xFFz",
++          :intvector{ 0,1,3,3,3,3,4,5,5,5,5,5,5,5,5,7 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "EUC-JP",
++          :bin{ 618fb0a98fb03c8f3cb0a97a },
++          "a\u4e28\\x8F\\xB0<\\x8F<\u9022z",
++          :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "gb18030",
++          :bin{ 618130fc318130fc8181303c3e813cfc817a },
++          "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z",
++          :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "UTF-8",
++          :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a },
++          "a\U00040042\\xF1\\x80\\x81<\\xF1\\x80\\x81\\xFF\\xF1\\x80\\xFF<\\xF1\\xFF<>z",
++          :intvector{ 0,1,1,5,5,5,5,5,5,5,5,5,5,5,5,8,9,9,9,9,9,9,9,9,9,9,9,9,12,12,12,12,13,13,13,13,13,13,13,13,15,15,15,15,16,17,17,17,17,18,18,18,18,19,20,21 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-JP",
++          :bin{ 1b24424141af4142affe41431b2842 },
++          "\u758f\\xAF\u758e\\xAF\\xFE\u790e",
++          :intvector{ 3,5,5,5,5,6,8,8,8,8,8,8,8,8,10 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ibm-25546",
++          :bin{ 411b242943420e4141af4142affe41430f5a },
++          "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
++          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-KR",
++          :bin{ 411b242943420e4141af4142affe41430f5a },
++          "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
++          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 411b242941420e4141af4142affe41430f5a },
++          "AB\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
++          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "HZ",
++          :bin{ 417e7b4141af4142affe41437e7d5a },
++          "A\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
++          :intvector{ 0,3,5,5,5,5,6,8,8,8,8,8,8,8,8,10,14 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        // Test ticket 5691: consistent illegal sequences
++        // The following test cases are for illegal escape/designator/shift sequences.
++        //
++        // ISO-2022-JP and -CN with illegal escape sequences.
++        {
++          "ISO-2022-JP",
++          :bin{ 611b24201b244241411b283f1b28427a },
++          "a\\x1B$ \u758f\\x1B\u2538z",
++          :intvector{ 0,1,1,1,1,2,3,7,9,9,9,9,10,15 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 611b2429201b2429410e41410f7a },
++          "a\\x1B$) \u4eaez",
++          :intvector{ 0,1,1,1,1,2,3,4,10,13 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        // Test ticket 5691: ISO-2022-JP-2 with illegal single-shift SS2 and SS3 sequences.
++        // The first ESC N comes before its designator sequence, the last sequence is ESC+space.
++        {
++          "ISO-2022-JP-2",
++          :bin{ 4e1b4e4e1b2e414e1b4e4e4e1b204e },
++          "N\\x1BNNN\xceN\\x1B N",
++          :intvector{ 0,1,1,1,1,2,3,7,10,11,12,12,12,12,13,14 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-CN-EXT",
++          :bin{ 4e1b4e4e1b242a484e1b4e4e4e4e1b204e },
++          "N\\x1BNNN\u8f0eN\\x1B N",
++          :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-CN-EXT",
++          :bin{ 4f1b4f4f1b242b494f1b4f4f4f4f1b204f },
++          "O\\x1BOOO\u492bO\\x1B O",
++          :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        // Test ticket 5691: HZ with illegal tilde sequences.
++        {
++          "HZ",
++          :bin{ 417e20427e21437e80447e7b41417e207e41427e7f41437e7d5a },
++          "A\\x7E B\\x7E!C\\x7E\\x80D\u4eae\\x7E\\x20\\x7E\u8c05\\x7E\\x7F\u64a9Z",
++          :intvector{ 0,1,1,1,1,2,3,4,4,4,4,5,6,7,7,7,7,7,7,7,7,9,                          // SBCS
++                      12,14,14,14,14,14,14,14,14,16,16,16,16,17,19,19,19,19,19,19,19,19,21, // DBCS
++                      25 },                                                                 // SBCS
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        // Test ticket 5691: Example from Peter Edberg.
++        {
++          "ISO-2022-JP",
++          :bin{ 1b244230212f7e742630801b284a621b2458631b2842648061 },
++          "\u4e9c\ufffd\u7199\ufffdb\ufffd$Xcd\ufffda",
++          :intvector{ 3,5,7,9,14,15,16,17,18,22,23,24 },
++          :int{1}, :int{0}, "", "?", :bin{""}
++        }
+         // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e
+         {
+           "HZ",
+-          :bin{ 7e7b21212120217e217f772100007e217e7d207e7e807e0a2b },
+-          "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd ~\ufffd+",
+-          :intvector{ 2,4,6,8,10,12,14,18,19,21,24 },
++          :bin{ 7e7b21212120217e217f772100007e217e7e7d207e7e807e0a2b },
++          "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd\u3013 ~\ufffd+",
++          :intvector{ 2,4,6,8,10,12,14,15,19,20,22,25 },
+           :int{1}, :int{1}, "", "?", :bin{""}
+         }
+         // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
+@@ -61,8 +193,8 @@
+         {
+           "ISO-2022-JP",
+           :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
+-          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
+-          :intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 },
++          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
++          :intvector{ 3,4,5,9,11,12,14,16,17,19,21,23,25,27 },
+           :int{1}, :int{1}, "", "?", :bin{""}
+         }
+         // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets()
+@@ -341,7 +473,7 @@
+         {
+           "ISO-2022-CN-EXT",
+           :bin{ 411b4e2121 }, "\x41", :intvector{ 0 },
+-          :int{1}, :int{1}, "illesc", ".", :bin{ 1b4e }
++          :int{1}, :int{1}, "illesc", ".", :bin{ 1b }
+         }
+         // G3 designator: recognized, but not supported for -CN (only for -CN-EXT)
+         {
diff --git a/icu.icu5797.backport.patch b/icu.icu5797.backport.patch
new file mode 100644
index 0000000..39e3f77
--- /dev/null
+++ b/icu.icu5797.backport.patch
@@ -0,0 +1,749 @@
+diff -ru icu.5483/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.5483/source/common/ucnv2022.c	2009-06-02 12:47:41.000000000 +0100
++++ icu/source/common/ucnv2022.c	2009-06-02 13:18:23.000000000 +0100
+@@ -473,8 +473,7 @@
+             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
+                 myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);
+             }
+-            myConverterData->myConverterArray[JISX201]      = ucnv_loadSharedData("JISX0201", NULL, errorCode);
+-            myConverterData->myConverterArray[JISX208]      = ucnv_loadSharedData("jisx-208", NULL, errorCode);
++            myConverterData->myConverterArray[JISX208]      = ucnv_loadSharedData("Shift-JIS", NULL, errorCode);
+             if(jpCharsetMasks[version]&CSM(JISX212)) {
+                 myConverterData->myConverterArray[JISX212]  = ucnv_loadSharedData("jisx-212", NULL, errorCode);
+             }
+@@ -1045,14 +1044,6 @@
+                 length=3;
+             }
+         }
+-        /*
+-         * TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.
+-         * Pass in parameter for type of output bytes, for validation and shifting:
+-         * - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?
+-         *   (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)
+-         * - A1-FE: Subtract 80 after range check.
+-         * - SJIS: Shift DBCS result to 21-7E x 21-7E.
+-         */
+         /* is this code point assigned, or do we use fallbacks? */
+         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
+             /* assigned */
+@@ -1110,6 +1101,23 @@
+     }
+ }
+ 
++/*
++ * Check that the result is a 2-byte value with each byte in the range A1..FE
++ * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
++ * to move it to the ISO 2022 range 21..7E.
++ * Return 0 if out of range. NOTE(review): the casts examine only the low 16 bits; presumably callers pass at most 2 bytes -- confirm.
++ */
++static U_INLINE uint32_t
++_2022FromGR94DBCS(uint32_t value) {
++    if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&  /* low 16 bits in A1A1..FEFE, which bounds the lead byte to A1..FE */
++        (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)  /* trail (low) byte in A1..FE */
++    ) {
++        return value - 0x8080;  /* shift down to 21..7e byte range */
++    } else {
++        return 0;  /* not valid for ISO 2022 */
++    }
++}
++
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+ 
+ /**********************************************************************************
+@@ -1238,7 +1246,7 @@
+     }
+     else{
+         cnv->toUBytes[0] =(char) sourceChar;
+-        cnv->toULength = 2;
++        cnv->toULength = 1;
+     }
+ 
+     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
+@@ -1332,6 +1340,181 @@
+     3  /* length of <ESC>(I  HWKANA_7BIT */
+ };
+ 
++/* Map 00..7F to Unicode according to JIS X 0201: 5C -> U+00A5, 7E -> U+203E, every other value maps to itself. */
++static U_INLINE uint32_t
++jisx201ToU(uint32_t value) {
++    if(value < 0x5c) {
++        return value;  /* identity below 5C */
++    } else if(value == 0x5c) {
++        return 0xa5;  /* 5C -> U+00A5 */
++    } else if(value == 0x7e) {
++        return 0x203e;  /* 7E -> U+203E */
++    } else /* value <= 0x7f (not checked here; the caller must ensure it) */ {
++        return value;  /* identity for the remaining values */
++    }
++}
++
++/* Map Unicode to 00..7F according to JIS X 0201 (inverse of jisx201ToU). Return U+FFFE (0xfffe) if unmappable. */
++static U_INLINE uint32_t
++jisx201FromU(uint32_t value) {
++    if(value<=0x7f) {
++        if(value!=0x5c && value!=0x7e) {  /* U+005C and U+007E fall through to the unmappable return below */
++            return value;
++        }
++    } else if(value==0xa5) {
++        return 0x5c;  /* U+00A5 -> 5C */
++    } else if(value==0x203e) {
++        return 0x7e;  /* U+203E -> 7E */
++    }
++    return 0xfffe;  /* unmappable */
++}
++
++/*
++ * Take a valid Shift-JIS byte pair (lead byte in bits 15..8, trail byte in bits 7..0), check that it is in
++ * the range corresponding to JIS X 0208, and convert it to a pair of 21..7E bytes in the same layout.
++ * Return 0 if the byte pair is out of range. Each Shift-JIS lead byte covers two consecutive 21..7E lead values.
++ */
++static U_INLINE uint32_t
++_2022FromSJIS(uint32_t value) {
++    uint8_t trail;
++
++    if(value > 0xEFFC) {
++        return 0;  /* beyond JIS X 0208 */
++    }
++
++    trail = (uint8_t)value;  /* low byte */
++
++    value &= 0xff00;  /* lead byte */
++    if(value <= 0x9f00) {
++        value -= 0x7000;  /* lead 81..9F -> 11..2F */
++    } else /* 0xe000 <= value <= 0xef00 */ {
++        value -= 0xb000;  /* lead E0..EF -> 30..3F */
++    }
++    value <<= 1;  /* double: 22..5E or 60..7E, the second (even) of the two lead values */
++
++    if(trail <= 0x9e) {
++        value -= 0x100;  /* trail in the lower half: use the first (odd) lead value instead */
++        if(trail <= 0x7e) {
++            value |= trail - 0x1f;  /* trail 40..7E -> 21..5F */
++        } else {
++            value |= trail - 0x20;  /* trail 80..9E -> 60..7E */
++        }
++    } else /* trail <= 0xfc */ {
++        value |= trail - 0x7e;  /* trail 9F..FC -> 21..7E */
++    }
++    return value;
++}
++
++/*
++ * Convert a pair of JIS X 0208 21..7E bytes (c1 = lead, c2 = trail) to Shift-JIS, written to bytes[0] and bytes[1].
++ * If either byte is outside 21..7E make sure that the result is not valid
++ * for Shift-JIS so that the converter catches it (explicitly rejected bytes are replaced with 0).
++ * Some invalid byte values already turn into equally invalid Shift-JIS
++ * byte values and need not be tested explicitly.
++ */
++static U_INLINE void
++_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
++    if(c1&1) {
++        ++c1;  /* odd lead: round up to even so the halving below pairs it with the following even lead */
++        if(c2 <= 0x5f) {
++            c2 += 0x1f;  /* 21..5F -> 40..7E (a c2 below 21 lands below 40; relied on to be invalid, see above) */
++        } else if(c2 <= 0x7e) {
++            c2 += 0x20;  /* 60..7E -> 80..9E */
++        } else {
++            c2 = 0;  /* invalid */
++        }
++    } else {
++        if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
++            c2 += 0x7e;  /* even lead: 21..7E -> 9F..FC */
++        } else {
++            c2 = 0;  /* invalid */
++        }
++    }
++    c1 >>= 1;  /* two 21..7E lead values share one Shift-JIS lead byte */
++    if(c1 <= 0x2f) {
++        c1 += 0x70;  /* up to 2F -> up to 9F */
++    } else if(c1 <= 0x3f) {
++        c1 += 0xb0;  /* 30..3F -> E0..EF */
++    } else {
++        c1 = 0;  /* invalid */
++    }
++    bytes[0] = (char)c1;
++    bytes[1] = (char)c2;
++}
++
++/*
++ * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS) Katakana.
++ * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
++ * because Shift-JIS roundtrips half-width Katakana to single bytes.
++ * These were the only fallbacks in ICU's jisx-208.ucm file.
++ * Indexed by (code point - HWKANA_START); each value is a JIS X 0208 byte pair with both bytes in 21..7E.
++ */
++static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {  /* one entry per code point; markers below label U+FF61, U+FF70, U+FF80, U+FF90, U+FF9F */
++    0x2123,  /* U+FF61 */
++    0x2156,
++    0x2157,
++    0x2122,
++    0x2126,
++    0x2572,
++    0x2521,
++    0x2523,
++    0x2525,
++    0x2527,
++    0x2529,
++    0x2563,
++    0x2565,
++    0x2567,
++    0x2543,
++    0x213C,  /* U+FF70 */
++    0x2522,
++    0x2524,
++    0x2526,
++    0x2528,
++    0x252A,
++    0x252B,
++    0x252D,
++    0x252F,
++    0x2531,
++    0x2533,
++    0x2535,
++    0x2537,
++    0x2539,
++    0x253B,
++    0x253D,
++    0x253F,  /* U+FF80 */
++    0x2541,
++    0x2544,
++    0x2546,
++    0x2548,
++    0x254A,
++    0x254B,
++    0x254C,
++    0x254D,
++    0x254E,
++    0x254F,
++    0x2552,
++    0x2555,
++    0x2558,
++    0x255B,
++    0x255E,
++    0x255F,  /* U+FF90 */
++    0x2560,
++    0x2561,
++    0x2562,
++    0x2564,
++    0x2566,
++    0x2568,
++    0x2569,
++    0x256A,
++    0x256B,
++    0x256C,
++    0x256D,
++    0x256F,
++    0x2573,
++    0x212B,
++    0x212C   /* U+FF9F */
++};
++
+ /*
+ * The iteration over various code pages works this way:
+ * i)   Get the currentState from myConverterData->currentState
+@@ -1504,7 +1687,7 @@
+                     }
+                     break;
+                 case HWKANA_7BIT:
+-                    if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {
++                    if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
+                         if(converterData->version==3) {
+                             /* JIS7: use G1 (SO) */
+                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
+@@ -1531,13 +1714,34 @@
+                     break;
+                 case JISX201:
+                     /* G0 SBCS */
+-                    len2 = MBCS_SINGLE_FROM_UCHAR32(
++                    value = jisx201FromU(sourceChar);
++                    if(value <= 0x7f) {
++                        targetValue = value;
++                        len = 1;
++                        cs = cs0;
++                        g = 0;
++                        useFallback = FALSE;
++                    }
++                    break;
++                case JISX208:
++                    /* G0 DBCS from Shift-JIS table */
++                    len2 = MBCS_FROM_UCHAR32_ISO2022(
+                                 converterData->myConverterArray[cs0],
+                                 sourceChar, &value,
+-                                useFallback);
+-                    if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {
+-                        targetValue = value;
+-                        len = len2;
++                                useFallback, MBCS_OUTPUT_2);
++                    if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
++                        value = _2022FromSJIS(value);
++                        if(value != 0) {
++                            targetValue = value;
++                            len = len2;
++                            cs = cs0;
++                            g = 0;
++                            useFallback = FALSE;
++                        }
++                    } else if(len == 0 && useFallback &&
++                              (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
++                        targetValue = hwkana_fb[sourceChar - HWKANA_START];
++                        len = -2;
+                         cs = cs0;
+                         g = 0;
+                         useFallback = FALSE;
+@@ -1569,17 +1773,10 @@
+                              * Check for valid bytes for the encoding scheme.
+                              * This is necessary because the sub-converter (windows-949)
+                              * has a broader encoding scheme than is valid for 2022.
+-                             *
+-                             * Check that the result is a 2-byte value with each byte in the range A1..FE
+-                             * (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte
+-                             * to move it to the ISO 2022 range 21..7E.
+                              */
+-                            if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
+-                                (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
+-                            ) {
+-                                value -= 0x8080;  /* shift down to 21..7e byte range */
+-                            } else {
+-                                break;  /* not valid for ISO 2022 */
++                            value = _2022FromGR94DBCS(value);
++                            if(value == 0) {
++                                break;
+                             }
+                         }
+                         targetValue = value;
+@@ -1755,7 +1952,7 @@
+ static void 
+ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
+                                                UErrorCode* err){
+-    char tempBuf[3];
++    char tempBuf[2];
+     const char *mySource = (char *) args->source;
+     UChar *myTarget = args->target;
+     const char *mySourceLimit = args->sourceLimit;
+@@ -1893,10 +2090,7 @@
+                     break;
+                 case JISX201:
+                     if(mySourceChar <= 0x7f) {
+-                        targetUniChar =
+-                            _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
+-                                myData->myConverterArray[cs],
+-                                mySourceChar);
++                        targetUniChar = jisx201ToU(mySourceChar);
+                     }
+                     break;
+                 case HWKANA_7BIT:
+@@ -1910,8 +2104,13 @@
+                     if(mySource < mySourceLimit) {
+                         char trailByte;
+ getTrailByte:
+-                        tempBuf[0] = (char) (mySourceChar);
+-                        tempBuf[1] = trailByte = *mySource++;
++                        trailByte = *mySource++;
++                        if(cs == JISX208) {
++                            _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf);
++                        } else {
++                            tempBuf[0] = (char)mySourceChar;
++                            tempBuf[1] = trailByte;
++                        }
+                         mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+                     } else {
+@@ -3254,6 +3453,9 @@
+     /* open a set and initialize it with code points that are algorithmically round-tripped */
+     switch(cnvData->locale[0]){
+     case 'j':
++        /* include JIS X 0201 which is hardcoded */
++        sa->add(sa->set, 0xa5);
++        sa->add(sa->set, 0x203e);
+         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
+             /* include Latin-1 for some variants of JP */
+             sa->addRange(sa->set, 0, 0xff);
+@@ -3262,6 +3464,11 @@
+             sa->addRange(sa->set, 0, 0x7f);
+         }
+         if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
++            /*
++             * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks,
++             * we need to include half-width Katakana for all JP variants because
++             * JIS X 0208 has hardcoded fallbacks for them.
++             */
+             /* include half-width Katakana for JP */
+             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+         }
+@@ -3281,15 +3488,7 @@
+         break;
+     }
+ 
+-    /*
+-     * Version-specific for CN:
+-     * CN version 0 does not map CNS planes 3..7 although
+-     * they are all available in the CNS conversion table;
+-     * CN version 1 does map them all.
+-     * The two versions create different Unicode sets.
+-     */
+-    for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
+-        if(cnvData->myConverterArray[i]!=NULL) {
++#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+                 cnvData->version==0 && i==CNS_11643
+             ) {
+@@ -3299,9 +3498,33 @@
+                         sa, UCNV_ROUNDTRIP_SET,
+                         0, 0x81, 0x82,
+                         pErrorCode);
++            }
++#endif
++
++    for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
++        UConverterSetFilter filter;
++        if(cnvData->myConverterArray[i]!=NULL) {
++            if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
++                cnvData->version==0 && i==CNS_11643
++            ) {
++                /*
++                 * Version-specific for CN:
++                 * CN version 0 does not map CNS planes 3..7 although
++                 * they are all available in the CNS conversion table;
++                 * CN version 1 (-EXT) does map them all.
++                 * The two versions create different Unicode sets.
++                 */
++                filter=UCNV_SET_FILTER_2022_CN;
++            } else if(cnvData->locale[0]=='j' && i==JISX208) {
++                /*
++                 * Only add code points that map to Shift-JIS codes
++                 * corresponding to JIS X 0208.
++                 */
++                filter=UCNV_SET_FILTER_SJIS;
+             } else {
+-                ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
++                filter=UCNV_SET_FILTER_NONE;
+             }
++            ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
+         }
+     }
+ 
+diff -ru icu.5483/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.5483/source/common/ucnvmbcs.c	2009-06-02 12:47:41.000000000 +0100
++++ icu/source/common/ucnvmbcs.c	2009-06-02 12:48:08.000000000 +0100
+@@ -340,6 +340,8 @@
+ 
+ /* Miscellaneous ------------------------------------------------------------ */
+ 
++#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
++
+ /* similar to ucnv_MBCSGetNextUChar() but recursive */
+ static void
+ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
+@@ -432,11 +434,14 @@
+         pErrorCode);
+ }
+ 
++#endif
++
+ U_CFUNC void
+-ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+-                             const USetAdder *sa,
+-                             UConverterUnicodeSet which,
+-                             UErrorCode *pErrorCode) {
++ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
++                                         const USetAdder *sa,
++                                         UConverterUnicodeSet which,
++                                         UConverterSetFilter filter,
++                                         UErrorCode *pErrorCode) {
+     const UConverterMBCSTable *mbcsTable;
+     const uint16_t *table;
+ 
+@@ -490,50 +495,26 @@
+                 c+=1024; /* empty stage 2 block */
+             }
+         }
+-    } else if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY) {
+-        /* ignore single-byte results */
++    } else {
+         const uint32_t *stage2;
+-        const uint16_t *stage3, *results;
+-
+-        results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
+-
+-        for(st1=0; st1<maxStage1; ++st1) {
+-            st2=table[st1];
+-            if(st2>(maxStage1>>1)) {
+-                stage2=(const uint32_t *)table+st2;
+-                for(st2=0; st2<64; ++st2) {
+-                    if((st3=stage2[st2])!=0) {
+-                        /* read the stage 3 block */
+-                        stage3=results+16*(uint32_t)(uint16_t)st3;
++        const uint8_t *stage3, *bytes;
++        uint32_t st3Multiplier;
++        uint32_t value;
+ 
+-                        /* get the roundtrip flags for the stage 3 block */
+-                        st3>>=16;
++        bytes=mbcsTable->fromUnicodeBytes;
+ 
+-                        /*
+-                         * Add code points for which the roundtrip flag is set.
+-                         * Once we get a set for fallback mappings, we have to check
+-                         * non-roundtrip stage 3 results for whether they are 0.
+-                         * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+-                         *
+-                         * Ignore single-byte results (<0x100).
+-                         */
+-                        do {
+-                            if((st3&1)!=0 && *stage3>=0x100) {
+-                                sa->add(sa->set, c);
+-                            }
+-                            st3>>=1;
+-                            ++stage3;
+-                        } while((++c&0xf)!=0);
+-                    } else {
+-                        c+=16; /* empty stage 3 block */
+-                    }
+-                }
+-            } else {
+-                c+=1024; /* empty stage 2 block */
+-            }
++        switch(mbcsTable->outputType) {
++        case MBCS_OUTPUT_3:
++        case MBCS_OUTPUT_4_EUC:
++            st3Multiplier=3;
++            break;
++        case MBCS_OUTPUT_4:
++            st3Multiplier=4;
++            break;
++        default:
++            st3Multiplier=2;
++            break;
+         }
+-    } else {
+-        const uint32_t *stage2;
+ 
+         for(st1=0; st1<maxStage1; ++st1) {
+             st2=table[st1];
+@@ -541,6 +522,9 @@
+                 stage2=(const uint32_t *)table+st2;
+                 for(st2=0; st2<64; ++st2) {
+                     if((st3=stage2[st2])!=0) {
++                        /* read the stage 3 block */
++                        stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;
++
+                         /* get the roundtrip flags for the stage 3 block */
+                         st3>>=16;
+ 
+@@ -550,12 +534,49 @@
+                          * non-roundtrip stage 3 results for whether they are 0.
+                          * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+                          */
+-                        do {
+-                            if(st3&1) {
+-                                sa->add(sa->set, c);
+-                            }
+-                            st3>>=1;
+-                        } while((++c&0xf)!=0);
++                        switch(filter) {
++                        case UCNV_SET_FILTER_NONE:
++                            do {
++                                if(st3&1) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_DBCS_ONLY:
++                             /* Ignore single-byte results (<0x100). */
++                            do {
++                                if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_2022_CN:
++                             /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
++                            do {
++                                if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=3;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_SJIS:
++                             /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
++                            do {
++                                if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        default:
++                            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
++                            return;
++                        }
+                     } else {
+                         c+=16; /* empty stage 3 block */
+                     }
+@@ -569,6 +590,19 @@
+     ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
+ }
+ 
++U_CFUNC void
++ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
++                                 const USetAdder *sa,
++                                 UConverterUnicodeSet which,
++                                 UErrorCode *pErrorCode) {
++    ucnv_MBCSGetFilteredUnicodeSetForUnicode(
++        sharedData, sa, which,
++        sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
++            UCNV_SET_FILTER_DBCS_ONLY :
++            UCNV_SET_FILTER_NONE,
++        pErrorCode);
++}
++
+ static void
+ ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
+                    const USetAdder *sa,
+diff -ru icu.5483/source/common/ucnvmbcs.h icu/source/common/ucnvmbcs.h
+--- icu.5483/source/common/ucnvmbcs.h	2009-06-02 12:47:41.000000000 +0100
++++ icu/source/common/ucnvmbcs.h	2009-06-02 12:48:08.000000000 +0100
+@@ -363,6 +363,7 @@
+ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode);
+ 
++#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+ /*
+  * Internal function returning a UnicodeSet for toUnicode() conversion.
+  * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
+@@ -377,6 +378,7 @@
+                            UConverterUnicodeSet which,
+                            uint8_t state, int32_t lowByte, int32_t highByte,
+                            UErrorCode *pErrorCode);
++#endif
+ 
+ /*
+  * Internal function returning a UnicodeSet for toUnicode() conversion.
+@@ -388,9 +390,30 @@
+  */
+ U_CFUNC void
+ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+-                             const USetAdder *sa,
+-                             UConverterUnicodeSet which,
+-                             UErrorCode *pErrorCode);
++                                 const USetAdder *sa,
++                                 UConverterUnicodeSet which,
++                                 UErrorCode *pErrorCode);
++
++typedef enum UConverterSetFilter {
++    UCNV_SET_FILTER_NONE,
++    UCNV_SET_FILTER_DBCS_ONLY,
++    UCNV_SET_FILTER_2022_CN,
++    UCNV_SET_FILTER_SJIS,
++    UCNV_SET_FILTER_COUNT
++} UConverterSetFilter;
++
++/*
++ * Same as ucnv_MBCSGetUnicodeSetForUnicode() but
++ * the set can be filtered by encoding scheme.
++ * Used by stateful converters which share regular conversion tables
++ * but only use a subset of their mappings.
++ */
++U_CFUNC void
++ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
++                                         const USetAdder *sa,
++                                         UConverterUnicodeSet which,
++                                         UConverterSetFilter filter,
++                                         UErrorCode *pErrorCode);
+ 
+ #endif
+ 
+diff -ru icu.5483/source/test/cintltst/nucnvtst.c icu/source/test/cintltst/nucnvtst.c
+--- icu.5483/source/test/cintltst/nucnvtst.c	2009-06-02 12:47:25.000000000 +0100
++++ icu/source/test/cintltst/nucnvtst.c	2009-06-02 12:58:02.000000000 +0100
+@@ -3202,7 +3202,7 @@
+         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
+         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
+         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
+-        0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
++        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+         0x201D, 0x3014, 0x000D, 0x000A,
+         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
+         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
+@@ -3730,7 +3730,7 @@
+         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
+         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
+         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
+-        0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
++        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+         0x201D, 0x000D, 0x000A,
+         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
+         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
+diff -ru icu.5483/source/test/cintltst/udatatst.c icu/source/test/cintltst/udatatst.c
+--- icu.5483/source/test/cintltst/udatatst.c	2009-06-02 12:47:25.000000000 +0100
++++ icu/source/test/cintltst/udatatst.c	2009-06-02 13:09:15.000000000 +0100
+@@ -1260,6 +1260,11 @@
+     {"gb18030",                  "cnv", ucnv_swap},
+     /* MBCS conversion table file with extension */
+     {"*test4x",                  "cnv", ucnv_swap},
++    /* 
++     * MBCS conversion table file without extension, 
++     * to test swapping and preflighting of UTF-8-friendly mbcsIndex[]. 
++     */ 
++    {"jisx-212",                 "cnv", ucnv_swap}, 
+ #endif
+ 
+ #if !UCONFIG_NO_CONVERSION
+diff -ru icu.5483/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.5483/source/test/testdata/conversion.txt	2009-06-02 12:47:25.000000000 +0100
++++ icu/source/test/testdata/conversion.txt	2009-06-02 12:49:51.000000000 +0100
+@@ -48,6 +48,15 @@
+     toUnicode {
+       Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
+       Cases {
++        // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
++        // using the Shift-JIS table for JIS X 0208 (ticket #5797)
++        {
++          "ISO-2022-JP",
++          :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
++          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
++          :intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
+         // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets()
+         {
+           "ISO-8859-3",
+@@ -495,6 +504,15 @@
+         }
+         { "UTF-16BE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
+         { "UTF-16BE", :bin{ d800dc }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ d800dc } }
++        // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
++        // using the Shift-JIS table for JIS X 0208 (ticket #5797)
++        {
++          "ISO-2022-JP",
++          "\u203e\xa5\u4e00\ufa10\u6f3e\u0391",
++          :bin{       1b284a7e5c1b2442306c222e5f2126211b2842 },
++          :intvector{ 0,0,0,0,1,2,2,2,2,2,3,3,4,4,5,5,5,5,5 },
++          :int{1}, :int{0}, "", "?=\u3013", ""  // U+3013 Geta Mark converts to 222e
++        }
+         // Verify that mappings that would result in byte values outside 20..7F (for SBCS)
+         // or 21..7E (for DBCS) are not used.
+         // ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
+@@ -1273,13 +1291,13 @@
+         // versions of ISO-2022-JP
+         {
+           "ISO-2022-JP",
+-          "[\x00-\x0d\x10-\x1a\x1c-\x7f\u0391-\u03a1\uff61-\uff9f\u4e00\u4e01\uffe5]",
+-          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\uffe6-\U0010ffff]",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]",
++          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]",
+           :int{0}
+         }
+         {
+           "ISO-2022-JP-2",
+-          "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0385-\u038a\u0390-\u03a1\uff61-\uff9f\u4e00-\u4e05\uffe6]",
++          "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]",
+           "[\x0e\x0f\x1b\uffe7-\U0010ffff]",
+           :int{0}
+         }
diff --git a/icu.icu6001.backport.patch b/icu.icu6001.backport.patch
new file mode 100644
index 0000000..11b2ee3
--- /dev/null
+++ b/icu.icu6001.backport.patch
@@ -0,0 +1,741 @@
+diff -ru icu.5797/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.5797/source/common/ucnv2022.c	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnv2022.c	2009-06-02 15:05:10.000000000 +0100
+@@ -3399,11 +3399,19 @@
+             /* include ASCII for JP */
+             sa->addRange(sa->set, 0, 0x7f);
+         }
+-        if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
++        if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
+             /*
+-             * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks,
+-             * we need to include half-width Katakana for all JP variants because
+-             * JIS X 0208 has hardcoded fallbacks for them.
++             * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
++             * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
++             * use half-width Katakana.
++             * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
++             * half-width Katakana via the ESC ( I sequence.
++             * However, we only emit (fromUnicode) half-width Katakana according to the
++             * definition of each variant.
++             *
++             * When including fallbacks,
++             * we need to include half-width Katakana Unicode code points for all JP variants because
++             * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
+              */
+             /* include half-width Katakana for JP */
+             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+@@ -3457,6 +3465,12 @@
+                  * corresponding to JIS X 0208.
+                  */
+                 filter=UCNV_SET_FILTER_SJIS;
++            } else if(i==KSC5601) {
++                /*
++                 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
++                 * are broader than GR94.
++                 */
++                filter=UCNV_SET_FILTER_GR94DBCS;
+             } else {
+                 filter=UCNV_SET_FILTER_NONE;
+             }
+@@ -3472,6 +3486,9 @@
+     sa->remove(sa->set, 0x0e);
+     sa->remove(sa->set, 0x0f);
+     sa->remove(sa->set, 0x1b);
++
++    /* ISO 2022 converters do not convert C1 controls either */
++    sa->removeRange(sa->set, 0x80, 0x9f);
+ }
+ 
+ static const UConverterImpl _ISO2022Impl={
+diff -ru icu.5797/source/common/ucnv_ext.c icu/source/common/ucnv_ext.c
+--- icu.5797/source/common/ucnv_ext.c	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnv_ext.c	2009-06-02 15:12:21.000000000 +0100
+@@ -946,7 +946,7 @@
+ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
+                             const int32_t *cx,
+                             const USetAdder *sa,
+-                            UConverterUnicodeSet which,
++                            UBool useFallback,
+                             int32_t minLength,
+                             UChar32 c,
+                             UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
+@@ -966,7 +966,7 @@
+     value=*fromUSectionValues++;
+ 
+     if( value!=0 &&
+-        UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
++        (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
+         UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
+     ) {
+         if(c>=0) {
+@@ -987,12 +987,14 @@
+             /* no mapping, do nothing */
+         } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+             ucnv_extGetUnicodeSetString(
+-                sharedData, cx, sa, which, minLength,
++                sharedData, cx, sa, useFallback, minLength,
+                 U_SENTINEL, s, length+1,
+                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
+                 pErrorCode);
+-        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+-                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
++        } else if((useFallback ?
++                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
++                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
++                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
+                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
+         ) {
+             sa->addString(sa->set, s, length+1);
+@@ -1004,6 +1006,7 @@
+ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
+                       const USetAdder *sa,
+                       UConverterUnicodeSet which,
++                      UConverterSetFilter filter,
+                       UErrorCode *pErrorCode) {
+     const int32_t *cx;
+     const uint16_t *stage12, *stage3, *ps2, *ps3;
+@@ -1011,6 +1014,7 @@
+ 
+     uint32_t value;
+     int32_t st1, stage1Length, st2, st3, minLength;
++    UBool useFallback;
+ 
+     UChar s[UCNV_EXT_MAX_UCHARS];
+     UChar32 c;
+@@ -1027,12 +1031,20 @@
+ 
+     stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
+ 
++    useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
++
+     /* enumerate the from-Unicode trie table */
+     c=0; /* keep track of the current code point while enumerating */
+ 
+-    if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
++    if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
++        filter==UCNV_SET_FILTER_DBCS_ONLY ||
++        filter==UCNV_SET_FILTER_SJIS ||
++        filter==UCNV_SET_FILTER_GR94DBCS
++    ) {
+         /* DBCS-only, ignore single-byte results */
+         minLength=2;
++    } else if(filter==UCNV_SET_FILTER_2022_CN) {
++        minLength=3;
+     } else {
+         minLength=1;
+     }
+@@ -1064,14 +1076,41 @@
+                             length=0;
+                             U16_APPEND_UNSAFE(s, length, c);
+                             ucnv_extGetUnicodeSetString(
+-                                sharedData, cx, sa, which, minLength,
++                                sharedData, cx, sa, useFallback, minLength,
+                                 c, s, length,
+                                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
+                                 pErrorCode);
+-                        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+-                                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
++                        } else if((useFallback ?
++                                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
++                                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
++                                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
+                                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
+                         ) {
++                            switch(filter) {
++                            case UCNV_SET_FILTER_2022_CN:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
++                                    continue;
++                                }
++                                break;
++                            case UCNV_SET_FILTER_SJIS:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
++                                    continue;
++                                }
++                                break;
++                            case UCNV_SET_FILTER_GR94DBCS:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
++                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
++                                     (uint8_t)(value - 0xa1)<=(0xfe - 0xa1))) {
++                                    continue;
++                                }
++                                break;
++                            default:
++                                /*
++                                 * UCNV_SET_FILTER_NONE,
++                                 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
++                                 */
++                                break;
++                            }
+                             sa->add(sa->set, c);
+                         }
+                     } while((++c&0xf)!=0);
+diff -ru icu.5797/source/common/ucnv_ext.h icu/source/common/ucnv_ext.h
+--- icu.5797/source/common/ucnv_ext.h	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnv_ext.h	2009-06-02 15:05:10.000000000 +0100
+@@ -382,10 +382,20 @@
+                            UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
+                            UErrorCode *pErrorCode);
+ 
++/*
++ * Add code points and strings to the set according to the extension mappings.
++ * Limitation on the UConverterSetFilter:
++ * The filters currently assume that they are used with 1:1 mappings.
++ * They only apply to single input code points, and then they pass through
++ * only mappings with single-charset-code results.
++ * For example, the Shift-JIS filter only works for 2-byte results and tests
++ * that those 2 bytes are in the JIS X 0208 range of Shift-JIS.
++ */
+ U_CFUNC void
+ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
+                       const USetAdder *sa,
+                       UConverterUnicodeSet which,
++                      UConverterSetFilter filter,
+                       UErrorCode *pErrorCode);
+ 
+ /* toUnicode helpers -------------------------------------------------------- */
+diff -ru icu.5797/source/common/ucnvhz.c icu/source/common/ucnvhz.c
+--- icu.5797/source/common/ucnvhz.c	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnvhz.c	2009-06-02 15:05:10.000000000 +0100
+@@ -528,6 +528,7 @@
+     sa->add(sa->set, 0x7e);
+ 
+     /* add all of the code points that the sub-converter handles */
++    /* ucnv_MBCSGetFilteredUnicodeSetForUnicode(((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, sa, which, UCNV_SET_FILTER_GR94DBCS, pErrorCode); */
+     ((UConverterDataHZ*)cnv->extraInfo)->
+         gbConverter->sharedData->impl->
+             getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
+diff -ru icu.5797/source/common/ucnv_lmb.c icu/source/common/ucnv_lmb.c
+--- icu.5797/source/common/ucnv_lmb.c	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnv_lmb.c	2009-06-02 15:09:13.000000000 +0100
+@@ -536,7 +536,7 @@
+     NULL,\
+     NULL,\
+     _LMBCSSafeClone,\
+-    _LMBCSGetUnicodeSet\
++    ucnv_getCompleteUnicodeSet\
+ };\
+ static const UConverterStaticData _LMBCSStaticData##n={\
+   sizeof(UConverterStaticData),\
+@@ -662,15 +662,14 @@
+     return &newLMBCS->cnv;
+ }
+ 
+-static void
+-_LMBCSGetUnicodeSet(const UConverter *cnv,
+-                   const USetAdder *sa,
+-                   UConverterUnicodeSet which,
+-                   UErrorCode *pErrorCode) {
+-    /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */
+-    sa->addRange(sa->set, 0, 0xf5ff);
+-    sa->addRange(sa->set, 0xf700, 0x10ffff);
+-}
++/*
++ * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117)
++ * which added all code points except for U+F6xx
++ * because those cannot be represented in the Unicode group.
++ * However, it turns out that windows-950 has roundtrips for all of U+F6xx
++ * which means that LMBCS can convert all Unicode code points after all.
++ * We now simply use ucnv_getCompleteUnicodeSet().
++ */
+ 
+ /* 
+    Here's the basic helper function that we use when converting from
+diff -ru icu.5797/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.5797/source/common/ucnvmbcs.c	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnvmbcs.c	2009-06-02 15:12:40.000000000 +0100
+@@ -463,9 +463,23 @@
+ 
+     if(mbcsTable->outputType==MBCS_OUTPUT_1) {
+         const uint16_t *stage2, *stage3, *results;
++        uint16_t minValue;
+ 
+         results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
+ 
++        /*
++         * Set a threshold variable for selecting which mappings to use.
++         * See ucnv_MBCSSingleFromBMPWithOffsets() and
++         * MBCS_SINGLE_RESULT_FROM_U() for details.
++         */
++        if(which==UCNV_ROUNDTRIP_SET) {
++            /* use only roundtrips */
++            minValue=0xf00;
++        } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
++            /* use all roundtrip and fallback results */
++            minValue=0x800;
++        }
++
+         for(st1=0; st1<maxStage1; ++st1) {
+             st2=table[st1];
+             if(st2>maxStage1) {
+@@ -475,15 +489,8 @@
+                         /* read the stage 3 block */
+                         stage3=results+st3;
+ 
+-                        /*
+-                         * Add code points for which the roundtrip flag is set.
+-                         * Once we get a set for fallback mappings, we have to use
+-                         * a threshold variable with a value of 0x800.
+-                         * See ucnv_MBCSSingleFromBMPWithOffsets() and
+-                         * MBCS_SINGLE_RESULT_FROM_U() for details.
+-                         */
+                         do {
+-                            if(*stage3++>=0xf00) {
++                            if(*stage3++>=minValue) {
+                                 sa->add(sa->set, c);
+                             }
+                         } while((++c&0xf)!=0);
+@@ -500,9 +507,12 @@
+         const uint8_t *stage3, *bytes;
+         uint32_t st3Multiplier;
+         uint32_t value;
++        UBool useFallback;
+ 
+         bytes=mbcsTable->fromUnicodeBytes;
+ 
++        useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
++
+         switch(mbcsTable->outputType) {
+         case MBCS_OUTPUT_3:
+         case MBCS_OUTPUT_4_EUC:
+@@ -529,9 +539,8 @@
+                         st3>>=16;
+ 
+                         /*
+-                         * Add code points for which the roundtrip flag is set.
+-                         * Once we get a set for fallback mappings, we have to check
+-                         * non-roundtrip stage 3 results for whether they are 0.
++                         * Add code points for which the roundtrip flag is set,
++                         * or which map to non-zero bytes if we use fallbacks.
+                          * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+                          */
+                         switch(filter) {
+@@ -539,6 +548,23 @@
+                             do {
+                                 if(st3&1) {
+                                     sa->add(sa->set, c);
++                                    stage3+=st3Multiplier;
++                                } else if(useFallback) {
++                                    uint8_t b=0;
++                                    switch(st3Multiplier) {
++                                    case 4:
++                                        b|=*stage3++;
++                                    case 3:
++                                        b|=*stage3++;
++                                    case 2:
++                                        b|=stage3[0]|stage3[1];
++                                        stage3+=2;
++                                    default:
++                                        break;
++                                    }
++                                    if(b!=0) {
++                                        sa->add(sa->set, c);
++                                    }
+                                 }
+                                 st3>>=1;
+                             } while((++c&0xf)!=0);
+@@ -546,7 +572,7 @@
+                         case UCNV_SET_FILTER_DBCS_ONLY:
+                              /* Ignore single-byte results (<0x100). */
+                             do {
+-                                if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) {
++                                if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) {
+                                     sa->add(sa->set, c);
+                                 }
+                                 st3>>=1;
+@@ -556,7 +582,7 @@
+                         case UCNV_SET_FILTER_2022_CN:
+                              /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
+                             do {
+-                                if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) {
++                                if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) {
+                                     sa->add(sa->set, c);
+                                 }
+                                 st3>>=1;
+@@ -566,7 +592,20 @@
+                         case UCNV_SET_FILTER_SJIS:
+                              /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
+                             do {
+-                                if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
++                                if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_GR94DBCS:
++                            /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
++                            do {
++                                if( ((st3&1)!=0 || useFallback) &&
++                                    (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
++                                    (uint8_t)(value - 0xa1)<=(0xfe - 0xa1)
++                                ) {
+                                     sa->add(sa->set, c);
+                                 }
+                                 st3>>=1;
+@@ -587,7 +626,7 @@
+         }
+     }
+ 
+-    ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
++    ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
+ }
+ 
+ U_CFUNC void
+diff -ru icu.5797/source/common/ucnvmbcs.h icu/source/common/ucnvmbcs.h
+--- icu.5797/source/common/ucnvmbcs.h	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnvmbcs.h	2009-06-02 15:05:10.000000000 +0100
+@@ -399,6 +399,7 @@
+     UCNV_SET_FILTER_DBCS_ONLY,
+     UCNV_SET_FILTER_2022_CN,
+     UCNV_SET_FILTER_SJIS,
++    UCNV_SET_FILTER_GR94DBCS,
+     UCNV_SET_FILTER_COUNT
+ } UConverterSetFilter;
+ 
+diff -ru icu.5797/source/common/ucnv_set.c icu/source/common/ucnv_set.c
+--- icu.5797/source/common/ucnv_set.c	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/ucnv_set.c	2009-06-02 15:05:10.000000000 +0100
+@@ -1,7 +1,7 @@
+ /*
+ *******************************************************************************
+ *
+-*   Copyright (C) 2003-2005, International Business Machines
++*   Copyright (C) 2003-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+@@ -52,7 +52,8 @@
+             uset_add,
+             uset_addRange,
+             uset_addString,
+-            uset_remove
++            uset_remove,
++            uset_removeRange
+         };
+         sa.set=setFillIn;
+ 
+diff -ru icu.5797/source/common/unicode/ucnv.h icu/source/common/unicode/ucnv.h
+--- icu.5797/source/common/unicode/ucnv.h	2009-06-02 14:45:30.000000000 +0100
++++ icu/source/common/unicode/ucnv.h	2009-06-02 15:05:10.000000000 +0100
+@@ -870,6 +870,8 @@
+ typedef enum UConverterUnicodeSet {
+     /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+     UCNV_ROUNDTRIP_SET,
++    /** Select the set of Unicode code points with roundtrip or fallback mappings. @draft ICU 4.0 */
++    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+     /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+     UCNV_SET_COUNT
+ } UConverterUnicodeSet;
+@@ -878,11 +880,16 @@
+ /**
+  * Returns the set of Unicode code points that can be converted by an ICU converter.
+  *
+- * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET):
++ * Returns one of several kinds of set:
++ *
++ * 1. UCNV_ROUNDTRIP_SET
++ *
+  * The set of all Unicode code points that can be roundtrip-converted
+- * (converted without any data loss) with the converter.
++ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+  * This set will not include code points that have fallback mappings
+  * or are only the result of reverse fallback mappings.
++ * This set will also not include PUA code points with fallbacks, although
++ * ucnv_fromUnicode() will always use those mappings despite ucnv_setFallback().
+  * See UTR #22 "Character Mapping Markup Language"
+  * at http://www.unicode.org/reports/tr22/
+  *
+@@ -893,6 +900,12 @@
+  *   by comparing its roundtrip set with the set of ExemplarCharacters from
+  *   ICU's locale data or other sources
+  *
++ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
++ *
++ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
++ * when fallbacks are turned on (see ucnv_setFallback()).
++ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
++ *
+  * In the future, there may be more UConverterUnicodeSet choices to select
+  * sets with different properties.
+  *
+diff -ru icu.5797/source/common/uset_imp.h icu/source/common/uset_imp.h
+--- icu.5797/source/common/uset_imp.h	2009-06-02 14:45:31.000000000 +0100
++++ icu/source/common/uset_imp.h	2009-06-02 15:05:10.000000000 +0100
+@@ -36,6 +36,9 @@
+ typedef void U_CALLCONV
+ USetRemove(USet *set, UChar32 c);
+ 
++typedef void U_CALLCONV
++USetRemoveRange(USet *set, UChar32 start, UChar32 end);
++
+ /**
+  * Interface for adding items to a USet, to keep low-level code from
+  * statically depending on the USet implementation.
+@@ -47,6 +50,7 @@
+     USetAddRange *addRange;
+     USetAddString *addString;
+     USetRemove *remove;
++    USetRemoveRange *removeRange;
+ };
+ typedef struct USetAdder USetAdder;
+ 
+diff -ru icu.5797/source/test/intltest/convtest.cpp icu/source/test/intltest/convtest.cpp
+--- icu.5797/source/test/intltest/convtest.cpp	2009-06-02 14:45:18.000000000 +0100
++++ icu/source/test/intltest/convtest.cpp	2009-06-02 15:09:31.000000000 +0100
+@@ -59,6 +59,7 @@
+         case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
+         case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
+         case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
++        case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
+         default: name=""; break; //needed to end loop
+     }
+ }
+@@ -454,6 +455,184 @@
+     }
+ }
+ 
++U_CDECL_BEGIN
++static void U_CALLCONV
++getUnicodeSetCallback(const void *context,
++                      UConverterFromUnicodeArgs *fromUArgs,
++                      const UChar* codeUnits,
++                      int32_t length,
++                      UChar32 codePoint,
++                      UConverterCallbackReason reason,
++                      UErrorCode *pErrorCode) {
++    if(reason<=UCNV_IRREGULAR) {
++        ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
++        *pErrorCode=U_ZERO_ERROR;                    // skip
++    }  // else ignore the reset, close and clone calls.
++}
++U_CDECL_END
++
++// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
++void
++ConversionTest::TestGetUnicodeSet2() {
++    // Build a string with all code points.
++    UChar32 cpLimit;
++    int32_t s0Length;
++    if(quick) {
++        cpLimit=s0Length=0x10000;  // BMP only
++    } else {
++        cpLimit=0x110000;
++        s0Length=0x10000+0x200000;  // BMP + surrogate pairs
++    }
++    UChar *s0=new UChar[s0Length];
++    if(s0==NULL) {
++        return;
++    }
++    UChar *s=s0;
++    UChar32 c;
++    UChar c2;
++    // low BMP
++    for(c=0; c<=0xd7ff; ++c) {
++        *s++=(UChar)c;
++    }
++    // trail surrogates
++    for(c=0xdc00; c<=0xdfff; ++c) {
++        *s++=(UChar)c;
++    }
++    // lead surrogates
++    // (after trails so that there is not even one surrogate pair in between)
++    for(c=0xd800; c<=0xdbff; ++c) {
++        *s++=(UChar)c;
++    }
++    // high BMP
++    for(c=0xe000; c<=0xffff; ++c) {
++        *s++=(UChar)c;
++    }
++    // supplementary code points = surrogate pairs
++    if(cpLimit==0x110000) {
++        for(c=0xd800; c<=0xdbff; ++c) {
++            for(c2=0xdc00; c2<=0xdfff; ++c2) {
++                *s++=(UChar)c;
++                *s++=c2;
++            }
++        }
++    }
++
++    static const char *const cnvNames[]={
++        "UTF-8",
++        "UTF-7",
++        "UTF-16",
++        "US-ASCII",
++        "ISO-8859-1",
++        "windows-1252",
++        "Shift-JIS",
++        "ibm-1390",  // EBCDIC_STATEFUL table
++        "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
++        // "HZ", TODO(markus): known bug, the set incorrectly contains [\u02CA\u02CB\u02D9\u2010\u2013\u2015...]
++        "ISO-2022-JP",
++        "JIS7",
++        "ISO-2022-CN",
++        "ISO-2022-CN-EXT",
++        "LMBCS"
++    };
++    char buffer[1024];
++    int32_t i;
++    for(i=0; i<LENGTHOF(cnvNames); ++i) {
++        UErrorCode errorCode=U_ZERO_ERROR;
++        UConverter *cnv=cnv_open(cnvNames[i], errorCode);
++        if(U_FAILURE(errorCode)) {
++            errln("failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
++            continue;
++        }
++        UnicodeSet expected;
++        ucnv_setFromUCallBack(cnv, getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
++        if(U_FAILURE(errorCode)) {
++            errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
++            ucnv_close(cnv);
++            continue;
++        }
++        UConverterUnicodeSet which;
++        for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
++            if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
++                ucnv_setFallback(cnv, TRUE);
++            }
++            expected.add(0, cpLimit-1);
++            s=s0;
++            UBool flush;
++            do {
++                char *t=buffer;
++                flush=(UBool)(s==s0+s0Length);
++                ucnv_fromUnicode(cnv, &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
++                if(U_FAILURE(errorCode)) {
++                    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
++                        errorCode=U_ZERO_ERROR;
++                        continue;
++                    } else {
++                        break;  // unexpected error, should not occur
++                    }
++                }
++            } while(!flush);
++            UnicodeSet set;
++            ucnv_getUnicodeSet(cnv, (USet *)&set, which, &errorCode);
++            if(cpLimit<0x110000) {
++                set.remove(cpLimit, 0x10ffff);
++            }
++            if(which==UCNV_ROUNDTRIP_SET) {
++                // ignore PUA code points because they will be converted even if they
++                // are fallbacks and when other fallbacks are turned off,
++                // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
++                expected.remove(0xe000, 0xf8ff);
++                expected.remove(0xf0000, 0xffffd);
++                expected.remove(0x100000, 0x10fffd);
++                set.remove(0xe000, 0xf8ff);
++                set.remove(0xf0000, 0xffffd);
++                set.remove(0x100000, 0x10fffd);
++            }
++            if(set!=expected) {
++                // First try to see if we have different sets because ucnv_getUnicodeSet()
++                // added strings: The above conversion method does not tell us what strings might be convertible.
++                // Remove strings from the set and compare again.
++                // Unfortunately, there are no good, direct set methods for finding out whether there are strings
++                // in the set, nor for enumerating or removing just them.
++                // Intersect all code points with the set. The intersection will not contain strings.
++                UnicodeSet temp(0, 0x10ffff);
++                temp.retainAll(set);
++                set=temp;
++            }
++            if(set!=expected) {
++                UnicodeSet diffSet;
++                UnicodeString out;
++
++                // are there items that must be in the set but are not?
++                (diffSet=expected).removeAll(set);
++                if(!diffSet.isEmpty()) {
++                    diffSet.toPattern(out, TRUE);
++                    if(out.length()>100) {
++                        out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
++                    }
++                    errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
++                            cnvNames[i], which);
++                    errln(out);
++                }
++
++                // are there items that must not be in the set but are?
++                (diffSet=set).removeAll(expected);
++                if(!diffSet.isEmpty()) {
++                    diffSet.toPattern(out, TRUE);
++                    if(out.length()>100) {
++                        out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
++                    }
++                    errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
++                            cnvNames[i], which);
++                    errln(out);
++                }
++            }
++        }
++        ucnv_close(cnv);  // release the converter on the normal path too; it was only closed on callback-setup failure
++    }
++
++    delete [] s0;
++}
++
+ // open testdata or ICU data converter ------------------------------------- ***
+ 
+ UConverter *
+diff -ru icu.5797/source/test/intltest/convtest.h icu/source/test/intltest/convtest.h
+--- icu.5797/source/test/intltest/convtest.h	2009-06-02 14:45:18.000000000 +0100
++++ icu/source/test/intltest/convtest.h	2009-06-02 15:05:10.000000000 +0100
+@@ -64,6 +64,7 @@
+     void TestToUnicode();
+     void TestFromUnicode();
+     void TestGetUnicodeSet();
++    void TestGetUnicodeSet2();
+ 
+ private:
+     UBool
+diff -ru icu.5797/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.5797/source/test/testdata/conversion.txt	2009-06-02 14:45:18.000000000 +0100
++++ icu/source/test/testdata/conversion.txt	2009-06-02 15:25:04.000000000 +0100
+@@ -1198,16 +1198,29 @@
+         // versions of ISO-2022-JP
+         {
+           "ISO-2022-JP",
+-          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]",
+-          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2015\u203e\u4e00\u4e01\uffe5]",
++          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u2014\u301c\u4e02\u4e27-\u4e29\u4fe0\u663b\u9eb5\ufa0e-\ufa2d\uff61-\uff9f\uffe4\uffe6-\U0010ffff]",
+           :int{0}
+         }
+         {
+           "ISO-2022-JP-2",
+-          "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]",
+-          "[\x0e\x0f\x1b\uffe7-\U0010ffff]",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uffe6]",
++          "[\x0e\x0f\x1b\uff61-\uff9f\uffe4\uffe7-\U0010ffff]",
+           :int{0}
+         }
++        {
++          "JIS7",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uff61-\uff9f\uffe6]",
++          "[\x0e\x0f\x1b\uffe4\uffe7-\U0010ffff]",
++          :int{0}
++        }
++        // with fallbacks
++        {
++          "ISO-2022-JP",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2014\u2015\u203e\u301c\u4e00\u4e01\u4fe0\u9eb5\uff61-\uff9f\uffe5]",
++          "[\x0e\x0f\x1b\xa6\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\u663b\ufa0e-\ufa2d\uffe4\uffe6-\U0010ffff]",
++          :int{1}
++        }
+ 
+         // versions of ISO-2022-CN
+         {
+@@ -1223,6 +1236,14 @@
+           :int{0}
+         }
+ 
++        // LMBCS
++        {
++          "LMBCS",
++          "[\x00-\U0010ffff]",
++          "[]",
++          :int{0}
++        }
++
+         // DBCS-only
+         {
+           "ibm-971",
diff --git a/icu.icu6002.backport.patch b/icu.icu6002.backport.patch
new file mode 100644
index 0000000..51f0d75
--- /dev/null
+++ b/icu.icu6002.backport.patch
@@ -0,0 +1,397 @@
+diff -ru icu.6001/source/common/ucnv_ext.c icu/source/common/ucnv_ext.c
+--- icu.6001/source/common/ucnv_ext.c	2009-06-02 15:29:01.000000000 +0100
++++ icu/source/common/ucnv_ext.c	2009-06-02 15:29:18.000000000 +0100
+@@ -1036,15 +1036,13 @@
+     /* enumerate the from-Unicode trie table */
+     c=0; /* keep track of the current code point while enumerating */
+ 
+-    if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
+-        filter==UCNV_SET_FILTER_DBCS_ONLY ||
+-        filter==UCNV_SET_FILTER_SJIS ||
+-        filter==UCNV_SET_FILTER_GR94DBCS
++    if(filter==UCNV_SET_FILTER_2022_CN) {
++        minLength=3;
++    } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
++               filter!=UCNV_SET_FILTER_NONE
+     ) {
+         /* DBCS-only, ignore single-byte results */
+         minLength=2;
+-    } else if(filter==UCNV_SET_FILTER_2022_CN) {
+-        minLength=3;
+     } else {
+         minLength=1;
+     }
+@@ -1104,6 +1102,13 @@
+                                     continue;
+                                 }
+                                 break;
++                            case UCNV_SET_FILTER_HZ:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
++                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
++                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
++                                    continue;
++                                }
++                                break;
+                             default:
+                                 /*
+                                  * UCNV_SET_FILTER_NONE,
+diff -ru icu.6001/source/common/ucnvhz.c icu/source/common/ucnvhz.c
+--- icu.6001/source/common/ucnvhz.c	2009-06-02 15:29:01.000000000 +0100
++++ icu/source/common/ucnvhz.c	2009-06-02 15:29:15.000000000 +0100
+@@ -72,7 +72,7 @@
+     cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ));
+     if(cnv->extraInfo != NULL){
+         uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ));
+-        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
++        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("GBK",errorCode);
+     }
+     else {
+         *errorCode = U_MEMORY_ALLOCATION_ERROR;
+@@ -141,7 +141,7 @@
+     UChar *myTarget = args->target;
+     const char *mySourceLimit = args->sourceLimit;
+     UChar32 targetUniChar = 0x0000;
+-    UChar mySourceChar = 0x0000;
++    int32_t mySourceChar = 0x0000;
+     UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
+     tempBuf[0]=0; 
+     tempBuf[1]=0;
+@@ -156,90 +156,71 @@
+             
+             mySourceChar= (unsigned char) *mySource++;
+ 
+-            switch(mySourceChar){
++            if(args->converter->mode == UCNV_TILDE) {
++                /* second byte after ~ */
++                args->converter->mode=0;
++                switch(mySourceChar) {
+                 case 0x0A:
+-                    if(args->converter->mode ==UCNV_TILDE){
+-                        args->converter->mode=0;
+-                        
+-                    }
+-                    *(myTarget++)=(UChar)mySourceChar;
++                    /* no output for ~\n (line-continuation marker) */
+                     continue;
+-            
+                 case UCNV_TILDE:
+-                    if(args->converter->mode ==UCNV_TILDE){
+-                        *(myTarget++)=(UChar)mySourceChar;
+-                        args->converter->mode=0;
+-                        continue;
+-                        
++                    if(args->offsets) {
++                        args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
+                     }
+-                    else if(args->converter->toUnicodeStatus !=0){
+-                        args->converter->mode=0;
+-                        break;
+-                    }
+-                    else{
+-                        args->converter->mode = UCNV_TILDE;
+-                        continue;
+-                    }
+-                
+-                
++                    *(myTarget++)=(UChar)mySourceChar;
++                    continue;
+                 case UCNV_OPEN_BRACE:
+-                    if(args->converter->mode == UCNV_TILDE){
+-                        args->converter->mode=0;
+-                        myData->isStateDBCS = TRUE;
+-                        continue;
+-                    }
+-                    else{
+-                        break;
+-                    }
+-               
+-                
++                    myData->isStateDBCS = TRUE;
++                    continue;
+                 case UCNV_CLOSE_BRACE:
+-                    if(args->converter->mode == UCNV_TILDE){
+-                        args->converter->mode=0;
+-                         myData->isStateDBCS = FALSE;
+-                        continue;
+-                    }
+-                    else{
+-                        break;
+-                    }
+-                
++                    myData->isStateDBCS = FALSE;
++                    continue;
+                 default:
+                      /* if the first byte is equal to TILDE and the trail byte
+                      * is not a valid byte then it is an error condition
+                      */
+-                    if(args->converter->mode == UCNV_TILDE){
+-                        args->converter->mode=0;
+-                        mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
+-                        goto SAVE_STATE;
+-                    }
+-                    
++                    mySourceChar = 0x7e00 | mySourceChar;
++                    targetUniChar = 0xffff;
+                     break;
+-
+-            }
+-             
+-            if(myData->isStateDBCS){
++                }
++            } else if(myData->isStateDBCS) {
+                 if(args->converter->toUnicodeStatus == 0x00){
+-                    args->converter->toUnicodeStatus = (UChar) mySourceChar;
++                    /* lead byte */
++                    if(mySourceChar == UCNV_TILDE) {
++                        args->converter->mode = UCNV_TILDE;
++                    } else {
++                        /* add another bit to distinguish a 0 byte from not having seen a lead byte */
++                        args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
++                    }
+                     continue;
+                 }
+                 else{
+-                    tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
+-                    tempBuf[1] = (char) (mySourceChar+0x80);
+-                    mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
++                    /* trail byte */
++                    uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
++                    if( (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21) &&
++                        (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21)
++                    ) {
++                        tempBuf[0] = (char) (leadByte+0x80) ;
++                        tempBuf[1] = (char) (mySourceChar+0x80);
++                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
++                            tempBuf, 2, args->converter->useFallback);
++                    } else {
++                        targetUniChar = 0xffff;
++                    }
++                    /* add another bit so that the code below writes 2 bytes in case of error */
++                    mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                     args->converter->toUnicodeStatus =0x00;
+-                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+-                        tempBuf, 2, args->converter->useFallback);
+                 }
+             }
+             else{
+-                if(args->converter->fromUnicodeStatus == 0x00){
+-                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+-                        mySource - 1, 1, args->converter->useFallback);
+-                }
+-                else{
+-                    goto SAVE_STATE;
++                if(mySourceChar == UCNV_TILDE) {
++                    args->converter->mode = UCNV_TILDE;
++                    continue;
++                } else if(mySourceChar <= 0x7f) {
++                    targetUniChar = (UChar)mySourceChar;  /* ASCII */
++                } else {
++                    targetUniChar = 0xffff;
+                 }
+-
+             }
+             if(targetUniChar < 0xfffe){
+                 if(args->offsets) {
+@@ -248,26 +229,17 @@
+ 
+                 *(myTarget++)=(UChar)targetUniChar;
+             }
+-            else if(targetUniChar>=0xfffe){
+-SAVE_STATE:
++            else /* targetUniChar>=0xfffe */ {
+                 if(targetUniChar == 0xfffe){
+                     *err = U_INVALID_CHAR_FOUND;
+                 }
+                 else{
+                     *err = U_ILLEGAL_CHAR_FOUND;
+                 }
+-                if(myData->isStateDBCS){
+-                    /* this should never occur since isStateDBCS is set to true 
+-                     * only after tempBuf[0] and tempBuf[1]
+-                     * are set to the input ..  just to please BEAM 
+-                     */
+-                    if(tempBuf[0]==0 || tempBuf[1]==0){
+-                        *err = U_INTERNAL_PROGRAM_ERROR;
+-                    }else{
+-                        args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
+-                        args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
+-                        args->converter->toULength=2;
+-                    }
++                if(mySourceChar > 0xff){
++                    args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
++                    args->converter->toUBytes[1] = (uint8_t)mySourceChar;
++                    args->converter->toULength=2;
+                 }
+                 else{
+                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+@@ -328,16 +300,21 @@
+                 escSeq = TILDE_ESCAPE;
+                 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
+                 continue;
+-            }
+-            else{
++            } else if(mySourceChar <= 0x7f) {
++                length = 1;
++                targetUniChar = mySourceChar;
++            } else {
+                 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
+                     mySourceChar,&targetUniChar,args->converter->useFallback);
+-
+-            }
+-            /* only DBCS or SBCS characters are expected*/
+-            /* DB haracters with high bit set to 1 are expected */
+-            if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){
+-                targetUniChar= missingCharMarker;
++                /* we can only use lead bytes 21..7D and trail bytes 21..7E */
++                if( length == 2 &&
++                    (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
++                    (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
++                ) {
++                    targetUniChar -= 0x8080;
++                } else {
++                    targetUniChar = missingCharMarker;
++                }
+             }
+             if (targetUniChar != missingCharMarker){
+                myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);     
+@@ -360,22 +337,22 @@
+             
+                 if(isTargetUCharDBCS){
+                     if( myTargetIndex <targetLength){
+-                        myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80);
++                        myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
+                         if(offsets){
+                             *(offsets++) = mySourceIndex-1;
+                         }
+                         if(myTargetIndex < targetLength){
+-                            myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80);
++                            myTarget[myTargetIndex++] =(char) targetUniChar;
+                             if(offsets){
+                                 *(offsets++) = mySourceIndex-1;
+                             }
+                         }else{
+-                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
++                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+                             *err = U_BUFFER_OVERFLOW_ERROR;
+                         } 
+                     }else{
+-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80);
+-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
++                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
++                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+                         *err = U_BUFFER_OVERFLOW_ERROR;
+                     }
+ 
+@@ -524,15 +501,14 @@
+                   const USetAdder *sa,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode) {
+-    /* the tilde '~' is hardcoded in the converter */
+-    sa->add(sa->set, 0x7e);
++    /* HZ converts all of ASCII */
++    sa->addRange(sa->set, 0, 0x7f);
+ 
+     /* add all of the code points that the sub-converter handles */
+-    /* ucnv_MBCSGetFilteredUnicodeSetForUnicode(((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, sa, which, UCNV_SET_FILTER_GR94DBCS, pErrorCode); */
+-    ((UConverterDataHZ*)cnv->extraInfo)->
+-        gbConverter->sharedData->impl->
+-            getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
+-                          sa, which, pErrorCode);
++    ucnv_MBCSGetFilteredUnicodeSetForUnicode(
++        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
++        sa, which, UCNV_SET_FILTER_HZ,
++        pErrorCode);
+ }
+ 
+ static const UConverterImpl _HZImpl={
+diff -ru icu.6001/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.6001/source/common/ucnvmbcs.c	2009-06-02 15:29:01.000000000 +0100
++++ icu/source/common/ucnvmbcs.c	2009-06-02 15:35:01.000000000 +0100
+@@ -612,6 +612,19 @@
+                                 stage3+=2;  /* +=st3Multiplier */
+                             } while((++c&0xf)!=0);
+                             break;
++                        case UCNV_SET_FILTER_HZ:
++                            /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD, trail byte A1..FE). */
++                            do {
++                                if( ((st3&1)!=0 || useFallback) &&
++                                    (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
++                                    (uint8_t)(value - 0xa1)<=(0xfe - 0xa1)
++                                ) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
+                         default:
+                             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+                             return;
+diff -ru icu.6001/source/common/ucnvmbcs.h icu/source/common/ucnvmbcs.h
+--- icu.6001/source/common/ucnvmbcs.h	2009-06-02 15:29:01.000000000 +0100
++++ icu/source/common/ucnvmbcs.h	2009-06-02 15:29:15.000000000 +0100
+@@ -400,6 +400,7 @@
+     UCNV_SET_FILTER_2022_CN,
+     UCNV_SET_FILTER_SJIS,
+     UCNV_SET_FILTER_GR94DBCS,
++    UCNV_SET_FILTER_HZ,
+     UCNV_SET_FILTER_COUNT
+ } UConverterSetFilter;
+ 
+diff -ru icu.6001/source/test/cintltst/ncnvtst.c icu/source/test/cintltst/ncnvtst.c
+--- icu.6001/source/test/cintltst/ncnvtst.c	2009-06-02 15:28:46.000000000 +0100
++++ icu/source/test/cintltst/ncnvtst.c	2009-06-02 15:29:15.000000000 +0100
+@@ -1928,7 +1928,7 @@
+ #if !UCONFIG_NO_LEGACY_CONVERSION
+         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
+         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
+-        { "HZ", 0x410, 0x44f, 0x4e00, 0x4eff, 0xac00, 0xd7ff },
++        /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
+         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
+ #else
+         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
+diff -ru icu.6001/source/test/intltest/convtest.cpp icu/source/test/intltest/convtest.cpp
+--- icu.6001/source/test/intltest/convtest.cpp	2009-06-02 15:28:46.000000000 +0100
++++ icu/source/test/intltest/convtest.cpp	2009-06-02 15:29:15.000000000 +0100
+@@ -527,7 +527,7 @@
+         "Shift-JIS",
+         "ibm-1390",  // EBCDIC_STATEFUL table
+         "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
+-        // "HZ", TODO(markus): known bug, the set incorrectly contains [\u02CA\u02CB\u02D9\u2010\u2013\u2015...]
++        "HZ",
+         "ISO-2022-JP",
+         "JIS7",
+         "ISO-2022-CN",
+diff -ru icu.6001/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.6001/source/test/testdata/conversion.txt	2009-06-02 15:28:46.000000000 +0100
++++ icu/source/test/testdata/conversion.txt	2009-06-02 15:29:15.000000000 +0100
+@@ -48,6 +48,14 @@
+     toUnicode {
+       Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
+       Cases {
++        // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e
++        {
++          "HZ",
++          :bin{ 7e7b21212120217e217f772100007e217e7d207e7e807e0a2b },
++          "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd ~\ufffd+",
++          :intvector{ 2,4,6,8,10,12,14,18,19,21,24 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
+         // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
+         // using the Shift-JIS table for JIS X 0208 (ticket #5797)
+         {
+@@ -1244,6 +1252,14 @@
+           :int{0}
+         }
+ 
++        // HZ
++        {
++          "HZ",
++          "[\u0410-\u044f\u4e00\u4e01\u4e03]",
++          "[\u4e02\u4e04-\u4e06\uac00-\ud7ff]",
++          :int{0}
++        }
++        
+         // DBCS-only
+         {
+           "ibm-971",
diff --git a/icu.icu6175.emptysegments.patch b/icu.icu6175.emptysegments.patch
new file mode 100644
index 0000000..bb40bd5
--- /dev/null
+++ b/icu.icu6175.emptysegments.patch
@@ -0,0 +1,535 @@
+diff -ru icu.6002/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.6002/source/common/ucnv2022.c	2009-06-02 15:38:08.000000000 +0100
++++ icu/source/common/ucnv2022.c	2009-06-02 15:40:20.000000000 +0100
+@@ -201,6 +201,7 @@
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+     UBool isFirstBuffer;
+ #endif
++    UBool isEmptySegment;
+     char name[30];
+     char locale[3];
+ }UConverterDataISO2022;
+@@ -609,6 +610,7 @@
+     if(choice<=UCNV_RESET_TO_UNICODE) {
+         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
+         myConverterData->key = 0;
++        myConverterData->isEmptySegment = FALSE;
+     }
+     if(choice!=UCNV_RESET_TO_UNICODE) {
+         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
+@@ -814,6 +816,7 @@
+             if(chosenConverterName == NULL) {
+                 /* SS2 or SS3 */
+                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
++                _this->toUCallbackReason = UCNV_UNASSIGNED;
+                 return;
+             }
+ 
+@@ -935,6 +938,8 @@
+     }
+     if(U_SUCCESS(*err)) {
+         _this->toULength = 0;
++    } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
++        _this->toUCallbackReason = UCNV_UNASSIGNED;
+     }
+ }
+ 
+@@ -1986,6 +1991,7 @@
+                     continue;
+                 } else {
+                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
++                    myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
+                     break;
+                 }
+ 
+@@ -1997,21 +2003,39 @@
+                     continue;
+                 } else {
+                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
++                    myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
+                     break;
+                 }
+ 
+             case ESC_2022:
+                 mySource--;
+ escape:
+-                changeState_2022(args->converter,&(mySource), 
+-                    mySourceLimit, ISO_2022_JP,err);
++                {
++                    const char * mySourceBefore = mySource;
++                    int8_t toULengthBefore = args->converter->toULength;
++
++                    changeState_2022(args->converter,&(mySource),
++                        mySourceLimit, ISO_2022_JP,err);
++
++                    /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
++                    if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
++                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                        args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
++                    }
++                }
+ 
+                 /* invalid or illegal escape sequence */
+                 if(U_FAILURE(*err)){
+                     args->target = myTarget;
+                     args->source = mySource;
++                    myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
+                     return;
+                 }
++                /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
++                if(myData->key==0) {
++                    myData->isEmptySegment = TRUE;
++                }
+                 continue;
+ 
+             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
+@@ -2028,6 +2052,7 @@
+                 /* falls through */
+             default:
+                 /* convert one or two bytes */
++                myData->isEmptySegment = FALSE;
+                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
+                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
+                     !IS_JP_DBCS(cs)
+@@ -2524,15 +2549,27 @@
+ 
+             if(mySourceChar==UCNV_SI){
+                 myData->toU2022State.g = 0;
++                if (myData->isEmptySegment) {
++                    myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
++                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                    args->converter->toUBytes[0] = mySourceChar;
++                    args->converter->toULength = 1;
++                    args->target = myTarget;
++                    args->source = mySource;
++                    return;
++                }
+                 /*consume the source */
+                 continue;
+             }else if(mySourceChar==UCNV_SO){
+                 myData->toU2022State.g = 1;
++                myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
+                 /*consume the source */
+                 continue;
+             }else if(mySourceChar==ESC_2022){
+                 mySource--;
+ escape:
++                myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
+                 changeState_2022(args->converter,&(mySource), 
+                                 mySourceLimit, ISO_2022_KR, err);
+                 if(U_FAILURE(*err)){
+@@ -2543,6 +2580,7 @@
+                 continue;
+             }   
+ 
++            myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
+             if(myData->toU2022State.g == 1) {
+                 if(mySource < mySourceLimit) {
+                     char trailByte;
+@@ -3075,27 +3113,52 @@
+             switch(mySourceChar){
+             case UCNV_SI:
+                 pToU2022State->g=0;
++                if (myData->isEmptySegment) {
++                    myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
++                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                    args->converter->toUBytes[0] = mySourceChar;
++                    args->converter->toULength = 1;
++                    args->target = myTarget;
++                    args->source = mySource;
++                    return;
++                }
+                 continue;
+ 
+             case UCNV_SO:
+                 if(pToU2022State->cs[1] != 0) {
+                     pToU2022State->g=1;
++                    myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
+                     continue;
+                 } else {
+                     /* illegal to have SO before a matching designator */
++                    myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
+                     break;
+                 }
+ 
+             case ESC_2022:
+                 mySource--;
+ escape:
+-                changeState_2022(args->converter,&(mySource), 
+-                    mySourceLimit, ISO_2022_CN,err);
++                {
++                    const char * mySourceBefore = mySource;
++                    int8_t toULengthBefore = args->converter->toULength;
++
++                    changeState_2022(args->converter,&(mySource),
++                        mySourceLimit, ISO_2022_CN,err);
++
++                    /* After SO there must be at least one character before a designator (designator error handled separately) */
++                    if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
++                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                        args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
++                    }
++                }
+ 
+                 /* invalid or illegal escape sequence */
+                 if(U_FAILURE(*err)){
+                     args->target = myTarget;
+                     args->source = mySource;
++                    myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
+                     return;
+                 }
+                 continue;
+@@ -3109,6 +3172,7 @@
+                 /* falls through */
+             default:
+                 /* convert one or two bytes */
++                myData->isEmptySegment = FALSE;
+                 if(pToU2022State->g != 0) {
+                     if(mySource < mySourceLimit) {
+                         UConverterSharedData *cnv;
+diff -ru icu.6002/source/common/ucnv_bld.c icu/source/common/ucnv_bld.c
+--- icu.6002/source/common/ucnv_bld.c	2009-06-02 15:38:05.000000000 +0100
++++ icu/source/common/ucnv_bld.c	2009-06-02 15:38:31.000000000 +0100
+@@ -914,6 +914,7 @@
+     myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
+     myUConverter->subChars = (uint8_t *)myUConverter->subUChars;
+     uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
++    myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */
+ 
+     if(mySharedConverterData->impl->open != NULL) {
+         mySharedConverterData->impl->open(myUConverter, realName, locale, options, err);
+diff -ru icu.6002/source/common/ucnv_bld.h icu/source/common/ucnv_bld.h
+--- icu.6002/source/common/ucnv_bld.h	2009-06-02 15:38:08.000000000 +0100
++++ icu/source/common/ucnv_bld.h	2009-06-02 15:38:31.000000000 +0100
+@@ -226,6 +226,9 @@
+     char preToU[UCNV_EXT_MAX_BYTES];
+     int8_t preFromULength, preToULength;    /* negative: replay */
+     int8_t preToUFirstLength;               /* length of first character */
++
++    /* new fields for ICU 4.0 */
++    UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
+ };
+ 
+ U_CDECL_END /* end of UConverter */
+diff -ru icu.6002/source/common/ucnv.c icu/source/common/ucnv.c
+--- icu.6002/source/common/ucnv.c	2009-06-02 15:38:05.000000000 +0100
++++ icu/source/common/ucnv.c	2009-06-02 15:38:31.000000000 +0100
+@@ -1473,11 +1473,14 @@
+             cnv->toULength=0;
+ 
+             /* call the callback function */
++            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
++                cnv->toUCallbackReason = UCNV_UNASSIGNED;
++            }
+             cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
+                 cnv->invalidCharBuffer, errorInputLength,
+-                (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
+-                    UCNV_UNASSIGNED : UCNV_ILLEGAL,
++                cnv->toUCallbackReason,
+                 err);
++            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
+ 
+             /*
+              * loop back to the offset handling
+diff -ru icu.6002/source/common/ucnvhz.c icu/source/common/ucnvhz.c
+--- icu.6002/source/common/ucnvhz.c	2009-06-02 15:38:08.000000000 +0100
++++ icu/source/common/ucnvhz.c	2009-06-02 15:38:31.000000000 +0100
+@@ -59,6 +59,7 @@
+     UBool isEscapeAppended;
+     UBool isStateDBCS;
+     UBool isTargetUCharDBCS;
++    UBool isEmptySegment;
+ }UConverterDataHZ;
+ 
+ 
+@@ -98,6 +99,7 @@
+         cnv->mode=0;
+         if(cnv->extraInfo != NULL){
+             ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
++            ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
+         }
+     }
+     if(choice!=UCNV_RESET_TO_UNICODE) {
+@@ -130,6 +132,10 @@
+ *   from-GB code '~}' ($7E7D) is outside the defined GB range.)
+ *
+ *   Source: RFC 1842
++*
++*   Note that the formal syntax in RFC 1842 is invalid. I assume that the
++*   intended definition of single-byte-segment is as follows (pedberg):
++*   single-byte-segment = single-byte-seq 1*single-byte-char
+ */
+ 
+ 
+@@ -168,12 +174,23 @@
+                         args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
+                     }
+                     *(myTarget++)=(UChar)mySourceChar;
++                    myData->isEmptySegment = FALSE;
+                     continue;
+                 case UCNV_OPEN_BRACE:
+-                    myData->isStateDBCS = TRUE;
+-                    continue;
+                 case UCNV_CLOSE_BRACE:
+-                    myData->isStateDBCS = FALSE;
++                    myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
++                    if (myData->isEmptySegment) {
++                        myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
++                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                        args->converter->toUBytes[0] = UCNV_TILDE;
++                        args->converter->toUBytes[1] = mySourceChar;
++                        args->converter->toULength = 2;
++                        args->target = myTarget;
++                        args->source = mySource;
++                        return;
++                    }
++                    myData->isEmptySegment = TRUE;
+                     continue;
+                 default:
+                      /* if the first byte is equal to TILDE and the trail byte
+@@ -181,6 +198,7 @@
+                      */
+                     mySourceChar = 0x7e00 | mySourceChar;
+                     targetUniChar = 0xffff;
++                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+                     break;
+                 }
+             } else if(myData->isStateDBCS) {
+@@ -191,6 +209,7 @@
+                     } else {
+                         /* add another bit to distinguish a 0 byte from not having seen a lead byte */
+                         args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
++                        myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
+                     }
+                     continue;
+                 }
+@@ -218,8 +237,10 @@
+                     continue;
+                 } else if(mySourceChar <= 0x7f) {
+                     targetUniChar = (UChar)mySourceChar;  /* ASCII */
++                    myData->isEmptySegment = FALSE; /* the segment has something valid */
+                 } else {
+                     targetUniChar = 0xffff;
++                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+                 }
+             }
+             if(targetUniChar < 0xfffe){
+diff -ru icu.6002/source/test/cintltst/nucnvtst.c icu/source/test/cintltst/nucnvtst.c
+--- icu.6002/source/test/cintltst/nucnvtst.c	2009-06-02 15:37:53.000000000 +0100
++++ icu/source/test/cintltst/nucnvtst.c	2009-06-02 15:40:52.000000000 +0100
+@@ -81,6 +81,7 @@
+ static void TestJitterbug2411(void);
+ #endif
+ 
++static void TestJitterbug6175(void);
+ static void TestRoundTrippingAllUTF(void);
+ static void TestConv(const uint16_t in[],
+                      int len,
+@@ -294,6 +295,7 @@
+ #if !UCONFIG_NO_LEGACY_CONVERSION
+    addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
+    addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
++   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
+ #endif
+ 
+ }
+@@ -4454,6 +4456,70 @@
+     free(offsets);
+ }
+ 
++/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
++typedef struct {
++    const char *    converterName;
++    const char *    inputText;
++    int             inputTextLength;
++} EmptySegmentTest;
++
++/* Callback for TestJitterbug6175, should only get called for empty segment errors */
++static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
++                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
++    if (reason > UCNV_IRREGULAR) {
++        return;
++    }
++    if (reason != UCNV_IRREGULAR) {
++        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
++    }
++    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
++    *err = U_ZERO_ERROR;
++    ucnv_cbToUWriteSub(toArgs,0,err);
++}
++
++enum { kEmptySegmentToUCharsMax = 64 };
++static void TestJitterbug6175(void) {
++    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
++    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
++    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
++    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
++    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
++    static const EmptySegmentTest emptySegmentTests[] = {
++        /* converterName inputText    inputTextLength */
++        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
++        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
++        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
++        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
++        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
++        /* terminator: */
++        { NULL,          NULL,        0,                  }
++    };
++    const EmptySegmentTest * testPtr;
++    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
++        UErrorCode   err = U_ZERO_ERROR;
++        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
++        if (U_FAILURE(err)) {
++            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
++            return;
++        }
++        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
++        if (U_FAILURE(err)) {
++            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
++            ucnv_close(cnv);
++            return;
++        }
++        {
++            UChar         toUChars[kEmptySegmentToUCharsMax];
++            UChar *       toUCharsPtr = toUChars;
++            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
++            const char *  inCharsPtr = testPtr->inputText;
++            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
++            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
++        }
++        ucnv_close(cnv);
++    }
++}
++
+ static void
+ TestEBCDIC_STATEFUL() {
+     /* test input */
+diff -ru icu.6002/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.6002/source/test/testdata/conversion.txt	2009-06-02 15:37:54.000000000 +0100
++++ icu/source/test/testdata/conversion.txt	2009-06-02 15:40:52.000000000 +0100
+@@ -199,6 +199,21 @@
+           :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
+           :int{1}, :int{1}, "", "&", :bin{""}
+         }
++        // empty segment (using substitution and stop)
++        {
++          "ISO-2022-KR",
++          :bin{ 1b242943610e0f620d0a },
++          "a\uFFFDb\u000D\u000A",
++          :intvector{ 4, 6, 7, 8, 9 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-KR",
++          :bin{ 1b242943610e0f620d0a },
++          "a",
++          :intvector{ 4 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
++        }
+ 
+         // ISO-2022-JP
+ 
+@@ -249,6 +264,21 @@
+           :bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 },
+           :int{1}, :int{1}, "", ".", :bin{""}
+         }
++        // empty segment (using substitution and stop)
++        {
++          "ISO-2022-JP",
++          :bin{ 61621b24421b284263640d0a },
++          "ab\uFFFDcd\u000D\u000A",
++          :intvector{ 0, 1, 5, 8, 9, 10, 11 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-JP",
++          :bin{ 61621b24421b284263640d0a },
++          "ab",
++          :intvector{ 0, 1 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"1b2842"}
++        }
+ 
+         // ISO-2022-CN
+ 
+@@ -319,6 +349,36 @@
+           :bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 },
+           :int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 }
+         }
++        // empty segment 1 (using substitution and stop)
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
++          "ab\uFFFD\u994Cc\u000D\u000A",
++          :intvector{ 0, 5, 7, 14, 16, 17, 18 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
++          "ab",
++          :intvector{ 0, 5 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
++        }
++        // empty segment 2 (using substitution and stop)
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e1b24294768640f630d0a },
++          "ab\uFFFD\u5F70c\u000D\u000A",
++          :intvector{ 0, 5, 7, 11, 14, 15, 16 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e1b24294768640f630d0a },
++          "ab",
++          :intvector{ 0, 5 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"1b242947"}
++        }
+ 
+         // ISO-2022 SBCS
+         // [U_ENABLE_GENERIC_ISO_2022]
+@@ -333,6 +393,39 @@
+         //  :int{1}, :int{1}, "", ".", :bin{""}
+         //}
+ 
++        // HZ-GB-2312
++
++        // empty segment 1 (using substitution and stop)
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b7e7d6364 },
++          "ab\uFFFDcd",
++          :intvector{ 0, 1, 4, 6, 7 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b7e7d63640d0a },
++          "ab",
++          :intvector{ 0, 1 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"7e7d"}
++        }
++        // empty segment 2 & legal redundant switches (using substitution and stop)
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d },
++          "ab\u4E0D\u7A7A\uFFFD\u4E00cdef\uFFFD",
++          :intvector{ 0, 1, 4, 6, 10, 12, 16, 17, 20, 21, 24 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d },
++          "ab\u4E0D\u7A7A",
++          :intvector{ 0, 1, 4, 6 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"7e7b"}
++        }
++
+         // DBCS-only extensions
+         {
+           "ibm-970",
diff --git a/icu.icuXXXX.malayalam.bysyllable.patch b/icu.icuXXXX.malayalam.bysyllable.patch
new file mode 100644
index 0000000..d0cd1b1
--- /dev/null
+++ b/icu.icuXXXX.malayalam.bysyllable.patch
@@ -0,0 +1,250 @@
+diff -ruN icu.orig/source/layout/IndicReordering.h icu/source/layout/IndicReordering.h
+--- icu.orig/source/layout/IndicReordering.h	2007-04-27 10:28:22.000000000 +0100
++++ icu/source/layout/IndicReordering.h	2007-04-27 10:39:22.000000000 +0100
+@@ -142,6 +142,7 @@
+     // do not instantiate
+     IndicReordering();
+ 
++public:
+     static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
+ 
+ };
+diff -ruN icu.orig/source/layout/LayoutEngine.cpp icu/source/layout/LayoutEngine.cpp
+--- icu.orig/source/layout/LayoutEngine.cpp	2007-04-27 10:28:22.000000000 +0100
++++ icu/source/layout/LayoutEngine.cpp	2007-04-27 10:39:22.000000000 +0100
+@@ -14,6 +14,7 @@
+ #include "CanonShaping.h"
+ #include "HanLayoutEngine.h"
+ #include "HangulLayoutEngine.h"
++#include "MalayalamLayoutEngine.h"
+ #include "IndicLayoutEngine.h"
+ #include "KhmerLayoutEngine.h"
+ #include "ThaiLayoutEngine.h"
+@@ -451,11 +452,13 @@
+ 
+     if (gsubTable != NULL && gsubTable->coversScript(scriptTag = OpenTypeLayoutEngine::getScriptTag(scriptCode))) {
+         switch (scriptCode) {
++        case mlymScriptCode:
++            result = new MalayalamOpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags, gsubTable);
++	    break;
+         case bengScriptCode:
+         case devaScriptCode:
+         case gujrScriptCode:
+         case kndaScriptCode:
+-        case mlymScriptCode:
+         case oryaScriptCode:
+         case guruScriptCode:
+         case tamlScriptCode:
+@@ -512,11 +515,13 @@
+             result = new GXLayoutEngine(fontInstance, scriptCode, languageCode, morphTable);
+         } else {
+             switch (scriptCode) {
++            case mlymScriptCode:
++                result = new MalayalamOpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags);
++	        break;
+             case bengScriptCode:
+             case devaScriptCode:
+             case gujrScriptCode:
+             case kndaScriptCode:
+-            case mlymScriptCode:
+             case oryaScriptCode:
+             case guruScriptCode:
+             case tamlScriptCode:
+diff -ruN icu.orig/source/layout/LEGlyphStorage.h icu/source/layout/LEGlyphStorage.h
+--- icu.orig/source/layout/LEGlyphStorage.h	2007-04-27 10:28:22.000000000 +0100
++++ icu/source/layout/LEGlyphStorage.h	2007-04-27 10:43:54.000000000 +0100
+@@ -413,6 +413,8 @@
+      */
+     void adoptGlyphArray(LEGlyphStorage &from);
+ 
++    void appendGlyphStorage(LEGlyphStorage &from);
++
+     /**
+      * Delete the char indices array and replace it with the one
+      * in <code>from</code>. Set the char indices array pointer
+diff -ruN icu.orig/source/layout/Makefile.in icu/source/layout/Makefile.in
+--- icu.orig/source/layout/Makefile.in	2007-04-27 10:28:22.000000000 +0100
++++ icu/source/layout/Makefile.in	2007-04-27 10:39:22.000000000 +0100
+@@ -66,6 +66,7 @@
+ ArabicLayoutEngine.o \
+ GXLayoutEngine.o \
+ HanLayoutEngine.o \
++MalayalamLayoutEngine.o \
+ IndicLayoutEngine.o \
+ LayoutEngine.o \
+ ContextualGlyphSubstProc.o \
+diff -ruN icu.orig/source/layout/MalayalamLayoutEngine.cpp icu/source/layout/MalayalamLayoutEngine.cpp
+--- icu.orig/source/layout/MalayalamLayoutEngine.cpp	1970-01-01 01:00:00.000000000 +0100
++++ icu/source/layout/MalayalamLayoutEngine.cpp	2007-04-27 10:44:26.000000000 +0100
+@@ -0,0 +1,126 @@
++
++/*
++ *
++ * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
++ *
++ */
++
++#include "LETypes.h"
++#include "LayoutEngine.h"
++#include "OpenTypeLayoutEngine.h"
++#include "MalayalamLayoutEngine.h"
++#include "ScriptAndLanguageTags.h"
++
++#include "GlyphSubstitutionTables.h"
++#include "GlyphDefinitionTables.h"
++#include "GlyphPositioningTables.h"
++
++#include "GDEFMarkFilter.h"
++#include "LEGlyphStorage.h"
++
++#include "IndicReordering.h"
++
++#include <stdio.h>
++
++U_NAMESPACE_BEGIN
++
++UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MalayalamOpenTypeLayoutEngine)
++
++// Append from's glyphs, char indices and aux data to this storage. Appended char indices are
++// shifted past the largest index already stored. Nothing is appended if an array cannot grow.
++void LEGlyphStorage::appendGlyphStorage(LEGlyphStorage &from)
++{
++    if (fInsertionList) applyInsertions();
++    if (from.fInsertionList) from.applyInsertions();
++    if ((!fInsertionList) && (from.fInsertionList))
++    {
++        fInsertionList = from.fInsertionList; // take over from's list; from gives it up below
++        from.fInsertionList = NULL;
++    }
++
++    if (!from.fGlyphCount)
++        return;
++
++    le_int32 newGlyphCount = fGlyphCount + from.fGlyphCount;
++
++    // Keep each old pointer until its grown replacement is known to be non-NULL.
++    LEGlyphID *grownGlyphs = (LEGlyphID*)LE_GROW_ARRAY(fGlyphs, newGlyphCount);
++    if (grownGlyphs != NULL) fGlyphs = grownGlyphs;
++    le_int32 *grownIndices = (le_int32 *)LE_GROW_ARRAY(fCharIndices, newGlyphCount);
++    if (grownIndices != NULL) fCharIndices = grownIndices;
++    le_uint32 *grownAuxData = (le_uint32 *)LE_GROW_ARRAY(fAuxData, newGlyphCount);
++    if (grownAuxData != NULL) fAuxData = grownAuxData;
++    if (grownGlyphs == NULL || grownIndices == NULL || grownAuxData == NULL)
++        return;
++
++    le_int32 nLargestIndex = 0;
++    for (le_int32 i = 0; i < fGlyphCount; ++i)
++        if (fCharIndices[i] > nLargestIndex) nLargestIndex = fCharIndices[i];
++    if (fGlyphCount) nLargestIndex += 1; // first free index after the existing glyphs
++    for (le_int32 i = 0; i < from.fGlyphCount; ++i)
++        fCharIndices[fGlyphCount+i] = from.fCharIndices[i] + nLargestIndex;
++    LE_ARRAY_COPY(fGlyphs+fGlyphCount, from.fGlyphs, from.fGlyphCount);
++    LE_ARRAY_COPY(fAuxData+fGlyphCount, from.fAuxData, from.fGlyphCount);
++    fGlyphCount = newGlyphCount;
++}
++
++// Appends tempGlyphStorage to glyphStorage and returns the resulting glyph count; returns 0 untouched if success already holds a failure.
++le_int32 MalayalamOpenTypeLayoutEngine::glyphPostProcessing(LEGlyphStorage &tempGlyphStorage, LEGlyphStorage &glyphStorage, LEErrorCode &success)
++{
++    if (!LE_FAILURE(success)) {
++        glyphStorage.appendGlyphStorage(tempGlyphStorage);
++        return glyphStorage.getGlyphCount();
++    }
++
++    return 0;
++}
++
++// Shapes the text one syllable at a time: each syllable reported by IndicReordering::findSyllable() is processed into its own temporary LEGlyphStorage, which glyphPostProcessing() then folds into glyphStorage. Returns 0 on any failure, otherwise the value returned by the last glyphPostProcessing() call (0 when count is 0).
++le_int32 MalayalamOpenTypeLayoutEngine::computeGlyphs(const LEUnicode chars[], le_int32 offset, le_int32 count, le_int32 max, le_bool rightToLeft, LEGlyphStorage &glyphStorage, LEErrorCode &success)
++{
++    if (LE_FAILURE(success)) {
++        return 0;
++    }
++
++    if (chars == NULL || offset < 0 || count < 0 || max < 0 || offset >= max || offset + count > max) {
++        success = LE_ILLEGAL_ARGUMENT_ERROR;
++        return 0;
++    }
++
++    le_int32 outGlyphCount=0;
++
++    const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(fScriptCode);
++    le_int32 prev = 0; // start of the next syllable, as an index into chars+offset
++    while (prev < count)
++    { 
++        le_int32 outCharCount=0, fakeGlyphCount=0; // fakeGlyphCount is assigned below but never read
++        LEUnicode *outChars = NULL;
++        LEGlyphStorage fakeGlyphStorage; // per-syllable storage, appended to glyphStorage at the end of the iteration
++        // NOTE(review): the loop only terminates if findSyllable() always returns a value greater than prev - confirm against IndicReordering.
++        le_int32 syllable = IndicReordering::findSyllable(classTable, chars+offset, prev, count);
++        outCharCount = characterProcessing(chars+prev, offset, syllable-prev, max, rightToLeft, outChars, fakeGlyphStorage, success); // presumably reads from (chars+prev)[offset], i.e. the syllable start - TODO confirm
++
++        if (LE_FAILURE(success)) {
++            return 0;
++        }
++
++        if (outChars != NULL) {
++            fakeGlyphCount = glyphProcessing(outChars, 0, outCharCount, outCharCount, rightToLeft, fakeGlyphStorage, success);
++            LE_DELETE_ARRAY(outChars); // FIXME: a subclass may have allocated this, in which case this delete might not work...
++        } else {
++            fakeGlyphCount = glyphProcessing(chars+prev, offset, syllable-prev, max, rightToLeft, fakeGlyphStorage, success);
++        }
++
++        if (LE_FAILURE(success)) {
++            return 0;
++        }
++
++        outGlyphCount = glyphPostProcessing(fakeGlyphStorage, glyphStorage, success);
++
++        prev = syllable; // continue with the character after this syllable
++    }
++
++    return outGlyphCount;
++}
++
++U_NAMESPACE_END
+diff -ruN icu.orig/source/layout/MalayalamLayoutEngine.h icu/source/layout/MalayalamLayoutEngine.h
+--- icu.orig/source/layout/MalayalamLayoutEngine.h	1970-01-01 01:00:00.000000000 +0100
++++ icu/source/layout/MalayalamLayoutEngine.h	2007-04-27 10:39:52.000000000 +0100
+@@ -0,0 +1,41 @@
++
++/*
++ *
++ * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
++ *
++ */
++
++#ifndef __MALAYALAMLAYOUTENGINE_H
++#define __MALAYALAMLAYOUTENGINE_H
++
++#include "IndicLayoutEngine.h"
++
++U_NAMESPACE_BEGIN
++
++// Layout engine for Malayalam: derives from IndicOpenTypeLayoutEngine and supplies its own computeGlyphs() and glyphPostProcessing().
++class MalayalamOpenTypeLayoutEngine : public IndicOpenTypeLayoutEngine
++{
++public:
++    // Forwards every argument, including the GSUB table, to the IndicOpenTypeLayoutEngine constructor.
++    MalayalamOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 languageCode,
++                                  le_int32 typoFlags, const GlyphSubstitutionTableHeader *gsubTable)
++        : IndicOpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags, gsubTable) {}
++
++    // Same forwarding, for the base-class constructor that takes no GSUB table.
++    MalayalamOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 languageCode,
++                                  le_int32 typoFlags)
++        : IndicOpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags) {}
++
++    // ICU class-ID accessors for this class.
++    virtual UClassID getDynamicClassID() const;
++    static UClassID getStaticClassID();
++
++protected:
++    // Glyph post-processing and glyph computation hooks; both are defined in MalayalamLayoutEngine.cpp.
++    virtual le_int32 glyphPostProcessing(LEGlyphStorage &tempGlyphStorage, LEGlyphStorage &glyphStorage, LEErrorCode &success);
++    virtual le_int32 computeGlyphs(const LEUnicode chars[], le_int32 offset, le_int32 count, le_int32 max, le_bool rightToLeft, LEGlyphStorage &glyphStorage, LEErrorCode &success);
++};
++
++U_NAMESPACE_END
++#endif
++
diff --git a/icu.icuXXXX.rollbackabi.patch b/icu.icuXXXX.rollbackabi.patch
new file mode 100644
index 0000000..038d4b6
--- /dev/null
+++ b/icu.icuXXXX.rollbackabi.patch
@@ -0,0 +1,131 @@
+diff -ru icu.5691/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.5691/source/common/ucnv2022.c	2009-06-02 16:07:36.000000000 +0100
++++ icu/source/common/ucnv2022.c	2009-06-02 16:21:56.000000000 +0100
+@@ -3566,7 +3566,7 @@
+             /* include ASCII for JP */
+             sa->addRange(sa->set, 0, 0x7f);
+         }
+-        if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
++        if(cnvData->version==3 || cnvData->version==4) {
+             /*
+              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
+              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
+diff -ru icu.5691/source/common/ucnv_ext.c icu/source/common/ucnv_ext.c
+--- icu.5691/source/common/ucnv_ext.c	2009-06-02 16:07:36.000000000 +0100
++++ icu/source/common/ucnv_ext.c	2009-06-02 16:23:12.000000000 +0100
+@@ -1031,7 +1031,7 @@
+ 
+     stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
+ 
+-    useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
++    useFallback=(UBool)(FALSE);
+ 
+     /* enumerate the from-Unicode trie table */
+     c=0; /* keep track of the current code point while enumerating */
+diff -ru icu.5691/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.5691/source/common/ucnvmbcs.c	2009-06-02 16:07:36.000000000 +0100
++++ icu/source/common/ucnvmbcs.c	2009-06-02 16:23:50.000000000 +0100
+@@ -340,7 +340,7 @@
+ 
+ /* Miscellaneous ------------------------------------------------------------ */
+ 
+-#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
++/* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+ 
+ /* similar to ucnv_MBCSGetNextUChar() but recursive */
+ static void
+@@ -434,8 +434,6 @@
+         pErrorCode);
+ }
+ 
+-#endif
+-
+ U_CFUNC void
+ ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+                                          const USetAdder *sa,
+@@ -511,7 +509,7 @@
+ 
+         bytes=mbcsTable->fromUnicodeBytes;
+ 
+-        useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
++        useFallback=(UBool)(FALSE);
+ 
+         switch(mbcsTable->outputType) {
+         case MBCS_OUTPUT_3:
+diff -ru icu.5691/source/common/ucnvmbcs.h icu/source/common/ucnvmbcs.h
+--- icu.5691/source/common/ucnvmbcs.h	2009-06-02 16:07:36.000000000 +0100
++++ icu/source/common/ucnvmbcs.h	2009-06-02 16:23:50.000000000 +0100
+@@ -363,7 +363,8 @@
+ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode);
+ 
+-#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
++/* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
++
+ /*
+  * Internal function returning a UnicodeSet for toUnicode() conversion.
+  * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
+@@ -378,7 +379,6 @@
+                            UConverterUnicodeSet which,
+                            uint8_t state, int32_t lowByte, int32_t highByte,
+                            UErrorCode *pErrorCode);
+-#endif
+ 
+ /*
+  * Internal function returning a UnicodeSet for toUnicode() conversion.
+diff -ru icu.5691/source/common/unicode/ucnv.h icu/source/common/unicode/ucnv.h
+--- icu.5691/source/common/unicode/ucnv.h	2009-06-02 16:07:32.000000000 +0100
++++ icu/source/common/unicode/ucnv.h	2009-06-02 16:20:18.000000000 +0100
+@@ -870,8 +870,6 @@
+ typedef enum UConverterUnicodeSet {
+     /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+     UCNV_ROUNDTRIP_SET,
+-    /** Select the set of Unicode code points with roundtrip or fallback mappings. @draft ICU 4.0 */
+-    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+     /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+     UCNV_SET_COUNT
+ } UConverterUnicodeSet;
+@@ -880,16 +878,11 @@
+ /**
+  * Returns the set of Unicode code points that can be converted by an ICU converter.
+  *
+- * Returns one of several kinds of set:
+- *
+- * 1. UCNV_ROUNDTRIP_SET
+- *
++ * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET):
+  * The set of all Unicode code points that can be roundtrip-converted
+- * (converted without any data loss) with the converter (ucnv_fromUnicode()).
++ * (converted without any data loss) with the converter.
+  * This set will not include code points that have fallback mappings
+  * or are only the result of reverse fallback mappings.
+- * This set will also not include PUA code points with fallbacks, although
+- * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
+  * See UTR #22 "Character Mapping Markup Language"
+  * at http://www.unicode.org/reports/tr22/
+  *
+@@ -900,12 +893,6 @@
+  *   by comparing its roundtrip set with the set of ExemplarCharacters from
+  *   ICU's locale data or other sources
+  *
+- * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
+- *
+- * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
+- * when fallbacks are turned on (see ucnv_setFallback()).
+- * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
+- *
+  * In the future, there may be more UConverterUnicodeSet choices to select
+  * sets with different properties.
+  *
+diff -ru icu.5691/source/test/intltest/convtest.cpp icu/source/test/intltest/convtest.cpp
+--- icu.5691/source/test/intltest/convtest.cpp	2009-06-02 16:07:21.000000000 +0100
++++ icu/source/test/intltest/convtest.cpp	2009-06-02 16:24:08.000000000 +0100
+@@ -552,7 +552,7 @@
+         }
+         UConverterUnicodeSet which;
+         for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
+-            if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
++            if(FALSE) {
+                 ucnv_setFallback(cnv, TRUE);
+             }
+             expected.add(0, cpLimit-1);
diff --git a/icu.icuXXXX.virama.prevnext.patch b/icu.icuXXXX.virama.prevnext.patch
new file mode 100644
index 0000000..49393c2
--- /dev/null
+++ b/icu.icuXXXX.virama.prevnext.patch
@@ -0,0 +1,98 @@
+diff -ur icu.orig/source/common/rbbi.cpp icu/source/common/rbbi.cpp
+--- icu.orig/source/common/rbbi.cpp	2006-10-05 11:54:13.000000000 +0100
++++ icu/source/common/rbbi.cpp	2006-10-05 11:57:31.000000000 +0100
+@@ -879,6 +879,22 @@
+     RBBI_END        // state machine processing is after end of user text.
+ };
+ 
++#define VIRAMA_SCRIPT(wc)        ((wc) >= 0x0901 && (wc) <= 0x17FF) /* true for U+0901..U+17FF: from the Devanagari block up to the end of the Khmer block, including every block in between */
++#define VIRAMA(wc) ((wc) == 0x094D || /* DEVANAGARI SIGN VIRAMA */ \
++                    (wc) == 0x09CD || /* BENGALI SIGN VIRAMA */ \
++                    (wc) == 0x0A4D || /* GURMUKHI SIGN VIRAMA */ \
++                    (wc) == 0x0ACD || /* GUJARATI SIGN VIRAMA */ \
++                    (wc) == 0x0B4D || /* ORIYA SIGN VIRAMA */ \
++                    (wc) == 0x0BCD || /* TAMIL SIGN VIRAMA */ \
++                    (wc) == 0x0C4D || /* TELUGU SIGN VIRAMA */ \
++                    (wc) == 0x0CCD || /* KANNADA SIGN VIRAMA */ \
++                    (wc) == 0x0D4D || /* MALAYALAM SIGN VIRAMA */ \
++                    (wc) == 0x0DCA || /* SINHALA SIGN AL-LAKUNA */ \
++                    (wc) == 0x0E3A || /* THAI CHARACTER PHINTHU */ \
++                    (wc) == 0x0F84 || /* TIBETAN MARK HALANTA */ \
++                    (wc) == 0x1039 || /* MYANMAR SIGN VIRAMA */ \
++                    (wc) == 0x17D2 || /* KHMER SIGN COENG */ \
++                    (wc) == 0x200D) /* ZERO WIDTH JOINER: not a virama, but matched by this macro as well */
+ 
+ //-----------------------------------------------------------------------------------
+ //
+@@ -896,6 +911,7 @@
+     RBBIRunMode         mode;
+     
+     RBBIStateTableRow  *row;
++    UChar32             prevchar;
+     UChar32             c;
+     int32_t             lookaheadStatus = 0;
+     int32_t             lookaheadTagIdx = 0;
+@@ -919,6 +935,7 @@
+     // if we're already at the end of the text, return DONE.
+     initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); 
+     result          = initialPosition;
++    prevchar        = 0;
+     c               = UTEXT_NEXT32(fText);
+     if (fData == NULL || c==U_SENTINEL) {
+         return BreakIterator::DONE;
+@@ -1001,6 +1018,11 @@
+ 
+         // State Transition - move machine to its next state
+         //
++        if (VIRAMA_SCRIPT(c) && VIRAMA(prevchar))
++        {
++                state = START_STATE;
++                row = (RBBIStateTableRow *) (tableData + tableRowLen * state);
++        }
+         state = row->fNextState[category];
+         row = (RBBIStateTableRow *)
+             // (statetable->fTableData + (statetable->fRowLen * state));
+@@ -1059,6 +1081,7 @@
+         //    the input position.  The next iteration will be processing the
+         //    first real input character.
+         if (mode == RBBI_RUN) {
++            prevchar = c;
+             c = UTEXT_NEXT32(fText);
+         } else {
+             if (mode == RBBI_START) {
+@@ -1107,6 +1130,7 @@
+     int16_t             category        = 0;
+     RBBIRunMode         mode;
+     RBBIStateTableRow  *row;
++    UChar32             prevchar;
+     UChar32             c;
+     int32_t             lookaheadStatus = 0;
+     int32_t             result          = 0;
+@@ -1135,6 +1159,7 @@
+     //  Set up the starting char.
+     initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+     result          = initialPosition;
++    prevchar        = 0;
+     c               = UTEXT_PREVIOUS32(fText);
+ 
+     //  Set the initial state for the state machine
+@@ -1218,6 +1243,11 @@
+ 
+         // State Transition - move machine to its next state
+         //
++	if (VIRAMA_SCRIPT(prevchar) && VIRAMA(c))
++        {
++                state = START_STATE;
++                row = (RBBIStateTableRow *) (statetable->fTableData + (statetable->fRowLen * state));
++        }
+         state = row->fNextState[category];
+         row = (RBBIStateTableRow *)
+             (statetable->fTableData + (statetable->fRowLen * state));
+@@ -1269,6 +1299,7 @@
+         //    the input position.  The next iteration will be processing the
+         //    first real input character.
+         if (mode == RBBI_RUN) {
++            prevchar = c;
+             c = UTEXT_PREVIOUS32(fText);
+         } else {            
+             if (mode == RBBI_START) {
diff --git a/icu.rh429023.regexp.patch b/icu.rh429023.regexp.patch
new file mode 100644
index 0000000..ef8eded
--- /dev/null
+++ b/icu.rh429023.regexp.patch
@@ -0,0 +1,307 @@
+diff -ru icu.orig/source/common/uvectr32.cpp icu/source/common/uvectr32.cpp
+--- icu.orig/source/common/uvectr32.cpp	2003-08-27 02:01:30.000000000 +0100
++++ icu/source/common/uvectr32.cpp	2008-01-22 08:37:06.000000000 +0000
+@@ -1,6 +1,6 @@
+ /*
+ ******************************************************************************
+-* Copyright (C) 1999-2003, International Business Machines Corporation and   *
++* Copyright (C) 1999-2008, International Business Machines Corporation and   *
+ * others. All Rights Reserved.                                               *
+ ******************************************************************************
+ *   Date        Name        Description
+@@ -26,6 +26,7 @@
+ UVector32::UVector32(UErrorCode &status) :
+     count(0),
+     capacity(0),
++    maxCapacity(0),
+     elements(NULL)
+ {
+     _init(DEFUALT_CAPACITY, status);
+@@ -34,6 +35,7 @@
+ UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) :
+     count(0),
+     capacity(0),
++    maxCapacity(0),
+     elements(0)
+ {
+     _init(initialCapacity, status);
+@@ -46,6 +48,9 @@
+     if (initialCapacity < 1) {
+         initialCapacity = DEFUALT_CAPACITY;
+     }
++    if (maxCapacity>0 && maxCapacity<initialCapacity) {
++        initialCapacity = maxCapacity;
++    }
+     elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity);
+     if (elements == 0) {
+         status = U_MEMORY_ALLOCATION_ERROR;
+@@ -189,21 +194,35 @@
+ UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
+     if (capacity >= minimumCapacity) {
+         return TRUE;
+-    } else {
+-        int32_t newCap = capacity * 2;
+-        if (newCap < minimumCapacity) {
+-            newCap = minimumCapacity;
+-        }
+-        int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap);
+-        if (newElems == 0) {
+-            status = U_MEMORY_ALLOCATION_ERROR;
+-            return FALSE;
+-        }
+-        uprv_memcpy(newElems, elements, sizeof(elements[0]) * count);
+-        uprv_free(elements);
+-        elements = newElems;
+-        capacity = newCap;
+-        return TRUE;
++    }
++    if (maxCapacity>0 && minimumCapacity>maxCapacity) {
++        status = U_BUFFER_OVERFLOW_ERROR;
++        return FALSE;
++    }
++    int32_t newCap = capacity * 2;
++    if (newCap < minimumCapacity) {
++        newCap = minimumCapacity;
++    }
++    if (maxCapacity > 0 && newCap > maxCapacity) {
++        newCap = maxCapacity;
++    }
++    int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap);
++    if (newElems == 0) {
++        status = U_MEMORY_ALLOCATION_ERROR;
++        return FALSE;
++    }
++    uprv_memcpy(newElems, elements, sizeof(elements[0]) * count);
++    uprv_free(elements);
++    elements = newElems;
++    capacity = newCap;
++    return TRUE;
++}
++
++void UVector32::setMaxCapacity(int32_t limit) {
++    U_ASSERT(limit >= 0);
++    maxCapacity = limit;
++    if (maxCapacity < 0) {
++        maxCapacity = 0;
+     }
+ }
+ 
+diff -ru icu.orig/source/common/uvectr32.h icu/source/common/uvectr32.h
+--- icu.orig/source/common/uvectr32.h	2006-01-18 03:52:04.000000000 +0000
++++ icu/source/common/uvectr32.h	2008-01-22 08:37:07.000000000 +0000
+@@ -1,6 +1,6 @@
+ /*
+ **********************************************************************
+-*   Copyright (C) 1999-2006, International Business Machines
++*   Copyright (C) 1999-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+@@ -61,6 +61,8 @@
+     int32_t   count;
+ 
+     int32_t   capacity;
++    
++    int32_t   maxCapacity;   // Limit beyond which capacity is not permitted to grow.
+ 
+     int32_t*  elements;
+ 
+@@ -162,6 +164,14 @@
+     int32_t *getBuffer() const;
+ 
+     /**
++     * Set the maximum allowed buffer capacity for this vector/stack.
++     * Default with no limit set is unlimited, go until malloc() fails.
++     * A Limit of zero means unlimited capacity.
++     * Units are vector elements (32 bits each), not bytes.
++     */
++    void setMaxCapacity(int32_t limit);
++
++    /**
+      * ICU "poor man's RTTI", returns a UClassID for this class.
+      */
+     static UClassID U_EXPORT2 getStaticClassID();
+@@ -221,7 +231,9 @@
+ }
+ 
+ inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
+-    ensureCapacity(count+size, status);
++    if (ensureCapacity(count+size, status) == FALSE) {
++        return NULL;
++    }
+     int32_t  *rp = elements+count;
+     count += size;
+     return rp;
+diff -ru icu.orig/source/i18n/regexcmp.cpp icu/source/i18n/regexcmp.cpp
+--- icu.orig/source/i18n/regexcmp.cpp	2006-02-02 04:37:14.000000000 +0000
++++ icu/source/i18n/regexcmp.cpp	2008-01-22 08:37:06.000000000 +0000
+@@ -1187,14 +1187,17 @@
+             // Because capture groups can be forward-referenced by back-references,
+             //  we fill the operand with the capture group number.  At the end
+             //  of compilation, it will be changed to the variable's location.
+-            U_ASSERT(groupNum > 0);
+-            int32_t  op;
+-            if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
+-                op = URX_BUILD(URX_BACKREF_I, groupNum);
++            if (groupNum < 1) { 
++                error(U_REGEX_INVALID_BACK_REF);
+             } else {
+-                op = URX_BUILD(URX_BACKREF, groupNum);
++                int32_t  op;
++                if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
++                    op = URX_BUILD(URX_BACKREF_I, groupNum);
++                } else {
++                    op = URX_BUILD(URX_BACKREF, groupNum);
++                }
++                fRXPat->fCompiledPat->addElement(op, *fStatus);
+             }
+-            fRXPat->fCompiledPat->addElement(op, *fStatus);
+         }
+         break;
+ 
+diff -ru icu.orig/source/i18n/rematch.cpp icu/source/i18n/rematch.cpp
+--- icu.orig/source/i18n/rematch.cpp	2005-08-25 19:02:20.000000000 +0100
++++ icu/source/i18n/rematch.cpp	2008-01-22 08:37:44.000000000 +0000
+@@ -30,6 +30,15 @@
+ 
+ U_NAMESPACE_BEGIN
+ 
++// Limit the size of the back track stack, to avoid system failures caused
++//   by heap exhaustion.  Units are in 32 bit words, not bytes.
++// This value puts ICU's limits higher than most other regexp implementations,
++//  which use recursion rather than the heap, and take more storage per
++//  backtrack point.
++// This constant is _temporary_.  Proper API to control the value will be added.
++//
++static const int32_t BACKTRACK_STACK_CAPACITY = 8000000;
++
+ //-----------------------------------------------------------------------------
+ //
+ //   Constructor and Destructor
+@@ -53,6 +62,8 @@
+     }
+     if (fStack == NULL || fData == NULL) {
+         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
++    } else {
++        fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
+     }
+         
+     reset(*RegexStaticSets::gStaticSets->fEmptyString);
+@@ -78,6 +89,8 @@
+     }
+     if (fStack == NULL || fData == NULL) {
+         status = U_MEMORY_ALLOCATION_ERROR;
++    } else {
++        fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
+     }
+     reset(input);
+ }
+@@ -102,6 +115,8 @@
+     }
+     if (fStack == NULL || fData == NULL) {
+         status = U_MEMORY_ALLOCATION_ERROR;
++    } else {
++        fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
+     }
+     reset(*RegexStaticSets::gStaticSets->fEmptyString);
+ }
+@@ -1015,6 +1030,14 @@
+ inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) {
+     // push storage for a new frame. 
+     int32_t *newFP = fStack->reserveBlock(frameSize, status);
++    if (newFP == NULL) {
++        // Heap allocation error on attempted stack expansion.
++        // We need to return a writable stack frame, so just return the
++        //    previous frame.  The match operation will stop quickly
++        //    because of the error status, after which the frame will never
++        //    be looked at again.
++        return fp;
++    }
+     fp = (REStackFrame *)(newFP - frameSize);  // in case of realloc of stack.
+     
+     // New stack frame = copy of old top frame.
+@@ -1030,8 +1053,8 @@
+     fp->fPatIdx = savePatIdx;
+     return (REStackFrame *)newFP;
+ }
+-    
+-            
++
++
+ //--------------------------------------------------------------------------------
+ //
+ //   MatchAt      This is the actual matching engine.
+@@ -2262,6 +2285,7 @@
+         }
+ 
+         if (U_FAILURE(status)) {
++            isMatch = FALSE;
+             break;
+         }
+     }
+diff -ru icu.orig/source/test/intltest/regextst.cpp icu/source/test/intltest/regextst.cpp
+--- icu.orig/source/test/intltest/regextst.cpp	2005-07-05 19:39:00.000000000 +0100
++++ icu/source/test/intltest/regextst.cpp	2008-01-22 08:38:21.000000000 +0000
+@@ -66,6 +66,10 @@
+         case 6: name = "PerlTests";
+             if (exec) PerlTests();
+             break;
++        case 7: name = "Bug 6149";
++            if (exec) Bug6149();
++            break;
++            
+ 
+ 
+         default: name = "";
+@@ -1637,6 +1641,13 @@
+     // UnicodeSet containing a string
+     REGEX_ERR("abc[{def}]xyz", 1, 10, U_REGEX_SET_CONTAINS_STRING);
+ 
++    
++    // Invalid Back Reference \0
++    //    For ICU 3.8 and earlier
++    //    For ICU versions newer than 3.8, \0 introduces an octal escape.
++    //
++    REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF);
++
+ }
+ 
+ 
+@@ -2119,6 +2130,26 @@
+ }
+ 
+ 
++//--------------------------------------------------------------
++//
++//  Bug6149   Verify limits to heap expansion for backtrack stack.
++//             Use this pattern,
++//                 "(a?){1,}"
++//             The zero-length match will repeat forever.
++//                (That this goes into a loop is another bug)
++//
++//---------------------------------------------------------------
++void RegexTest::Bug6149() {
++    UnicodeString pattern("(a?){1,}"); // the pattern named in the header comment above
++    UnicodeString s("xyz");
++    uint32_t flags = 0;
++    UErrorCode status = U_ZERO_ERROR;
++    // matches() is expected to report U_BUFFER_OVERFLOW_ERROR and to return FALSE.
++    RegexMatcher  matcher(pattern, s, flags, status);
++    UBool result = false;
++    REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR); // NOTE(review): the macro is defined outside this hunk and may rely on the identifier `status` - keep the name
++    REGEX_ASSERT(result == FALSE);
++ }
+ 
+ #endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
+ 
+diff -ru icu.orig/source/test/intltest/regextst.h icu/source/test/intltest/regextst.h
+--- icu.orig/source/test/intltest/regextst.h	2003-12-03 06:58:28.000000000 +0000
++++ icu/source/test/intltest/regextst.h	2008-01-22 08:37:06.000000000 +0000
+@@ -30,6 +30,7 @@
+     virtual void Extended();
+     virtual void Errors();
+     virtual void PerlTests();
++    virtual void Bug6149();
+ 
+     // The following functions are internal to the regexp tests.
+     virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int line);
author	Remi Collet <fedora@famillecollet.com>	2013-03-20 10:29:29 +0100
committer	Remi Collet <fedora@famillecollet.com>	2013-03-20 10:29:29 +0100
commit	6deac027c98f5d99e1805f9ddc21ff2dbebe0fb7 (patch)
tree	008990c48199f2d517fc9b1a4b47c6b162ec30ef