Add code support for ICU.

diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c
index b86a547..0f41df9 100644
--- a/third_party/libxml/encoding.c
+++ b/third_party/libxml/encoding.c
@@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
 static int xmlCharEncodingAliasesNb = 0;
 static int xmlCharEncodingAliasesMax = 0;
 
-#ifdef LIBXML_ICONV_ENABLED
+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
 #if 0
 #define DEBUG_ENCODING  /* Define this to get encoding traces */
 #endif
@@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
 }
 
+#ifdef LIBXML_ICU_ENABLED
+static uconv_t* 
+openIcuConverter(const char* name, int toUnicode)
+{
+  UErrorCode status = U_ZERO_ERROR;
+  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
+  if (conv == NULL)
+    return NULL;
+
+  conv->uconv = ucnv_open(name, &status);
+  if (U_FAILURE(status))
+    goto error;
+
+  status = U_ZERO_ERROR;
+  if (toUnicode) {
+    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 
+                        NULL, NULL, NULL, &status);
+  }
+  else {
+    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 
+                        NULL, NULL, NULL, &status);
+  }
+  if (U_FAILURE(status))
+    goto error;
+
+  status = U_ZERO_ERROR;
+  conv->utf8 = ucnv_open("UTF-8", &status);
+  if (U_SUCCESS(status))
+    return conv;
+
+error:
+  if (conv->uconv) 
+    ucnv_close(conv->uconv);
+  xmlFree(conv);
+  return NULL;
+}
+
+static void
+closeIcuConverter(uconv_t *conv)
+{
+  if (conv != NULL) {
+    ucnv_close(conv->uconv);
+    ucnv_close(conv->utf8);
+    xmlFree(conv);
+  }
+}
+#endif /* LIBXML_ICU_ENABLED */
+
 /************************************************************************
  *									*
  *		Conversions To/From UTF8 encoding			*
@@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name,
 #ifdef LIBXML_ICONV_ENABLED
     handler->iconv_in = NULL;
     handler->iconv_out = NULL;
-#endif /* LIBXML_ICONV_ENABLED */
+#endif
+#ifdef LIBXML_ICU_ENABLED
+    handler->uconv_in = NULL;
+    handler->uconv_out = NULL;
+#endif
 
     /*
      * registers and returns the handler.
@@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) {
     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
 #endif /* LIBXML_OUTPUT_ENABLED */
-#ifndef LIBXML_ICONV_ENABLED
+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
 #ifdef LIBXML_ISO8859X_ENABLED
     xmlRegisterCharEncodingHandlersISO8859x ();
 #endif
@@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) {
     xmlCharEncodingHandlerPtr enc;
     iconv_t icv_in, icv_out;
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    xmlCharEncodingHandlerPtr enc;
+    uconv_t *ucv_in, *ucv_out;
+#endif /* LIBXML_ICU_ENABLED */
     char upper[100];
     int i;
 
@@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) {
 		    "iconv : problems with filters for '%s'\n", name);
     }
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    /* check whether icu can handle this */
+    ucv_in = openIcuConverter(name, 1);
+    ucv_out = openIcuConverter(name, 0);
+    if (ucv_in != NULL && ucv_out != NULL) {
+	    enc = (xmlCharEncodingHandlerPtr)
+	          xmlMalloc(sizeof(xmlCharEncodingHandler));
+	    if (enc == NULL) {
+                closeIcuConverter(ucv_in);
+                closeIcuConverter(ucv_out);
+		return(NULL);
+	    }
+	    enc->name = xmlMemStrdup(name);
+	    enc->input = NULL;
+	    enc->output = NULL;
+	    enc->uconv_in = ucv_in;
+	    enc->uconv_out = ucv_out;
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+		    "Found ICU converter handler for encoding %s\n", name);
+#endif
+	    return enc;
+    } else if (ucv_in != NULL || ucv_out != NULL) {
+            closeIcuConverter(ucv_in);
+            closeIcuConverter(ucv_out);
+	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
+		    "ICU converter : problems with filters for '%s'\n", name);
+    }
+#endif /* LIBXML_ICU_ENABLED */
 
 #ifdef DEBUG_ENCODING
     xmlGenericError(xmlGenericErrorContext,
@@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
 
 /************************************************************************
  *									*
+ *		ICU based generic conversion functions	         	*
+ *									*
+ ************************************************************************/
+
+#ifdef LIBXML_ICU_ENABLED
+/**
+ * xmlUconvWrapper:
+ * @cd: ICU uconverter data structure
+ * @toUnicode : non-zero if toUnicode. 0 otherwise.
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of ISO Latin 1 chars
+ * @inlen:  the length of @in
+ *
+ * Returns 0 if success, or 
+ *     -1 by lack of space, or
+ *     -2 if the transcoding fails (for *in is not valid utf8 string or
+ *        the result of transformation can't fit into the encoding we want), or
+ *     -3 if there the last byte can't form a single output char.
+ *     
+ * The value of @inlen after return is the number of octets consumed
+ *     as the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of ocetes consumed.
+ */
+static int
+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
+                const unsigned char *in, int *inlen) {
+    const char *ucv_in = (const char *) in;
+    char *ucv_out = (char *) out;
+    UErrorCode err = U_ZERO_ERROR;
+
+    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
+        if (outlen != NULL) *outlen = 0;
+        return(-1);
+    }
+
+    /* 
+     * TODO(jungshik)
+     * 1. is ucnv_convert(To|From)Algorithmic better?
+     * 2. had we better use an explicit pivot buffer?
+     * 3. error returned comes from 'fromUnicode' only even
+     *    when toUnicode is true !
+     */
+    if (toUnicode) {
+        /* encoding => UTF-16 => UTF-8 */
+        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
+                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
+                       0, TRUE, &err);
+    } else {
+        /* UTF-8 => UTF-16 => encoding */
+        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
+                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
+                       0, TRUE, &err);
+    }
+    *inlen = ucv_in - (const char*) in; 
+    *outlen = ucv_out - (char *) out;
+    if (U_SUCCESS(err))
+        return 0;
+    if (err == U_BUFFER_OVERFLOW_ERROR)
+        return -1;
+    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
+        return -2;
+    /* if (err == U_TRUNCATED_CHAR_FOUND) */
+    return -3;
+}
+#endif /* LIBXML_ICU_ENABLED */
+
+/************************************************************************
+ *									*
  *		The real API used by libxml for on-the-fly conversion	*
  *									*
  ************************************************************************/
@@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
 	if (ret == -1) ret = -3;
     }
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    else if (handler->uconv_in != NULL) {
+	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
+	                      &written, in->content, &toconv);
+	xmlBufferShrink(in, toconv);
+	out->use += written;
+	out->content[out->use] = 0;
+	if (ret == -1) ret = -3;
+    }
+#endif /* LIBXML_ICU_ENABLED */
 #ifdef DEBUG_ENCODING
     switch (ret) {
         case 0:
@@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
             ret = -3;
     }
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    else if (handler->uconv_in != NULL) {
+        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
+                              &written, in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+        if (ret == -1)
+            ret = -3;
+    }
+#endif /* LIBXML_ICU_ENABLED */
     switch (ret) {
         case 0:
 #ifdef DEBUG_ENCODING
@@ -2015,6 +2190,15 @@ retry:
 	    out->content[out->use] = 0;
 	}
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+	else if (handler->uconv_out != NULL) {
+	    ret = xmlUconvWrapper(handler->uconv_out, 0,
+                              &out->content[out->use],
+ 				              &written, NULL, &toconv);
+	    out->use += written;
+	    out->content[out->use] = 0;
+	}
+#endif /* LIBXML_ICU_ENABLED */
 #ifdef DEBUG_ENCODING
 	xmlGenericError(xmlGenericErrorContext,
 		"initialized encoder\n");
@@ -2061,6 +2245,26 @@ retry:
 	}
     }
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    else if (handler->uconv_out != NULL) {
+	ret = xmlUconvWrapper(handler->uconv_out, 0,
+                              &out->content[out->use],
+	                      &written, in->content, &toconv);
+	xmlBufferShrink(in, toconv);
+	out->use += written;
+	writtentot += written;
+	out->content[out->use] = 0;
+	if (ret == -1) {
+	    if (written > 0) {
+		/*
+		 * Can be a limitation of iconv
+		 */
+		goto retry;
+	    }
+	    ret = -3;
+	}
+    }
+#endif /* LIBXML_ICU_ENABLED */
     else {
 	xmlEncodingErr(XML_I18N_NO_OUTPUT,
 		       "xmlCharEncOutFunc: no output function !\n", NULL);
@@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
 	xmlFree(handler);
     }
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
+	if (handler->name != NULL)
+	    xmlFree(handler->name);
+	handler->name = NULL;
+	if (handler->uconv_out != NULL) {
+	    closeIcuConverter(handler->uconv_out);
+	    handler->uconv_out = NULL;
+	}
+	if (handler->uconv_in != NULL) {
+	    closeIcuConverter(handler->uconv_in);
+	    handler->uconv_in = NULL;
+	}
+	xmlFree(handler);
+    }
+#endif
 #ifdef DEBUG_ENCODING
     if (ret)
         xmlGenericError(xmlGenericErrorContext,
@@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
 		    cur += toconv;
 		} while (ret == -2);
 #endif
+#ifdef LIBXML_ICU_ENABLED
+	    } else if (handler->uconv_out != NULL) {
+	        do {
+		    toconv = in->end - cur;
+		    written = 32000;
+		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
+	                      &written, cur, &toconv);
+		    if (ret < 0) {
+		        if (written > 0)
+			    ret = -2;
+			else
+			    return(-1);
+		    }
+		    unused += written;
+		    cur += toconv;
+		} while (ret == -2);
             } else {
 	        /* could not find a converter */
 	        return(-1);
@@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
     }
     return(in->consumed + (in->cur - in->base));
 }
+#endif
 
-#ifndef LIBXML_ICONV_ENABLED
+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
 #ifdef LIBXML_ISO8859X_ENABLED
 
 /**
diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h
index c74b25f..b5f8b48 100644
--- a/third_party/libxml/include/libxml/encoding.h
+++ b/third_party/libxml/include/libxml/encoding.h
@@ -26,6 +26,24 @@
 
 #ifdef LIBXML_ICONV_ENABLED
 #include <iconv.h>
+#else 
+#ifdef LIBXML_ICU_ENABLED
+#include <unicode/ucnv.h>
+#if 0
+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h>
+ * to prevent unwanted ICU symbols being exposed to users of libxml2.
+ * One particular case is Qt4 conflicting on UChar32.
+ */
+#include <stdint.h>
+struct UConverter;
+typedef struct UConverter UConverter;
+#ifdef _MSC_VER
+typedef wchar_t UChar;
+#else
+typedef uint16_t UChar;
+#endif
+#endif
+#endif
 #endif
 #ifdef __cplusplus
 extern "C" {
@@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
  * Block defining the handlers for non UTF-8 encodings.
  * If iconv is supported, there are two extra fields.
  */
+#ifdef LIBXML_ICU_ENABLED
+struct _uconv_t {
+  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
+  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
+};
+typedef struct _uconv_t uconv_t;
+#endif
 
 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
@@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler {
     iconv_t                    iconv_in;
     iconv_t                    iconv_out;
 #endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+    uconv_t                    *uconv_in;
+    uconv_t                    *uconv_out;
+#endif /* LIBXML_ICU_ENABLED */
 };
 
 #ifdef __cplusplus
diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h
index dd79c42..3580b63 100644
--- a/third_party/libxml/include/libxml/parser.h
+++ b/third_party/libxml/include/libxml/parser.h
@@ -1222,6 +1222,7 @@ typedef enum {
     XML_WITH_DEBUG_MEM = 29,
     XML_WITH_DEBUG_RUN = 30,
     XML_WITH_ZLIB = 31,
+    XML_WITH_ICU = 32,
     XML_WITH_NONE = 99999 /* just to be sure of allocation size */
 } xmlFeature;
 
diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in
index 4739f3a..de310ab 100644
--- a/third_party/libxml/include/libxml/xmlversion.h.in
+++ b/third_party/libxml/include/libxml/xmlversion.h.in
@@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
 #endif
 
 /**
+ * LIBXML_ICU_ENABLED:
+ *
+ * Whether icu support is available
+ */
+#if @WITH_ICU@
+#define LIBXML_ICU_ENABLED
+#endif
+
+/**
  * LIBXML_ISO8859X_ENABLED:
  *
  * Whether ISO-8859-* support is made available in case iconv is not
diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c
index 85e7599..3ba2a06 100644
--- a/third_party/libxml/parser.c
+++ b/third_party/libxml/parser.c
@@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature)
 #else
             return(0);
 #endif
+        case XML_WITH_ICU:
+#ifdef LIBXML_ICU_ENABLED
+            return(1);
+#else
+            return(0);
+#endif
         default:
 	    break;
      }