ICU-5018 charset conversion support

X-SVN-Rev: 20172
2025-04-06 22:15:31 +00:00 · 2006-08-26 05:30:49 +00:00 · 2006-08-26 05:30:49 +00:00 · e33252c102
commit e33252c102
parent d2841a5885
21 changed files with 9915 additions and 2 deletions
--- a/icu4j/build.xml
+++ b/icu4j/build.xml
@ -177,7 +177,7 @@

    <!-- core does not build richedit or tests -->
    <target name="core" depends="init,coreData,icudata" description="build core classes and data">
-        <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java,com/ibm/icu/math/**/*.java,com/ibm/icu/impl/**/*.java,com/ibm/icu/lang/*.java"
+        <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java,com/ibm/icu/math/**/*.java,com/ibm/icu/impl/**/*.java,com/ibm/icu/lang/*.java,com/ibm/icu/charset/**/*.java"
            excludes="**/CVS/**/*"
            srcdir="${src.dir}"
            destdir="${build.dir}"
@ -431,9 +431,12 @@
    <target name="jarRelease" depends="jar,jarSrc,jarDocs"/>

    <target name="jar" depends="core,indices" description="build full 'icu4j.jar' jar file">
+        <copy todir="${build.dir}/META-INF">
+          <fileset dir="${src.dir}/META-INF" includes="**/*"/>
+        </copy>
        <jar jarfile="${jar.file}"
            compress="true"
-            includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*"
+            includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*,META-INF/services/*"
            basedir="${build.dir}"
            manifest="${icu4j.manifest}"/>
    </target>
--- a/icu4j/src/META-INF/services/java.nio.charset.spi.CharsetProvider
+++ b/icu4j/src/META-INF/services/java.nio.charset.spi.CharsetProvider
@ -0,0 +1,3 @@
+# Copyright (C) 2006, International Business Machines Corporation and others. All Rights Reserved.   
+# icu4j converters
+com.ibm.icu.charset.CharsetProviderICU
--- a/icu4j/src/com/ibm/icu/charset/CharsetCallback.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetCallback.java
@ -0,0 +1,158 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CoderResult;
+
+
+/**
+ * Error callbacks for ICU charset conversion, together with the context
+ * option strings that tune them.
+ * <p>
+ * Every SKIP and SUBSTITUTE callback follows the same rule:
+ * <ul>
+ * <li>context is null: always handle the error (skip or substitute);</li>
+ * <li>context equals the callback's STOP_ON_ILLEGAL option: handle only
+ *     unmappable input and return every other result unchanged;</li>
+ * <li>any other context: return the incoming result unchanged.</li>
+ * </ul>
+ * The STOP callbacks always return the incoming result unchanged.
+ */
+/*public*/ class CharsetCallback {
+    /**
+     * FROM_U, TO_U context options for sub callback
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String SUB_STOP_ON_ILLEGAL = "i";
+
+    /**
+     * FROM_U, TO_U context options for skip callback
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String SKIP_STOP_ON_ILLEGAL = "i";
+
+    /**
+     * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String ESCAPE_ICU  = null;
+    /**
+     * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String ESCAPE_JAVA     =  "J";
+    /**
+     * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+     * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String ESCAPE_C        = "C";
+    /**
+     * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+     * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String ESCAPE_XML_DEC  = "D";
+    /**
+     * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+     * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String ESCAPE_XML_HEX  = "X";
+    /**
+     * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+     * @draft ICU 3.6
+     */
+    /*public*/ static final String ESCAPE_UNICODE  = "U";
+
+    /**
+     * Callback invoked for an error found while converting bytes to chars.
+     */
+    public interface Decoder {
+        public CoderResult call(CharsetDecoderICU decoder, Object context, 
+                                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                                char[] buffer, int length, CoderResult cr);
+    }
+
+    /**
+     * Callback invoked for an error found while converting chars to bytes.
+     */
+    public interface Encoder {
+        public CoderResult call(CharsetEncoderICU encoder, Object context, 
+                                CharBuffer source, ByteBuffer target, IntBuffer offsets, 
+                                char[] buffer, int length, int cp, CoderResult cr);
+    }
+
+    /**
+     * Decides whether a SKIP/SUBSTITUTE callback should act on an error.
+     * The comparison is made from the option side, so a context object that
+     * is not a String simply fails to match instead of raising a
+     * ClassCastException.
+     * @param context             the callback context supplied by the converter, may be null
+     * @param stopOnIllegalOption the STOP_ON_ILLEGAL option of the calling callback
+     * @param cr                  the error being reported
+     * @return true if the callback should skip/substitute, false if it
+     *         should return cr unchanged
+     */
+    private static boolean shouldHandle(Object context, String stopOnIllegalOption, CoderResult cr) {
+        if (context == null) {
+            return true;
+        }
+        if (stopOnIllegalOption.equals(context)) {
+            /* only unmappable input is handled; everything else stops */
+            return cr.isUnmappable();
+        }
+        return false;
+    }
+
+    /**
+     * Encoder callback that drops the offending input by reporting UNDERFLOW.
+     */
+    public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
+        public CoderResult call(CharsetEncoderICU encoder, Object context, 
+                CharBuffer source, ByteBuffer target, IntBuffer offsets, 
+                char[] buffer, int length, int cp, CoderResult cr){
+            return shouldHandle(context, SKIP_STOP_ON_ILLEGAL, cr) ? CoderResult.UNDERFLOW : cr;
+        }
+    };
+
+    /**
+     * Decoder callback that drops the offending input by reporting UNDERFLOW.
+     */
+    public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context, 
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr){
+            return shouldHandle(context, SKIP_STOP_ON_ILLEGAL, cr) ? CoderResult.UNDERFLOW : cr;
+        }
+    };
+
+    /**
+     * Encoder callback that delegates to the encoder's cbFromUWriteSub and
+     * returns its result.
+     */
+    public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){        
+        public CoderResult call(CharsetEncoderICU encoder, Object context, 
+                CharBuffer source, ByteBuffer target, IntBuffer offsets, 
+                char[] buffer, int length, int cp, CoderResult cr){
+            if(shouldHandle(context, SUB_STOP_ON_ILLEGAL, cr)){
+                return encoder.cbFromUWriteSub(encoder, source, target, offsets);
+            }
+            return cr;
+        }
+    };
+
+    /**
+     * Decoder callback that delegates to the decoder's cbToUWriteSub and
+     * returns its result.
+     */
+    public static final Decoder TO_U_CALLBACK_SUBSTITUTE  = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context, 
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr){
+            if(shouldHandle(context, SUB_STOP_ON_ILLEGAL, cr)){
+                return decoder.cbToUWriteSub(decoder, source, target, offsets);
+            }
+            return cr;
+        }
+    };
+
+    /**
+     * Encoder callback that leaves the error unresolved: returns cr unchanged.
+     */
+    public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
+        public CoderResult call(CharsetEncoderICU encoder, Object context, 
+                CharBuffer source, ByteBuffer target, IntBuffer offsets, 
+                char[] buffer, int length, int cp, CoderResult cr){
+            return cr;
+        }
+    };
+
+    /**
+     * Decoder callback that leaves the error unresolved: returns cr unchanged.
+     */
+    public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context, 
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr){
+            return cr;
+        }
+    };  
+}
--- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
@ -0,0 +1,639 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.impl.Assert;
+
+public abstract class CharsetDecoderICU extends CharsetDecoder{ 
+
+    /* converter state word; cleared by implReset() */
+    protected int    toUnicodeStatus;
+    /* bytes of the input sequence being processed; toUnicodeWithCallback copies the
+       first toULength of them into invalidCharBuffer before invoking a callback */
+    protected byte[] toUBytesArray = new byte[128];
+    protected int    toUBytesBegin = 0;
+    /* number of pending bytes; when non-zero at the end of flushed input,
+       toUnicodeWithCallback reports a malformed sequence of this length */
+    protected int    toULength;
+    /* output that did not fit into an earlier target buffer; decode() writes
+       charErrorBufferLength chars from this array to the target before converting */
+    protected char[] charErrorBufferArray = new char[128];
+    protected int    charErrorBufferLength;
+    protected int    charErrorBufferBegin;
+    /* copy of the offending input handed to the error callback, and its length */
+    protected char[] invalidCharBuffer = new char[128];
+    protected int    invalidCharLength;
+    
+    /* store previous UChars/chars to continue partial matches */
+    /* NOTE(review): preToUArray is not allocated in the visible part of this class -
+       presumably subclasses allocate it; confirm */
+    protected byte[] preToUArray;
+    protected int    preToUBegin;
+    protected int    preToULength;       /* negative: replay */
+    protected int    preToUFirstLength;  /* length of first character */
+    
+    /* context object passed unchanged to the error callback */
+    protected Object toUContext = null;
+    /* callbacks selected through implOnUnmappableCharacter / implOnMalformedInput; both default to STOP */
+    private CharsetCallback.Decoder onUnmappableInput = CharsetCallback.TO_U_CALLBACK_STOP;
+    private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
+    /*
+     * Dispatching callback used by the conversion loop: unmappable results go to
+     * onUnmappableInput, malformed results go to onMalformedInput, and any other
+     * result goes to the STOP callback.
+     */
+    protected CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context,
+                                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                                char[] buffer, int length, CoderResult cr) {
+            CharsetCallback.Decoder handler;
+            if (cr.isUnmappable()) {
+                handler = onUnmappableInput;
+            } else if (cr.isMalformed()) {
+                handler = onMalformedInput;
+            } else {
+                handler = CharsetCallback.TO_U_CALLBACK_STOP;
+            }
+            return handler.call(decoder, context, source, target, offsets, buffer, length, cr);
+        }
+    };
+                                                                
+    /*
+     * Creates a decoder for the given charset. The average chars-per-byte passed
+     * to the superclass is the reciprocal of cs.maxCharsPerByte; the maximum is
+     * cs.maxCharsPerByte itself.
+     */
+    protected CharsetDecoderICU(CharsetICU cs) {
+        super(cs, 1.0f / (float) cs.maxCharsPerByte, cs.maxCharsPerByte);
+    }
+
+    
+    /**
+     * Sets the action to be taken if a malformed (illegal) input sequence is
+     * encountered. The action is mapped to a callback by getCallback:
+     * REPLACE substitutes, IGNORE skips, REPORT (and any other value) stops.
+     * @param newAction action to be taken
+     * @draft ICU 3.6
+     */
+    protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        onMalformedInput = getCallback(newAction);
+    }
+    
+    /**
+     * Sets the action to be taken if an unmappable input sequence is
+     * encountered. The action is mapped to a callback by getCallback:
+     * REPLACE substitutes, IGNORE skips, REPORT (and any other value) stops.
+     * @param newAction action to be taken
+     * @draft ICU 3.6
+     */
+    protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {
+        onUnmappableInput = getCallback(newAction);
+    }
+    /*
+     * Maps a java.nio CodingErrorAction to the matching decoder callback:
+     * IGNORE -> skip, REPLACE -> substitute, REPORT and anything else -> stop.
+     */
+    private static CharsetCallback.Decoder getCallback(CodingErrorAction action){
+        if(action==CodingErrorAction.IGNORE){
+            return CharsetCallback.TO_U_CALLBACK_SKIP;
+        }
+        if(action==CodingErrorAction.REPLACE){
+            return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE;
+        }
+        /* REPORT, null and unknown actions all stop on error */
+        return CharsetCallback.TO_U_CALLBACK_STOP;
+    }
+    /**
+     * Flush hook of the decoder. This implementation writes nothing to the
+     * output buffer and does not modify any converter state; it only reports
+     * success.
+     * @param out the output buffer (not used by this implementation)
+     * @return always CoderResult.UNDERFLOW
+     * @draft ICU 3.6
+     */
+    protected final CoderResult implFlush(CharBuffer out) {
+        return CoderResult.UNDERFLOW;
+    }
+    
+    /**
+     * Resets the to-Unicode state of the converter: the partial-match replay
+     * bookkeeping, the pending output buffer indices, the pending input
+     * length and the status word are all set to zero.
+     * @draft ICU 3.6
+     */
+    protected void implReset() {
+        /* forget input stored to continue partial matches (negative length means replay) */
+        preToUFirstLength = 0;
+        preToULength = 0;
+        preToUBegin = 0;
+
+        /* discard output waiting in the overflow buffer */
+        charErrorBufferBegin = 0;
+        charErrorBufferLength = 0;
+
+        /* no input sequence in progress */
+        toULength = 0;
+        toUnicodeStatus = 0;
+    }
+      
+    /**
+     * Decodes one or more bytes. By default the converter stops and reports
+     * when it meets an error in the input; use
+     * CharsetDecoder.onMalformedInput() to choose a different behaviour.
+     * Conversion state is kept between calls, so a data stream can be
+     * converted buffer by buffer and a multibyte sequence may be split across
+     * calls. After an error the conversion may be continued by calling again
+     * with suitably modified parameters. All conversions should be finished
+     * with a call to the flush method.
+     * @param in buffer to decode
+     * @param out buffer to populate with decoded result
+     * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+     *         action succeeds or more input is needed for completing the decoding action.
+     * @draft ICU 3.6
+     */
+    protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
+        if(in.hasRemaining()){
+            /* step over the bytes the converter already holds as pending input */
+            in.position(in.position()+toUCountPending());
+
+            /* convert without offsets and without flushing */
+            CoderResult result = decode(in, out, null, false);
+
+            /* move the position back by the number of bytes now pending */
+            setSourcePosition(in);
+            return result;
+        }
+        return CoderResult.UNDERFLOW;
+    }
+    
+    /**
+     * Implements the ICU semantic for decode operation. Subclasses provide
+     * the actual conversion; toUnicodeWithCallback calls this method once per
+     * conversion pass.
+     * @param in bytes to decode
+     * @param out buffer that receives the decoded chars
+     * @param offsets buffer for the source offsets of the chars written;
+     *                callers in this class may pass null
+     * @return the result of the conversion pass
+     */
+    protected abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets);
+    
+    /**
+     * Implements the ICU semantic for decode operation: first writes out any
+     * chars left in the overflow buffer by an earlier call, then runs the
+     * conversion loop with callback handling.
+     * @param source  bytes to decode; must not be null
+     * @param target  buffer that receives the decoded chars; must not be null
+     * @param offsets buffer that receives one entry per char written, or null
+     *                if offsets are not wanted
+     * @param flush   passed through to the conversion loop as its flush flag
+     * @return CoderResult.OVERFLOW if the target filled up while the overflow
+     *         buffer was being written out; CoderResult.UNDERFLOW if there is
+     *         nothing to convert; otherwise the result of
+     *         toUnicodeWithCallback
+     * @throws IllegalArgumentException if source or target is null
+     */
+    protected final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+    
+        /* check parameters */
+        if(target==null || source==null) {
+            throw new IllegalArgumentException();
+        }
+        /*
+         * agljport:fix - the C implementation also rejects source/target
+         * ranges that exceed the int32_t number range at this point; that
+         * check has not been ported.
+         */
+        
+        /* flush the target overflow buffer */
+        if(charErrorBufferLength>0) {
+            char[] overflow=charErrorBufferArray;
+            int length=charErrorBufferLength;
+            int i=0;
+
+            do {
+                if(!target.hasRemaining()) {
+                    /*
+                     * the overflow buffer contains too much: move the rest to
+                     * the front of the buffer and keep it for the next call
+                     * (remaining() is never negative, so test for "no room")
+                     */
+                    int rest=length-i;
+                    System.arraycopy(overflow, i, overflow, 0, rest);
+                    charErrorBufferLength=rest;
+                    return CoderResult.OVERFLOW;
+                }
+    
+                /* copy the overflow contents to the target */
+                target.put(overflow[i++]);
+                if(offsets!=null) {
+                    offsets.put(-1); /* no source index available for old output */
+                }
+            } while(i<length);
+    
+            /* the overflow buffer is completely copied to the target */
+            charErrorBufferLength=0;
+        }
+    
+        if(!flush && source.remaining()==0 && preToULength>=0) {
+            /* the overflow buffer is emptied and there is no new input: we are done */
+            return CoderResult.UNDERFLOW;
+        }
+    
+        /*
+         * Do not simply return with a buffer overflow error if the target is
+         * full and flush is not set, because it is possible that the source
+         * will not generate any output.
+         * For example, the skip callback may be called;
+         * it does not output anything.
+         */
+    
+        return toUnicodeWithCallback(source, target, offsets, flush);
+    }
+
+    /* maximum number of indexed bytes; also the capacity of the replay buffer allocated in toUnicodeWithCallback */
+    private static final int EXT_MAX_BYTES = 0x1f;
+    /**
+     * Adjusts the length entries of offsets that start at the buffer's
+     * current position, and leaves the position just past them whenever an
+     * adjustment is made.
+     * <ul>
+     * <li>sourceIndex &gt;= 0 and sourceIndex == errorInputLength: nothing is
+     *     changed (position included);</li>
+     * <li>sourceIndex &gt; errorInputLength: the difference is added to every
+     *     non-negative entry, negative entries are left as they are;</li>
+     * <li>otherwise (sourceIndex &lt; 0, or the error input started before
+     *     this buffer): every entry is set to -1.</li>
+     * </ul>
+     * @param offsets          buffer holding the offsets to adjust
+     * @param length           number of entries to adjust
+     * @param sourceIndex      source index to add, or negative if offsets are not tracked
+     * @param errorInputLength length of the input sequence that caused an error, if any
+     */
+    private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
+        int delta;
+
+        if(sourceIndex>=0) {
+            /*
+             * adjust each offset by adding the previous sourceIndex
+             * minus the length of the input sequence that caused an
+             * error, if any
+             */
+            delta=sourceIndex-errorInputLength;
+        } else {
+            /*
+             * set each offset to -1 because this conversion function
+             * does not handle offsets
+             */
+            delta=-1;
+        }
+
+        int limit=offsets.position()+length;
+        if(delta==0) {
+            /* most common case, nothing to do */
+        } else if(delta>0) {
+            /*
+             * add the delta to each offset (but not if the offset is <0);
+             * absolute get/put are used so that entries that are skipped
+             * still advance the scan instead of stalling it
+             */
+            int pos=offsets.position();
+            if(pos<limit) {
+                for(; pos<limit; pos++) {
+                    int offset=offsets.get(pos);
+                    if(offset>=0) {
+                        offsets.put(pos, offset+delta);
+                    }
+                }
+                offsets.position(limit);
+            }
+        } else /* delta<0 */ {
+            /*
+             * set each offset to -1 because this conversion function
+             * does not handle offsets
+             * or the error input sequence started in a previous buffer
+             */
+            while(offsets.position()<limit) {
+                offsets.put(-1);
+            }
+        }
+    }
+    /**
+     * Conversion driver: repeatedly calls decodeLoop(source, target, offsets)
+     * and, after each pass, updates the offsets, handles the end of the input
+     * and hands errors to toCharErrorBehaviour. When preToULength is negative,
+     * the bytes stored in preToUArray are converted first from a temporary
+     * replay buffer before the real source is used again.
+     * @param source  bytes to decode
+     * @param target  buffer that receives the decoded chars
+     * @param offsets buffer for source offsets, or null if offsets are not wanted
+     * @param flush   true if a truncated sequence at the end of source is to be
+     *                reported as malformed and the converter reset once the
+     *                end of the input has been processed
+     * @return the underflow result once the input is consumed, or the
+     *         overflow/error result that the callback did not resolve
+     */
+    protected final CoderResult toUnicodeWithCallback(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+        
+        int sourceIndex;
+        int errorInputLength;
+        boolean converterSawEndOfInput, calledCallback;
+        int t=target.position();
+        int s=source.position();
+        /* variables for m:n conversion */
+        ByteBuffer replayArray = ByteBuffer.allocate(EXT_MAX_BYTES);
+        int replayArrayIndex = 0;
+            
+        ByteBuffer realSource=null;
+        boolean realFlush=false;
+        int realSourceIndex=0;
+    
+
+        CoderResult cr = CoderResult.UNDERFLOW;
+        
+        /* get the converter implementation function */
+        sourceIndex=0;
+
+        if(preToULength>=0) {
+            /* normal mode */
+        } else {
+            /*
+             * Previous m:n conversion stored source units from a partial match
+             * and failed to consume all of them.
+             * We need to "replay" them from a temporary buffer and convert them first.
+             */
+            realSource=source;
+            realFlush=flush;
+            realSourceIndex=sourceIndex;
+            //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
+            /* NOTE(review): the copy starts at index 0 of preToUArray, not at preToUBegin as in the commented-out call above - confirm */
+            replayArray.put(preToUArray,0, -preToULength);
+            source=replayArray;
+            source.position(0);
+            source.limit(replayArrayIndex-preToULength);
+            flush=false;
+            sourceIndex=-1;
+            preToULength=0;
+        }
+    
+        /*
+         * loop for conversion and error handling
+         *
+         * loop {
+         *   convert
+         *   loop {
+         *     update offsets
+         *     handle end of input
+         *     handle errors/call callback
+         *   }
+         * }
+         */
+        for(;;) {
+            if(cr.isUnderflow()) {
+                /* convert */
+                cr = decodeLoop(source, target, offsets);
+    
+                /*
+                 * set a flag for whether the converter
+                 * successfully processed the end of the input
+                 *
+                 * need not check cnv->preToULength==0 because a replay (<0) will cause
+                 * s<sourceLimit before converterSawEndOfInput is checked
+                 */
+                converterSawEndOfInput= (cr.isUnderflow() && flush && source.remaining()==0 && toULength==0);
+            } else {
+                /* handle error from getNextUChar() */
+                converterSawEndOfInput=false;
+            }
+    
+            /* no callback called yet for this iteration */
+            calledCallback=false;
+    
+            /* no sourceIndex adjustment for conversion, only for callback output */
+            errorInputLength=0;
+    
+            /*
+             * loop for offsets and error handling
+             *
+             * iterates at most 3 times:
+             * 1. to clean up after the conversion function
+             * 2. after the callback
+             * 3. after the callback again if there was truncated input
+             */
+            for(;;) {
+                /* update offsets if we write any */
+                if(offsets!=null) {
+
+                    int length=(target.position()-t);
+                    if(length>0) {
+                        updateOffsets(offsets, length, sourceIndex, errorInputLength);
+    
+                                            
+                        /*
+                         * if a converter handles offsets and updates the offsets
+                         * pointer at the end, then pArgs->offset should not change
+                         * here;
+                         * however, some converters do not handle offsets at all
+                         * (sourceIndex<0) or may not update the offsets pointer
+                         */
+                        //TODO: pArgs->offsets=offsets+=length;
+                    }
+    
+                    if(sourceIndex>=0) {
+                        sourceIndex+=(source.position()-s);
+                    }
+                                    
+                }
+    
+                if(preToULength<0) {
+                    /*
+                     * switch the source to new replay units (cannot occur while replaying)
+                     * after offset handling and before end-of-input and callback handling
+                     */
+                    if(realSource==null)
+                                    {
+                        realSource=source;
+                        realFlush=flush;
+                        realSourceIndex=sourceIndex;
+    
+                        //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
+                        /*
+                         * NOTE(review): unlike the replay setup before the outer loop, the
+                         * position of replayArray is not set back to 0 after put(), so
+                         * position and limit appear to coincide afterwards; put() also
+                         * continues from wherever an earlier replay left the buffer - confirm
+                         */
+                        replayArray.put(preToUArray,0, -preToULength);
+
+                        source=replayArray;
+                        source.limit(replayArrayIndex-preToULength);
+                        flush=false;
+                        if((sourceIndex+=preToULength)<0) {
+                            sourceIndex=-1;
+                        }
+    
+                        preToULength=0;
+                    } else {
+                        /* see implementation note before _fromUnicodeWithCallback() */
+                        //agljport:todo U_ASSERT(realSource==NULL);
+                        /* this branch is only entered when realSource is not null, so the assertion below is evaluated with a false argument */
+                       Assert.assrt(realSource==null);
+                    }
+                }
+    
+                /* update pointers */
+                s=source.position();
+                t=target.position();
+    
+                if(cr.isUnderflow()) {
+                    if(s<source.limit())
+                                    {
+                        /*
+                         * continue with the conversion loop while there is still input left
+                         * (continue converting by breaking out of only the inner loop)
+                         */
+                        break;
+                    } else if(realSource!=null) {
+                        /* switch back from replaying to the real source and continue */
+                        source = realSource;
+                        flush=realFlush;
+                        sourceIndex=realSourceIndex;
+                        realSource=null;
+                        break;
+                    } else if(flush && toULength>0) {
+                        /*
+                         * the entire input stream is consumed
+                         * and there is a partial, truncated input sequence left
+                         */
+    
+                        /* inject an error and continue with callback handling */
+                        cr = CoderResult.malformedForLength(toULength);
+                        calledCallback=false; /* new error condition */
+                    } else {
+                        /* input consumed */
+                        if(flush) {
+                            /*
+                             * return to the conversion loop once more if the flush
+                             * flag is set and the conversion function has not
+                             * successfully processed the end of the input yet
+                             *
+                             * (continue converting by breaking out of only the inner loop)
+                             */
+                            if(!converterSawEndOfInput) {
+                                break;
+                            }
+    
+                            /* reset the converter without calling the callback function */
+                            implReset();
+                        }
+    
+                        /* done successfully */
+                        return cr;
+                    }
+                }
+    
+                /* U_FAILURE(*err) */
+                {
+    
+                    /*
+                     * NOTE(review): the last operand requires a result to be malformed and
+                     * unmappable at the same time, which looks like it can never hold; the
+                     * intent was presumably "neither malformed nor unmappable" - confirm
+                     */
+                    if( calledCallback || cr.isOverflow() ||
+                        (cr.isMalformed() && cr.isUnmappable())
+                      ) {
+                        /*
+                         * the callback did not or cannot resolve the error:
+                         * set output pointers and return
+                         *
+                         * the check for buffer overflow is redundant but it is
+                         * a high-runner case and hopefully documents the intent
+                         * well
+                         *
+                         * if we were replaying, then the replay buffer must be
+                         * copied back into the UConverter
+                         * and the real arguments must be restored
+                         */
+                        if(realSource!=null) {
+                            int length;
+                            Assert.assrt(preToULength==0);
+                            length=(int)(source.limit()-source.position());
+                            if(length>0) {
+                                //UConverterUtility.uprv_memcpy(preToUArray, preToUBegin, pArgs.sourceArray, pArgs.sourceBegin, length);
+                                source.get(preToUArray, preToUBegin, length);
+                                preToULength=(byte)-length;
+                            }
+    
+                            /* these assignments only change the local variables; the method returns right after */
+                            source=realSource;
+                            flush=realFlush;
+                        }
+                        return cr;
+                    }
+                }
+    
+                /* copy toUBytes[] to invalidCharBuffer[] */
+                errorInputLength=invalidCharLength=toULength;
+                if(errorInputLength>0) {
+                    copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength);
+                }
+    
+                /* set the converter state to deal with the next character */
+                toULength=0;
+    
+                /* call the callback function */
+                cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr);
+                /*
+                 * loop back to the offset handling
+                 *
+                 * this flag will indicate after offset handling
+                 * that a callback was called;
+                 * if the callback did not resolve the error, then we return
+                 */
+                calledCallback=true;
+            }
+        }
+    }
+	/**
+     * Finalizer hook for the decoder. The body is empty: nothing is released
+     * here and <code>super.finalize()</code> is not invoked.
+     * @throws Throwable declared by the signature; this body throws nothing
+     * @draft ICU 3.6
+     */
+    protected void finalize()throws Throwable{
+    }
+
+    /**
+     * Reports how many input units this decoder currently holds in its
+     * internal state. The answer is the magnitude of <code>preToULength</code>
+     * when that field is non-zero (it may be positive or negative), otherwise
+     * <code>toULength</code> when that is positive, otherwise 0.
+     * This is useful for mapping ICU converter semantics onto those of iconv
+     * and is not needed for normal conversion.
+     * @return the pending count as described above
+     * @draft ICU 3.4
+     */
+    /*public*/ int toUCountPending() {
+        if (preToULength != 0) {
+            /* the sign only distinguishes the two buffering modes; report the size */
+            return preToULength > 0 ? preToULength : -preToULength;
+        }
+        if (toULength > 0) {
+            return toULength;
+        }
+        return 0;
+    }
+    
+
+    private final void setSourcePosition(ByteBuffer source){
+        // ok was there input held in the previous invocation of decodeLoop 
+        // that resulted in output in this invocation?
+        source.position(source.position() - toUCountPending());
+        
+    }
+    /**
+     * Copies <code>length</code> elements from <code>src</code>, starting at
+     * <code>srcOffset</code>, into <code>dst</code>, starting at
+     * <code>dstOffset</code>, converting each byte to a char with a plain cast.
+     * <p>
+     * The loop counts copied elements instead of comparing a source index
+     * against <code>length</code>; the earlier form copied only
+     * <code>length - srcOffset</code> elements whenever <code>srcOffset</code>
+     * was greater than zero.
+     * @param src       source bytes
+     * @param srcOffset index of the first byte to copy
+     * @param dst       destination chars
+     * @param dstOffset index of the first char to write
+     * @param length    number of elements to copy
+     */
+    private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
+        for(int i=0; i<length; i++){
+            // NOTE(review): the cast sign-extends, so a byte >= 0x80 becomes 0xFF80..0xFFFF;
+            // confirm whether consumers of dst expect that or an unsigned (& 0xFF) value.
+            dst[dstOffset+i]=(char)src[srcOffset+i];
+        }
+    }
+    /**
+     * Writes <code>length</code> chars from <code>ucharsArray</code>, starting
+     * at <code>ucharsBegin</code>, to <code>target</code>. When
+     * <code>offsets</code> is not null, <code>sourceIndex</code> is recorded
+     * once for every char written. Chars that do not fit are stored in the
+     * decoder's <code>charErrorBufferArray</code> and
+     * <code>charErrorBufferLength</code> is set to their count.
+     * <p>
+     * Free space is checked before each write. The earlier exception-driven
+     * loop had already advanced <code>ucharsBegin</code> when
+     * <code>target.put</code> threw, so the char that did not fit was dropped
+     * and the overflow copy started one element too far into the array.
+     * @param cnv         decoder whose overflow buffer receives the excess chars
+     * @param ucharsArray chars to write
+     * @param ucharsBegin index of the first char to write
+     * @param length      number of chars to write
+     * @param target      destination buffer
+     * @param offsets     optional buffer receiving one source index per char written; may be null
+     * @param sourceIndex value recorded in <code>offsets</code> for each char
+     * @return <code>CoderResult.OVERFLOW</code> if some chars had to be stored
+     *         in the overflow buffer, otherwise <code>CoderResult.UNDERFLOW</code>
+     */
+    protected static final CoderResult toUWriteUChars( CharsetDecoderICU cnv,
+                                                char[] ucharsArray, int ucharsBegin, int length,  
+                                                CharBuffer target, IntBuffer offsets, int sourceIndex) {
+        
+        CoderResult cr = CoderResult.UNDERFLOW;
+        
+        /* write UChars */
+        if(offsets==null) {
+            while(length>0 && target.hasRemaining()) {
+                target.put(ucharsArray[ucharsBegin++]);
+                --length;
+            }
+        } else {
+            /* output with offsets; stop before writing a char whose offset cannot be recorded */
+            while(length>0 && target.hasRemaining() && offsets.hasRemaining()) {
+                target.put(ucharsArray[ucharsBegin++]);
+                offsets.put(sourceIndex);
+                --length;
+            }
+        }
+        /* write overflow: whatever is left did not fit */
+        if(length>0) {
+            cr = CoderResult.OVERFLOW;
+            cnv.charErrorBufferLength= length;
+            // NOTE(review): this advances charErrorBufferBegin past the stored chars, exactly as
+            // before; confirm that the code draining the overflow buffer expects that.
+            do {
+                cnv.charErrorBufferArray[cnv.charErrorBufferBegin++]=ucharsArray[ucharsBegin++];
+            } while(--length>0);
+        }
+        return cr;
+    }
+    /**
+     * Writes the substitution for an input sequence that could not be
+     * converted. Subclasses may override this method if required.
+     * <p>
+     * When exactly one invalid unit is pending and the charset defines a
+     * non-zero <code>subChar1</code>, the single char U+001A is written;
+     * otherwise the decoder's replacement string is written.
+     * @param decoder the decoder that hit the error
+     * @param source  the input buffer; its current position is used as the source index
+     * @param target  the buffer receiving the substitution
+     * @param offsets optional offsets buffer, passed through to <code>toUWriteUChars</code>
+     * @return the result of <code>toUWriteUChars</code>
+     */
+    protected CoderResult cbToUWriteSub(CharsetDecoderICU decoder, 
+                                        ByteBuffer source, CharBuffer target, 
+                                        IntBuffer offsets){
+        String sub = decoder.replacement();
+        CharsetICU cs = (CharsetICU) decoder.charset();
+        if (decoder.invalidCharLength==1 && cs.subChar1 != 0x00) {
+            char[] subArr = new char[] { 0x1a };
+            /* use the array's own length: the replacement string may be longer than one char */
+            return CharsetDecoderICU.toUWriteUChars(decoder, subArr, 0,
+                    subArr.length, target, offsets, source.position());
+        } else {
+            return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(),
+                    0, sub.length(), target, offsets, source.position());
+            
+        }
+    }
+}
--- a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java
@ -0,0 +1,631 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.				                                  *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
+
+import com.ibm.icu.impl.Assert;
+import com.ibm.icu.text.UTF16;
+
+
+public abstract class CharsetEncoderICU extends CharsetEncoder {
+
+    protected byte[] errorBuffer = new byte[30];
+    protected int errorBufferLength = 0;
+    
+    /** these are for encodeLoopICU */
+    protected int fromUnicodeStatus;
+    protected int fromUChar32;
+    protected boolean useSubChar1;
+    
+    /* store previous UChars/chars to continue partial matches */
+    protected int preFromUFirstCP; /* >=0: partial match */
+    protected char[] preFromUArray;
+    protected int preFromUBegin;
+    protected int preFromULength;    /* negative: replay */
+    
+    protected char[] invalidUCharBuffer = new char[2];    
+    protected int    invalidUCharLength;
+    protected Object fromUContext;
+    private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;
+    private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
+    /**
+     * Dispatching callback used by the conversion loop: an unmappable result is
+     * forwarded to <code>onUnmappableInput</code>, a malformed result to
+     * <code>onMalformedInput</code>, and any other result to
+     * <code>CharsetCallback.FROM_U_CALLBACK_STOP</code>.
+     */
+    protected CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder(){ 
+                                                                        public CoderResult call(CharsetEncoderICU encoder, Object context, 
+                                                                                                CharBuffer source, ByteBuffer target, IntBuffer offsets, 
+                                                                                                char[] buffer, int length, int cp, CoderResult cr) {
+                                                                                if(cr.isUnmappable()){
+                                                                                    return onUnmappableInput.call(encoder, context, 
+                                                                                                                  source, target, offsets, 
+                                                                                                                  buffer, length, cp, cr);
+                                                                                }else if(cr.isMalformed()){
+                                                                                    return onMalformedInput.call(encoder, context, 
+                                                                                                                 source, target, offsets, 
+                                                                                                                 buffer, length, cp, cr);    
+                                                                                }
+                                                                                /* neither unmappable nor malformed: fall back to the stop callback */
+                                                                                return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, 
+                                                                                                                                 source, target, offsets, 
+                                                                                                                                 buffer, length, cp, cr);
+
+                                                                        }
+                                                                    };
+
+   /** 
+     * Constructs a new encoder for the given charset. The average
+     * bytes-per-char value handed to the superclass is the integer mean of the
+     * charset's <code>minBytesPerChar</code> and <code>maxBytesPerChar</code>
+     * (integer division, so any fractional part is dropped).
+     * @param cs the charset for which the encoder is created
+     * @param replacement the substitution bytes
+     * @draft ICU 3.6
+     */
+    protected CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
+        super(cs, (cs.minBytesPerChar+cs.maxBytesPerChar)/2, cs.maxBytesPerChar, replacement);
+    }
+
+	/**
+	 * Sets the action to be taken when a malformed input sequence is
+	 * encountered, by selecting the matching callback via
+	 * <code>getCallback</code>.
+	 * @param newAction action to be taken
+     * @draft ICU 3.6
+	 */
+	protected void implOnMalformedInput(CodingErrorAction newAction) {
+	    onMalformedInput = getCallback(newAction);
+	}
+
+	/**
+	 * Sets the action to be taken when an unmappable character is
+	 * encountered, by selecting the matching callback via
+	 * <code>getCallback</code>.
+	 * @param newAction action to be taken
+     * @draft ICU 3.6
+	 */
+	protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
+        onUnmappableInput = getCallback(newAction);
+	}
+    
+    /**
+     * Maps a <code>CodingErrorAction</code> to the encoder callback that
+     * implements it: REPLACE selects the substitute callback, IGNORE the skip
+     * callback, and every other value (including REPORT) the stop callback.
+     * @param action the requested error action
+     * @return the corresponding callback
+     */
+    private static CharsetCallback.Encoder getCallback(CodingErrorAction action){
+        if(action==CodingErrorAction.REPLACE){
+            return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
+        }
+        if(action==CodingErrorAction.IGNORE){
+            return CharsetCallback.FROM_U_CALLBACK_SKIP;
+        }
+        /* REPORT and anything unrecognised both stop */
+        return CharsetCallback.FROM_U_CALLBACK_STOP;
+    }
+
+	/**
+	 * Flush hook of the encoder. This implementation writes nothing to
+	 * <code>out</code> and changes no state; it always reports underflow.
+	 * @param out the output buffer (unused by this implementation)
+	 * @return always <code>CoderResult.UNDERFLOW</code>
+     * @draft ICU 3.6
+	 */
+	protected CoderResult implFlush(ByteBuffer out) {
+        return CoderResult.UNDERFLOW;
+	}
+
+	/**
+	 * Resets the from-Unicode state of the converter by zeroing the overflow
+	 * byte count, the pending code point, the converter status word and the
+	 * partial-match bookkeeping fields.
+     * @draft ICU 3.6
+	 */
+	protected void implReset() {
+        /* partial-match / replay bookkeeping */
+        preFromULength = 0;
+        preFromUFirstCP = 0;
+        preFromUBegin = 0;
+        /* pending code point and converter status */
+        fromUnicodeStatus = 0;
+        fromUChar32 = 0;
+        /* buffered output bytes */
+        errorBufferLength = 0;
+	}
+
+	/**
+	 * Encodes one or more chars. With nothing left in <code>in</code> this
+	 * returns underflow immediately. Otherwise the position of
+	 * <code>in</code> is first advanced by <code>fromUCountPending()</code>,
+	 * the input is run through <code>encode</code> without offsets and without
+	 * flushing, and finally the position is moved back by the count that is
+	 * pending after the conversion.
+	 * To change the error behaviour use @see CharsetEncoder.onMalformedInput()
+	 * @param in buffer holding the chars to encode
+	 * @param out buffer to populate with the encoded bytes
+	 * @return <code>CoderResult.UNDERFLOW</code> when there is no input,
+	 *	   otherwise the result returned by <code>encode</code>
+     * @draft ICU 3.6
+	 */
+	protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+        if(in.hasRemaining()){
+            /* step over the chars reported as pending */
+            int resumeAt = in.position()+fromUCountPending();
+            in.position(resumeAt);
+            /* do the conversion */
+            CoderResult result = encode(in, out, null, false);
+            /* move back by whatever is pending now */
+            setSourcePosition(in);
+            return result;
+        }
+        return CoderResult.UNDERFLOW;
+    }
+    /**
+     * Converter-specific conversion step with ICU buffer-management
+     * semantics, supplied by subclasses. Within this class it is invoked by
+     * <code>fromUnicodeWithCallback</code>, which inspects the returned result
+     * and the <code>fromUChar32</code> / <code>preFromULength</code> fields
+     * afterwards.
+     * @param source  chars to convert
+     * @param target  buffer receiving the converted bytes
+     * @param offsets optional offsets buffer; the callers in this class may pass null
+     * @return the result of the conversion step
+     */
+    protected abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets);
+    
+    /**
+     * Implements ICU semantics for encoding the buffer: bytes left in the
+     * overflow buffer (<code>errorBuffer</code>) by an earlier call are written
+     * to <code>target</code> first, and only then is new input converted via
+     * <code>fromUnicodeWithCallback</code>.
+     * @param source  chars to convert; must not be null
+     * @param target  buffer receiving bytes; must not be null
+     * @param offsets optional offsets buffer; -1 is recorded for every byte
+     *                drained from the overflow buffer; may be null
+     * @param flush   true if this is the last chunk of input
+     * @return <code>CoderResult.OVERFLOW</code> if <code>target</code> filled up
+     *         while draining the overflow buffer,
+     *         <code>CoderResult.UNDERFLOW</code> if there is nothing more to do,
+     *         otherwise the result of <code>fromUnicodeWithCallback</code>
+     * @throws IllegalArgumentException if <code>source</code> or <code>target</code> is null
+     */
+    protected final CoderResult encode(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+
+    
+        /* check parameters */    
+        if(target==null || source==null) {
+            throw new IllegalArgumentException();
+        }
+
+        /*
+         * The C implementation checks here that the buffer sizes do not exceed
+         * the int32_t range; that check is not required in Java and is
+         * intentionally omitted.
+         */
+        
+        /* flush the target overflow buffer */
+        if(errorBufferLength>0) {
+            byte[] overflowArray;
+            int i, length;
+    
+            overflowArray=errorBuffer;
+            length=errorBufferLength;
+            i=0;
+            do {
+                if(target.remaining()==0) {
+                    /* the overflow buffer contains too much, keep the rest (moved to the front) */
+                    int j=0;
+    
+                    do {
+                        overflowArray[j++]=overflowArray[i++];
+                    } while(i<length);
+    
+                    errorBufferLength=(byte)j;
+                    return CoderResult.OVERFLOW;
+                }
+    
+                /* copy the overflow contents to the target */
+                target.put(overflowArray[i++]);
+                if(offsets!=null) {
+                    offsets.put(-1); /* no source index available for old output */
+                }
+            } while(i<length);
+    
+            /* the overflow buffer is completely copied to the target */
+            errorBufferLength=0;
+        }
+    
+        if(!flush && source.remaining()==0 && preFromULength>=0) {
+            /* the overflow buffer is emptied and there is no new input: we are done */
+            return CoderResult.UNDERFLOW;
+        }
+    
+        /*
+         * Do not simply return with a buffer overflow error if
+         * !flush && t==targetLimit
+         * because it is possible that the source will not generate any output.
+         * For example, the skip callback may be called;
+         * it does not output anything.
+         */
+    
+        return fromUnicodeWithCallback(source, target, offsets, flush);
+
+    }
+    /* maximum number of indexed UChars */
+    public static final int EXT_MAX_UCHARS = 19;
+  
+    /**
+     * Runs the subclass conversion step (<code>encodeLoop</code> with offsets)
+     * in a loop and routes malformed/unmappable results to
+     * <code>fromCharErrorBehaviour</code>. Chars stored by a previous partial
+     * match (negative <code>preFromULength</code>) are replayed from a
+     * temporary buffer before the real source is converted.
+     * <p>
+     * Corrections relative to the earlier form of this method:
+     * <ul>
+     * <li>when entering in replay mode, <code>source</code> is switched to the
+     *     replay buffer before position and limit are set, so the caller's
+     *     buffer is no longer modified;</li>
+     * <li>the offending code point is captured before <code>fromUChar32</code>
+     *     is cleared, so the callback receives it instead of 0;</li>
+     * <li>the "callback cannot handle this" test reads "neither malformed nor
+     *     unmappable" (the previous conjunction could never be true);</li>
+     * <li>the replay buffer is cleared before it is refilled inside the loop.</li>
+     * </ul>
+     * @param source  chars to convert
+     * @param target  buffer receiving the converted bytes
+     * @param offsets optional offsets buffer; may be null
+     * @param flush   true if this is the last chunk of input
+     * @return the final result of the conversion; underflow on success
+     */
+    protected final CoderResult fromUnicodeWithCallback(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+        int sBufferIndex;
+        int sourceIndex;
+        int errorInputLength;
+        boolean converterSawEndOfInput, calledCallback;
+        
+
+        /* variables for m:n conversion */
+        CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
+        int replayArrayIndex=0;
+        CharBuffer realSource;
+        boolean realFlush;
+        
+        CoderResult cr = CoderResult.UNDERFLOW;
+        
+        /* get the converter implementation function */
+        sourceIndex=0;
+
+        if(preFromULength>=0) {
+            /* normal mode */
+            realSource=null;    
+            realFlush=false;
+        } else {
+            /*
+             * Previous m:n conversion stored source units from a partial match
+             * and failed to consume all of them.
+             * We need to "replay" them from a temporary buffer and convert them first.
+             */
+            realSource=source;
+            realFlush = flush;
+            
+            replayArray.put(preFromUArray,0, -preFromULength);
+            /* switch to the replay buffer first so that position/limit are not applied to the real source */
+            source=replayArray;
+            source.position(replayArrayIndex);
+            source.limit(replayArrayIndex-preFromULength); //preFromULength is negative, see declaration
+            flush=false;
+            
+            preFromULength=0;
+        }
+
+        /*
+         * loop for conversion and error handling
+         *
+         * loop {
+         *   convert
+         *   loop {
+         *     update offsets
+         *     handle end of input
+         *     handle errors/call callback
+         *   }
+         * }
+         */
+        for(;;) {
+            /* convert */
+            cr = encodeLoop(source, target, offsets);
+            /*
+             * set a flag for whether the converter
+             * successfully processed the end of the input
+             *
+             * need not check cnv.preFromULength==0 because a replay (<0) will cause
+             * s<sourceLimit before converterSawEndOfInput is checked
+             */
+            converterSawEndOfInput= (boolean)(cr.isUnderflow() && flush && source.remaining()==0 && fromUChar32==0);
+    
+            /* no callback called yet for this iteration */
+            calledCallback=false;
+    
+            /* no sourceIndex adjustment for conversion, only for callback output */
+            errorInputLength=0;
+
+            /*
+             * loop for offsets and error handling
+             *
+             * iterates at most 3 times:
+             * 1. to clean up after the conversion function
+             * 2. after the callback
+             * 3. after the callback again if there was truncated input
+             */
+            for(;;) {
+                /* update offsets if we write any */
+                if(offsets!=null) {
+                    // NOTE(review): this advances offsets by the space REMAINING in target, not by
+                    // the number of bytes just written; left unchanged here - confirm intent.
+                    int length = target.remaining();
+                    if(length>0) {
+    
+                        /*
+                         * if a converter handles offsets and updates the offsets
+                         * pointer at the end, then offset should not change
+                         * here;
+                         * however, some converters do not handle offsets at all
+                         * (sourceIndex<0) or may not update the offsets pointer
+                         */
+                        offsets.position(offsets.position()+length);
+                    }
+    
+                    if(sourceIndex>=0) {
+                        sourceIndex+=(int)(source.position());
+                    }
+                }
+
+                if(preFromULength<0) {
+                    /*
+                     * switch the source to new replay units (cannot occur while replaying)
+                     * after offset handling and before end-of-input and callback handling
+                     */
+                    if(realSource==null) {
+                        realSource=source;
+                        realFlush=flush;
+    
+                        /* the replay buffer may carry position/limit from an earlier replay */
+                        replayArray.clear();
+                        replayArray.put(preFromUArray,0, -preFromULength);
+                        
+                        source=replayArray;
+                        source.position(replayArrayIndex);
+                        source.limit(replayArrayIndex-preFromULength);
+                        flush=false;
+                        if((sourceIndex+=preFromULength)<0) {
+                            sourceIndex=-1;
+                        }
+    
+                        preFromULength=0;
+                    } else {
+                        /* new replay units while already replaying: must not happen */
+                        Assert.assrt(realSource==null);
+                    }
+                }
+
+                /* update pointers */
+                sBufferIndex=source.position();
+                if(cr.isUnderflow()) {
+                    if(sBufferIndex<source.limit()) {
+                        /*
+                         * continue with the conversion loop while there is still input left
+                         * (continue converting by breaking out of only the inner loop)
+                         */
+                        break;
+                    } else if(realSource!=null) {
+                        /* switch back from replaying to the real source and continue */
+                        source=realSource;
+                        flush=realFlush;
+                        sourceIndex=source.position();
+                        realSource=null;
+                        break;
+                    } else if(flush && fromUChar32!=0) {
+                        /*
+                         * the entire input stream is consumed
+                         * and there is a partial, truncated input sequence left
+                         */
+    
+                        /* inject an error and continue with callback handling */
+                        cr = CoderResult.malformedForLength(1);
+                        calledCallback=false; /* new error condition */
+                    } else {
+                        /* input consumed */
+                        if(flush) {
+                            /*
+                             * return to the conversion loop once more if the flush
+                             * flag is set and the conversion function has not
+                             * successfully processed the end of the input yet
+                             *
+                             * (continue converting by breaking out of only the inner loop)
+                             */
+                            if(!converterSawEndOfInput) {
+                                break;
+                            }
+    
+                            /* reset the converter without calling the callback function */
+                            implReset();
+                        }
+    
+                        /* done successfully */
+                        return cr;
+                    }
+                }
+
+                /* error handling: cr is not underflow from here on */
+                {
+    
+                    if( calledCallback || cr.isOverflow() ||
+                        (!cr.isMalformed() && !cr.isUnmappable())
+                      ){
+                        /*
+                         * the callback did not or cannot resolve the error:
+                         * set output pointers and return
+                         *
+                         * the check for buffer overflow is redundant but it is
+                         * a high-runner case and hopefully documents the intent
+                         * well
+                         *
+                         * if we were replaying, then the replay buffer must be
+                         * copied back into the converter
+                         * and the real arguments must be restored
+                         */
+                        if(realSource!=null) {
+                            int length;
+    
+                            length=source.remaining();
+                            if(length>0) {
+                                source.get(preFromUArray, 0, length );
+                                preFromULength=(byte)-length;
+                            }
+                            source=realSource;
+                            flush=realFlush;
+                        }
+                        return cr;
+                    }
+                }
+
+                /* callback handling */
+                {
+                    /* remember the code point before the state is cleared, then write it out */
+                    int codePoint = fromUChar32;
+                    errorInputLength = UTF16.append(invalidUCharBuffer, 0, codePoint);
+                    invalidUCharLength = errorInputLength;
+    
+                    /* set the converter state to deal with the next character */
+                    fromUChar32=0;
+    
+                    /* call the callback function */
+                    cr = fromCharErrorBehaviour.call(this, fromUContext, source, target, offsets, invalidUCharBuffer, invalidUCharLength, codePoint, cr);
+                }
+    
+                /*
+                 * loop back to the offset handling
+                 *
+                 * this flag will indicate after offset handling
+                 * that a callback was called;
+                 * if the callback did not resolve the error, then we return
+                 */
+                calledCallback=true;
+            }
+        }
+    }
+	/**
+	 * Ascertains if a given Unicode code point (32bit value for handling surrogates)
+	 * can be converted to the target encoding. If the caller wants to test if a
+	 * surrogate pair can be converted to target encoding then the
+	 * responsibility of assembling the int value lies with the caller.
+	 * <p>
+	 * Note: this base implementation performs no check and always returns true.
+	 * <p>
+	 * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
+	 * <pre>
+	 * int i = 0;
+	 * boolean passed = true;
+	 * while(i&lt;mySource.length){
+	 *	  if(UTF16.isLeadSurrogate(mySource[i]) &amp;&amp; i+1&lt;mySource.length
+	 *	          &amp;&amp; UTF16.isTrailSurrogate(mySource[i+1])){
+	 *	      int temp = UTF16.charAt(mySource,i,i+2,0);
+	 *	      if(!((CharsetEncoderICU) myConv).canEncode(temp)){
+	 *	          passed=false;
+	 *	      }
+	 *	      i+=2;
+	 *	  }else{
+	 *	      i++;
+	 *	  }
+	 * }
+	 * </pre>
+	 * or
+	 * <pre>
+	 * String src = new String(mySource);
+	 * int i = 0, codepoint;
+	 * boolean passed = true;
+	 * while(i&lt;src.length()){
+	 *	codepoint = UTF16.charAt(src,i);
+	 *	i+= (codepoint&gt;0xffff)? 2:1;
+	 *	if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
+	 *	    passed = false;
+	 *	}
+	 * }
+	 * </pre>
+	 *
+	 * @param codepoint Unicode code point as int value
+	 * @return true if a character can be converted; always true in this implementation
+     * @draft ICU 3.6
+	 * 
+	 */
+	public boolean canEncode(int codepoint) {
+	    return true;
+    }
+
+	/**
+	 * Reports whether the given byte array may be used as a replacement.
+	 * This implementation performs no validation and accepts every array.
+	 * @param repl the candidate replacement bytes (not inspected)
+	 * @return always true
+	 */
+	public boolean isLegalReplacement(byte[] repl){
+	    return true;
+    }
+
+	/**
+	 * Finalizer hook for the encoder. The body is empty: nothing is released
+	 * here and <code>super.finalize()</code> is not invoked.
+	 * @exception Throwable declared by the signature; this body throws nothing
+     * @draft ICU 3.6
+	 */
+	protected void finalize() throws Throwable {
+	}
+    
+    /**
+     * Writes <code>bytesLength</code> bytes from <code>bytesArray</code>,
+     * starting at <code>bytesBegin</code>, to <code>out</code>. Bytes that do
+     * not fit are stored at the start of the encoder's
+     * <code>errorBuffer</code>, and <code>errorBufferLength</code> is set to
+     * their count (0 when everything fit).
+     * <p>
+     * When <code>offsets</code> is not null, <code>sourceIndex</code> is
+     * recorded once for every byte actually written to <code>out</code>;
+     * previously no offsets at all were recorded when only part of the bytes
+     * fit. Free space is tested with <code>hasRemaining()</code> instead of
+     * catching <code>BufferOverflowException</code>.
+     * @param cnv         encoder whose overflow buffer receives the excess bytes; must not be null
+     * @param bytesArray  bytes to write
+     * @param bytesBegin  index of the first byte to write
+     * @param bytesLength number of bytes to write
+     * @param out         destination buffer
+     * @param offsets     optional buffer receiving one source index per byte written; may be null
+     * @param sourceIndex value recorded in <code>offsets</code> for each byte
+     * @return <code>CoderResult.OVERFLOW</code> if some bytes had to be stored
+     *         in the overflow buffer, otherwise <code>CoderResult.UNDERFLOW</code>
+     */
+    protected static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv, 
+                                         byte[] bytesArray, int bytesBegin, int bytesLength, 
+                                         ByteBuffer out, IntBuffer offsets, int sourceIndex){
+
+        CoderResult cr = CoderResult.UNDERFLOW;
+        int bytesLimit = bytesBegin + bytesLength;
+        int firstByte = bytesBegin;
+
+        /* write as many bytes as fit */
+        while(bytesBegin<bytesLimit && out.hasRemaining()) {
+            out.put(bytesArray[bytesBegin++]);
+        }
+
+        /* one offset per byte that reached the target */
+        if(offsets!=null) {
+            for(int written=bytesBegin-firstByte; written>0; --written) {
+                offsets.put(sourceIndex);
+            }
+        }
+
+        /* write overflow */
+        cnv.errorBufferLength = bytesLimit - bytesBegin;
+        if(cnv.errorBufferLength >0) {
+            int index = 0;
+            while(bytesBegin<bytesLimit) {
+                cnv.errorBuffer[index++]=bytesArray[bytesBegin++];
+            }
+            cr = CoderResult.OVERFLOW;
+        }
+        return  cr;
+    }   
+
+    /**
+     * Reports how many chars this encoder currently holds in its internal
+     * state. The cases are checked in this order: a positive
+     * <code>preFromULength</code> (that length plus the char count of
+     * <code>preFromUFirstCP</code>), a negative <code>preFromULength</code>
+     * (its magnitude), a positive <code>fromUChar32</code> (1), a positive
+     * <code>preFromUFirstCP</code> (its char count); otherwise 0.
+     * This is useful for mapping ICU converter semantics onto those of iconv
+     * and is not needed for normal conversion.
+     * @return the pending count as described above
+     * @draft ICU 3.4
+     */
+    /*public*/ int fromUCountPending(){
+        if(preFromULength > 0){
+            return UTF16.getCharCount(preFromUFirstCP)+preFromULength;
+        }
+        if(preFromULength < 0){
+            return -preFromULength;
+        }
+        if(fromUChar32 > 0){
+            return 1;
+        }
+        if(preFromUFirstCP > 0){
+            return UTF16.getCharCount(preFromUFirstCP);
+        }
+        return 0;
+    }
+    /**
+     * 
+     * @param source
+     */
+    private final void setSourcePosition(CharBuffer source){
+        
+        // ok was there input held in the previous invocation of decodeLoop 
+        // that resulted in output in this invocation?
+        source.position(source.position() - fromUCountPending());
+    }
+    /**
+     * Write the codepage substitution character.
+     * Subclasses to override this method.
+     * For stateful converters, it is typically necessary to handle this
+     * specificially for the converter in order to properly maintain the state.
+     */
+    protected CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, 
+                                           CharBuffer source, ByteBuffer target, 
+                                           IntBuffer offsets){
+        CharsetICU cs = (CharsetICU) encoder.charset();
+        byte[] sub = encoder.replacement();
+        if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
+            return CharsetEncoderICU.fromUWriteBytes(encoder,
+                    new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
+                            .position());
+        } else {
+            return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
+                    sub.length, target, offsets, source.position());
+        }
+    }
+}
--- a/icu4j/src/com/ibm/icu/charset/CharsetICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetICU.java
@ -0,0 +1,192 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+
+package com.ibm.icu.charset;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.HashMap;
+
+import com.ibm.icu.lang.UCharacter;
+
+
+
+public abstract class CharsetICU extends Charset{
+	
+    // Both names are assigned by the constructor from its arguments.
+    protected String icuCanonicalName;
+    protected String javaCanonicalName;
+    // NOTE(review): not read or written anywhere in this class - confirm the intended use.
+    protected int options;
+
+    // Size limits; left at their defaults here and assigned by subclasses
+    // (CharsetUTF16 sets 4 / 2 / 1 in its constructor).
+    protected int maxBytesPerChar;
+    protected int minBytesPerChar;
+    protected float  maxCharsPerByte;
+    // When non-zero, CharsetEncoderICU.cbFromUWriteSub writes this single byte
+    // instead of the replacement bytes for an invalid char <= 0xff.
+    protected byte subChar1 = 0x00; 
+    
+    // NOTE(review): mode and flush are not used inside this class - confirm against the coders.
+    protected int mode;
+    protected boolean flush;
+    // Read by isFromUUseFallback(int).
+    protected boolean useFallback;
+    
+    /**
+     * Creates a charset that is known under an ICU name and a Java name.
+     *
+     * @param icuCanonicalName the ICU canonical converter name
+     * @param canonicalName the canonical name handed to the {@link Charset} constructor
+     * @param aliases the aliases handed to the {@link Charset} constructor
+     * @throws IllegalCharsetNameException if <code>canonicalName</code> is empty
+     * @draft ICU 3.6
+     */
+    protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) {
+        super(canonicalName, aliases);
+        final boolean emptyName = (0 == canonicalName.length());
+        if (emptyName) {
+            throw new IllegalCharsetNameException(canonicalName);
+        }
+        this.icuCanonicalName  = icuCanonicalName;
+        this.javaCanonicalName = canonicalName;
+    }
+    
+    /**
+     * Tells whether the given charset is contained in this charset.
+     * This implementation only reports containment for a charset that is
+     * equal to this one.
+     * @param cs charset to test, may be null
+     * @return true if <code>cs</code> is non-null and equal to this charset
+     */
+    public boolean contains(Charset cs){
+        return (cs != null) && this.equals(cs);
+    }
+    /**
+     * Maps the ICU canonical name of a converter to the name of the class
+     * that getCharset() loads for it; names that are not listed fall back
+     * to the MBCS implementation there.
+     */
+    private static final HashMap algorithmicCharsets = new HashMap();
+    static{
+        // each row: { ICU canonical name, implementation class name }
+        // NOTE(review): "iso88591" points at CharsetBOCU1, and the platform/opposite
+        // endian rows are mapped differently for UTF-16 and UTF-32 - confirm
+        // these are intended and not copy/paste leftovers.
+        final String[][] table = {
+            { "BOCU-1",                "com.ibm.icu.impl.CharsetBOCU1" },
+            { "CESU-8",                "com.ibm.icu.impl.CharsetCESU8" },
+            { "HZ",                    "com.ibm.icu.impl.CharsetHZ" },
+            { "imapmailboxname",       "com.ibm.icu.impl.CharsetIMAP" },
+            { "ISCII",                 "com.ibm.icu.impl.CharsetISCII" },
+            { "iso2022",               "com.ibm.icu.impl.CharsetISO2022" },
+            { "iso88591",              "com.ibm.icu.impl.CharsetBOCU1" },
+            { "lmbcs1",                "com.ibm.icu.impl.CharsetLMBCS1" },
+            { "lmbcs11",               "com.ibm.icu.impl.CharsetLMBCS11" },
+            { "lmbcs16",               "com.ibm.icu.impl.CharsetLMBCS16" },
+            { "lmbcs17",               "com.ibm.icu.impl.CharsetLMBCS17" },
+            { "lmbcs18",               "com.ibm.icu.impl.CharsetLMBCS18" },
+            { "lmbcs19",               "com.ibm.icu.impl.CharsetLMBCS19" },
+            { "lmbcs2",                "com.ibm.icu.impl.CharsetLMBCS2" },
+            { "lmbcs3",                "com.ibm.icu.impl.CharsetLMBCS3" },
+            { "lmbcs4",                "com.ibm.icu.impl.CharsetLMBCS4" },
+            { "lmbcs5",                "com.ibm.icu.impl.CharsetLMBCS5" },
+            { "lmbcs6",                "com.ibm.icu.impl.CharsetLMBCS6" },
+            { "lmbcs8",                "com.ibm.icu.impl.CharsetLMBCS8" },
+            { "scsu",                  "com.ibm.icu.impl.CharsetSCSU" },
+            { "usascii",               "com.ibm.icu.impl.CharsetUSASCII" },
+            { "UTF-16",                "com.ibm.icu.impl.CharsetUTF16" },
+            { "UTF-16BE",              "com.ibm.icu.impl.CharsetUTF16" },
+            { "UTF-16LE",              "com.ibm.icu.impl.CharsetUTF16LE" },
+            { "UTF16_OppositeEndian",  "com.ibm.icu.impl.CharsetUTF16LE" },
+            { "UTF16_PlatformEndian",  "com.ibm.icu.impl.CharsetUTF16" },
+            { "UTF-32",                "com.ibm.icu.impl.CharsetUTF32" },
+            { "UTF-32BE",              "com.ibm.icu.impl.CharsetUTF32" },
+            { "UTF-32LE",              "com.ibm.icu.impl.CharsetUTF32LE" },
+            { "UTF32_PlatformEndian",  "com.ibm.icu.impl.CharsetUTF32LE" },
+            { "UTF32_OppositeEndian",  "com.ibm.icu.impl.CharsetUTF32" },
+            { "UTF-7",                 "com.ibm.icu.impl.CharsetUTF7" },
+            { "UTF-8",                 "com.ibm.icu.impl.CharsetUTF8" },
+        };
+        for(int row=0; row<table.length; row++){
+            algorithmicCharsets.put(table[row][0], table[row][1]);
+        }
+    }
+
+    /**
+     * Instantiates the charset implementation for an ICU converter name.
+     * The implementation class is looked up in <code>algorithmicCharsets</code>;
+     * every other name is served by <code>com.ibm.icu.impl.CharsetMBCS</code>.
+     * The class is loaded reflectively and constructed through its
+     * (String, String, String[]) constructor.
+     *
+     * @param icuCanonicalName  the ICU canonical converter name
+     * @param javaCanonicalName the Java canonical name passed to the constructor
+     * @param aliases           the aliases passed to the constructor
+     * @return the newly constructed charset
+     * @throws UnsupportedCharsetException if the class cannot be loaded or
+     *         constructed, or is not a CharsetICU; the underlying failure,
+     *         when there is one, is attached as the cause
+     */
+    /*public*/ static final Charset getCharset(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+       String className = (String) algorithmicCharsets.get(icuCanonicalName);
+       if(className==null){
+           //all the cnv files are loaded as MBCS
+           className = "com.ibm.icu.impl.CharsetMBCS";
+       }
+       // remembers why reflection failed so that the reason is not lost
+       Throwable failure = null;
+       try{
+           Class cs = Class.forName(className);
+           Class[] paramTypes = new Class[]{ String.class, String.class,  String[].class};
+           final Constructor c = cs.getConstructor(paramTypes);
+           Object[] params = new Object[]{ icuCanonicalName, javaCanonicalName, aliases};
+           
+           java.security.AccessController.doPrivileged
+           (new java.security.PrivilegedAction() {
+                   public Object run() {
+                       c.setAccessible(true);
+                       return null;
+                   }
+               });
+
+           // Run constructor
+           Object obj = c.newInstance(params);
+           if(obj instanceof CharsetICU){
+               return (CharsetICU)obj;
+           }
+       }catch (InvocationTargetException e) {
+           // the constructor itself threw: report it, keeping the original exception as cause
+           UnsupportedCharsetException wrapped = new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className+ ". Exception:" + e.getTargetException());
+           wrapped.initCause(e.getTargetException());
+           throw wrapped;
+       }catch(ClassNotFoundException ex){
+           failure = ex;
+       }catch(NoSuchMethodException ex){
+           failure = ex;
+       }catch (IllegalAccessException ex){ 
+           failure = ex;
+       }catch (InstantiationException ex){ 
+           failure = ex;
+       }
+       UnsupportedCharsetException unsupported = new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className);
+       if(failure!=null){
+           unsupported.initCause(failure);
+       }
+       throw unsupported;
+    }
+    
+    /**
+     * Always use fallbacks from codepage to Unicode.
+     * @return true, unconditionally
+     */
+    protected final boolean isToUUseFallback() {
+        return true;
+    }    
+    
+    /**
+     * Use fallbacks from Unicode to codepage when useFallback is set or
+     * when the code point is a private-use code point.
+     * @param c the code point to test
+     * @return true if a fallback mapping may be used for <code>c</code>
+     */
+    protected final boolean isFromUUseFallback(int c) {
+        if (useFallback) {
+            return true;
+        }
+        return isPrivateUse(c);
+    }
+    
+    /**
+     * Returns the encoding name reported by an {@link InputStreamReader}
+     * that was created without naming a charset.
+     * @return the value of <code>InputStreamReader.getEncoding()</code>
+     */
+    public static final String getDefaultCharsetName(){
+        // an empty in-memory stream is sufficient: only the reader's encoding is queried
+        final ByteArrayInputStream noBytes = new ByteArrayInputStream(new byte[0]);
+        final InputStreamReader probe = new InputStreamReader(noBytes);
+        return probe.getEncoding();
+    }
+    
+    /**
+     * Tests whether the general category of a code point is PRIVATE_USE.
+     * @param c the code point to test
+     * @return true if UCharacter reports the type PRIVATE_USE for <code>c</code>
+     */
+    /*public*/ static final boolean isPrivateUse(int c) {
+        final int category = UCharacter.getType(c);
+        return category == UCharacter.PRIVATE_USE;
+    }
+
+    /**
+     * Returns a charset object for the named charset.
+     * The ICU charset provider is asked first, so an ICU charset is
+     * returned whenever one is available. If the ICU provider does not
+     * support the name, the lookup falls back to
+     * {@link Charset#forName(String)}, which consults the other charset
+     * providers including the standard Java one.
+     * 
+     * @param charsetName The name of the requested charset,
+     * may be either a canonical name or an alias
+     * @return A charset object for the named charset
+     * @throws IllegalCharsetNameException If the given charset name
+     * is illegal
+     * @throws UnsupportedCharsetException If no support for the
+     * named charset is available in this instance of the Java
+     * virtual machine
+     */
+    public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
+        final Charset icuCharset = new CharsetProviderICU().charsetForName(charsetName);
+        return (icuCharset != null) ? icuCharset : Charset.forName(charsetName);
+    }
+}
+
--- a/icu4j/src/com/ibm/icu/charset/CharsetProviderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetProviderICU.java
@ -0,0 +1,260 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+import com.ibm.icu.impl.UConverterAlias;
+
+public final class CharsetProviderICU extends CharsetProvider{
+    
+    /**
+     * Constructs a CharsetProviderICU object.
+     * The constructor does no work; the class is listed in
+     * META-INF/services/java.nio.charset.spi.CharsetProvider, so it needs a
+     * public no-argument constructor for the provider lookup.
+     * @stable ICU 2.4
+     */
+    public CharsetProviderICU(){
+    }
+    
+    /**
+     * Constructs a charset for the given charset name.
+     * The name is first mapped to an ICU canonical name; lookup failures
+     * (UnsupportedCharsetException, IOException) are reported as "unsupported".
+     * @param charsetName charset name
+     * @return charset object for the given charset name, null if unsupported
+     * @stable ICU 2.4
+     */
+    public final Charset charsetForName(String charsetName){
+        Charset found = null;
+        try{
+            final String icuName = getICUCanonicalName(charsetName);
+            // a null or empty name stays unresolved, which makes the
+            // Charset API throw its unsupported encoding exception
+            if(icuName!=null && icuName.length()!=0){
+                found = getCharset(icuName);
+            }
+        }catch(UnsupportedCharsetException ex){
+            // deliberately reported as null
+        }catch(IOException ex){
+            // deliberately reported as null
+        }
+        return found;
+    }
+    /**
+     * Gets the ICU canonical name of a converter.
+     * The name is looked up under the MIME, IANA and untagged standards, then
+     * as a plain alias; an otherwise unknown name starting with "x-" is
+     * returned without that prefix.
+     * @param enc converter name, may be null
+     * @return the ICU canonical name, an empty string if the name is not
+     *         supported, or null if <code>enc</code> is null
+     * @throws UnsupportedCharsetException if the alias lookup fails with an IOException
+     * @internal ICU 3.4
+     */
+    public static final String getICUCanonicalName(String enc)
+                                throws UnsupportedCharsetException{
+        if(enc==null){
+            return null;
+        }
+        try{
+            final String[] standards = { "MIME", "IANA", "" };
+            for(int i=0; i<standards.length; i++){
+                final String byStandard = UConverterAlias.getCanonicalName(enc, standards[i]);
+                if(byStandard!=null){
+                    return byStandard;
+                }
+            }
+            /* we have some aliases in the form x-blah .. match those first */
+            final String firstAlias = UConverterAlias.getAlias(enc, 0);
+            if(firstAlias!=null){
+                return firstAlias;
+            }
+            if(enc.startsWith("x-")){
+                /* TODO: Match with getJavaCanonicalName method */
+                return enc.substring(2);
+            }
+            /* unsupported encoding */
+            return "";
+        }catch(IOException ex){
+            throw new UnsupportedCharsetException(enc);
+        }
+    }
+    /**
+     * Builds the charset for an ICU canonical name from its aliases and
+     * its Java canonical name.
+     */
+    private static final Charset getCharset(String icuCanonicalName) throws IOException{
+        final String[] aliasNames = getAliases(icuCanonicalName);
+        final String javaName = getJavaCanonicalName(icuCanonicalName);
+        return CharsetICU.getCharset(icuCanonicalName, javaName, aliasNames);
+    }
+    /**
+     * Gets the canonical name of the converter as defined by Java.
+     * @param icuCanonicalName ICU converter name, may be null
+     * @return the Java canonical name; null if the argument is null or the
+     *         alias lookup fails with an IOException
+     * @internal ICU 3.4
+     */
+    private static String getJavaCanonicalName(String icuCanonicalName){
+        /*
+        If a charset listed in the IANA Charset Registry is supported by an implementation 
+        of the Java platform then its canonical name must be the name listed in the registry. 
+        Many charsets are given more than one name in the registry, in which case the registry 
+        identifies one of the names as MIME-preferred. If a charset has more than one registry 
+        name then its canonical name must be the MIME-preferred name and the other names in 
+        the registry must be valid aliases. If a supported charset is not listed in the IANA 
+        registry then its canonical name must begin with one of the strings "X-" or "x-".
+        */
+        if(icuCanonicalName==null){
+            return null;
+        }
+        try{
+            /* the alias with the MIME tag wins */
+            final String mimeName = UConverterAlias.getStandardName(icuCanonicalName, "MIME");
+            if(mimeName!=null){
+                return mimeName;
+            }
+            /* next choice: the alias with the IANA tag */
+            final String ianaName = UConverterAlias.getStandardName(icuCanonicalName, "IANA");
+            if(ianaName!=null){
+                return ianaName;
+            }
+            /* an alias that already carries the x- prefix becomes the canonical name */
+            final int aliasNum = UConverterAlias.countAliases(icuCanonicalName);
+            for(int i=0;i<aliasNum;i++){
+                final String alias = UConverterAlias.getAlias(icuCanonicalName, i);
+                if(alias!=null && alias.startsWith("x-")){
+                    return alias;
+                }
+            }
+            /* last resort: prepend x- to the UTR22 name, the second alias or the ICU name */
+            String base = UConverterAlias.getStandardName(icuCanonicalName, "UTR22");
+            if(base==null && icuCanonicalName.indexOf(",")!=-1){
+                base = UConverterAlias.getAlias(icuCanonicalName, 1);
+            }
+            if(base==null){
+                base = icuCanonicalName;
+            }
+            return "x-"+ base;
+        }catch (IOException ex){
+            /* a failed lookup is reported as "no name" */
+            return null;
+        }
+    }
+
+    /** 
+     * Gets the aliases associated with the converter name.
+     * Aliases containing '+' or ',' are left out, as are null entries.
+     * @param encName converter name, may be null
+     * @return the accepted aliases in lookup order, or null if
+     *         <code>encName</code> is null
+     * @throws IOException if the alias lookup fails
+     * @internal ICU 2.4
+     */
+    private static final String[] getAliases(String encName)throws IOException{
+        if(encName == null){
+            return null;
+        }
+        // size the scratch array from the real alias count instead of a fixed
+        // 50 slots, so a converter with many aliases cannot overflow it
+        final int aliasNum = Math.max(UConverterAlias.countAliases(encName), 0);
+        final String[] accepted = new String[aliasNum];
+        int used = 0;
+        for(int i=0;i<aliasNum;i++){
+            String name = UConverterAlias.getAlias(encName,i);
+            // getAlias may yield null (getJavaCanonicalName guards for it too)
+            if(name!=null && name.indexOf('+')==-1 && name.indexOf(',')==-1){
+                accepted[used++]= name;
+            }
+        }
+        final String[] ret = new String[used];
+        System.arraycopy(accepted, 0, ret, 0, used);
+        return ret;
+    }
+
+    /**
+     * Iterator that turns an array of charset names into charsets by
+     * calling charsetForName() for one name per step. Removal is not supported.
+     * @stable ICU 2.4
+     */
+    protected final class CharsetIterator implements Iterator{
+      private String[] names;
+      private int currentIndex;
+      protected CharsetIterator(String[] strs){
+        currentIndex=0;
+        names = strs;
+      }
+      public boolean hasNext(){
+        return names.length > currentIndex;
+      }
+      public Object next(){
+        if(currentIndex>=names.length){
+              throw new NoSuchElementException();
+        }
+        // advance first, so the iterator moves on even if the lookup fails
+        final String requested = names[currentIndex];
+        currentIndex++;
+        return charsetForName(requested);
+      }
+      public void remove(){
+          throw new UnsupportedOperationException();
+      }
+    }
+      
+    /**
+     * Fills the map with one entry per available converter: the charset is
+     * the key and its Java canonical name the value. Converters whose charset
+     * cannot be created are skipped.
+     */
+    private static final void putCharsets(Map map){
+        final int available = UConverterAlias.countAvailable();
+        int index = 0;
+        while(index < available){
+            final String icuName = UConverterAlias.getAvailableName(index++);
+            try {
+                // add only charsets that can be created!
+                final Charset created = getCharset(icuName);
+                final String javaName = getJavaCanonicalName(icuName);
+                map.put(created, javaName);
+            }catch(UnsupportedCharsetException ex){
+                // skipped on purpose
+            }catch (IOException e) {
+                // skipped on purpose
+            }
+        }
+    }
+
+    /**
+     * Returns an iterator over the available charsets.
+     * @return an iterator over the Charset keys collected by putCharsets()
+     */
+    public final Iterator charsets(){
+        final Map available = new HashMap();
+        putCharsets(available);
+        return available.keySet().iterator();
+    }
+    /**
+     * Gets the Java canonical names of the available converters.
+     * @return Object[] the names collected by putCharsets(), as an object array
+     */
+    public static final Object[] getAvailableNames(){
+        final Map available = new HashMap();
+        putCharsets(available);
+        return available.values().toArray();
+    }
+    /**
+     * Returns the names of all available converters as reported by
+     * UConverterAlias, without checking that a charset can be created.
+     * @return one name per available converter, in UConverterAlias order
+     */
+    public static final String[] getAllNames(){
+        final String[] names = new String[UConverterAlias.countAvailable()];
+        for(int i=0;i<names.length;i++) {
+            names[i] = UConverterAlias.getAvailableName(i);
+        }
+        return names;
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java
+++ b/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java
--- a/icu4j/src/com/ibm/icu/impl/CharsetUTF16.java
+++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF16.java
@ -0,0 +1,446 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.CharsetDecoderICU;
+import com.ibm.icu.charset.CharsetEncoderICU;
+import com.ibm.icu.charset.CharsetICU;
+import com.ibm.icu.text.UTF16;
+
+public class CharsetUTF16 extends CharsetICU {
+    // Substitution bytes 0xFF 0xFD (U+FFFD in big-endian byte order); handed
+    // to the CharsetEncoderICU constructor by CharsetEncoderUTF16.
+    protected byte[] fromUSubstitution = new byte[]{(byte)0xff, (byte)0xfd};
+    /**
+     * Creates the UTF-16 charset and sets its size limits:
+     * at most 4 and at least 2 bytes per char, at most 1 char per byte.
+     * @param icuCanonicalName ICU canonical name, passed to CharsetICU
+     * @param javaCanonicalName Java canonical name, passed to CharsetICU
+     * @param aliases aliases, passed to CharsetICU
+     */
+    public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        maxBytesPerChar = 4;
+        minBytesPerChar = 2;
+        maxCharsPerByte = 1;
+    }
+    class CharsetDecoderUTF16 extends CharsetDecoderICU{
+
+        /**
+         * Creates a decoder for the given charset; all setup is done by
+         * the CharsetDecoderICU constructor.
+         * @param cs the charset this decoder belongs to
+         */
+        public CharsetDecoderUTF16(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Decodes big-endian UTF-16 bytes from <code>source</code> into
+         * <code>target</code>.
+         * The source is read with absolute gets; its position is written back
+         * once on every regular exit. Bytes of an incomplete code unit or
+         * surrogate pair are kept in toUBytesArray/toULength for the next call,
+         * and a trail surrogate that does not fit into the target is parked in
+         * charErrorBufferArray.
+         *
+         * @param source  the bytes to decode
+         * @param target  receives the decoded chars
+         * @param offsets if non-null, receives one source offset per char
+         *                written (-1 for chars completed from a previous call)
+         * @return UNDERFLOW, OVERFLOW or a malformed-input result
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining() && toUnicodeStatus==0) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+        
+            int sourceIndex=0, count=0, length, sourceArrayIndex;
+            char c=0, trail;
+            length = source.remaining();
+            sourceArrayIndex = source.position();
+            try{
+                /* complete a partial UChar or pair from the last call */
+                if(toUnicodeStatus!=0) {
+                    /*
+                     * special case: single byte from a previous buffer,
+                     * where the byte turned out not to belong to a trail surrogate
+                     * and the preceding, unmatched lead surrogate was put into toUBytes[]
+                     * for error handling
+                     */
+                    toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
+                    toULength=1;
+                    toUnicodeStatus=0;
+                }
+                if((count=toULength)!=0) {
+                    byte[] pArray=toUBytesArray;
+                    int pArrayIndex = toUBytesBegin;
+                    do {
+                        /* append behind the pending bytes, honouring the array offset */
+                        pArray[pArrayIndex+count++]=source.get(sourceArrayIndex++);
+                        ++sourceIndex;
+                        --length;
+                        if(count==2) {
+                            c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            if(!UTF16.isSurrogate(c)) {
+                                /* output the BMP code point */
+                                target.put(c);
+                                if(offsets!=null) {
+                                    offsets.put(-1);
+                                }
+                                count=0;
+                                c=0;
+                                break;
+                            } else if(UTF16.isLeadSurrogate(c)) {
+                                /* continue collecting bytes for the trail surrogate */
+                                c=0; /* avoid unnecessary surrogate handling below */
+                            } else {
+                                /* fall through to error handling for an unmatched trail surrogate */
+                                break;
+                            }
+                        } else if(count==4) {
+                            c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            trail=(char)(((pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            if(UTF16.isTrailSurrogate(trail)) {
+                                /* output the surrogate pair */
+                                target.put(c);
+                                if(target.remaining()>=1) {
+                                    target.put(trail);
+                                    if(offsets!=null) {
+                                        offsets.put(-1);
+                                        offsets.put(-1);
+                                    }
+                                } else /* targetCapacity==1 */ {
+                                    /*
+                                     * park the trail surrogate and report overflow through cr;
+                                     * the pair is fully consumed, so fall through to reset the
+                                     * pending state and write back the source position
+                                     */
+                                    charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                                    charErrorBufferLength=1;
+                                    cr = CoderResult.OVERFLOW;
+                                }
+                                count=0;
+                                c=0;
+                                break;
+                            } else {
+                                /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+            
+                                /* back out reading the code unit after it */
+                                /*
+                                 * NOTE(review): sourceArrayIndex never drops below source.position()
+                                 * here, so this test is never true and the else branch is always
+                                 * taken. That branch also re-reads the second byte from the source
+                                 * on the next call, so it is kept unchanged.
+                                 */
+                                if((source.position()-sourceArrayIndex)>=2) {
+                                    sourceArrayIndex-=2;
+                                } else {
+                                    /*
+                                     * if the trail unit's first byte was in a previous buffer, then
+                                     * we need to put it into a special place because toUBytes[] will be
+                                     * used for the lead unit's bytes
+                                     */
+                                    /* mask the byte so that it is not sign-extended into the status */
+                                    toUnicodeStatus=0x100|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK);
+                                    --sourceArrayIndex;
+                                }
+                                toULength=2;
+                                /*
+                                 * report the error right away; looping on would append further
+                                 * source bytes behind the four bytes collected so far
+                                 */
+                                source.position(sourceArrayIndex);
+                                /*
+                                 * NOTE(review): as at the other error sites, a buffer index is
+                                 * passed where CoderResult expects the length of the malformed
+                                 * input; an index of 0 makes malformedForLength throw - confirm
+                                 * what the caller expects.
+                                 */
+                                return CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                        }
+                    } while(length>0);
+                    toULength=(byte)count;
+                }
+            
+                /* copy an even number of bytes for complete UChars */
+                count=2*target.remaining();
+                if(count>length) {
+                    count=length&~1;
+                }
+                if(c==0 && count>0) {
+                    length-=count;
+                    count>>=1;
+                    //targetCapacity-=count;
+                    if(offsets==null) {
+                        do {
+                            c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            sourceArrayIndex+=2;
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put(c);
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                                      UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                                     ) {
+                                sourceArrayIndex+=2;
+                                --count;
+                                target.put(c);
+                                target.put(trail);
+                            } else {
+                                break;
+                            }
+                        } while(--count>0);
+                    } else {
+                        do {
+                            c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            sourceArrayIndex+=2;
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put(c);
+                                offsets.put(sourceIndex);
+                                sourceIndex+=2;
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                                      UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                            ) {
+                                sourceArrayIndex+=2;
+                                --count;
+                                target.put(c);
+                                target.put(trail);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                sourceIndex+=4;
+                            } else {
+                                break;
+                            }
+                        } while(--count>0);
+                    }
+            
+                    if(count==0) {
+                        /* done with the loop for complete UChars */
+                        c=0;
+                    } else {
+                        /* keep c for surrogate handling, trail will be set there */
+                        length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+                    }
+                }
+            
+                if(c!=0) {
+                    /*
+                     * c is a surrogate, and
+                     * - source or target too short
+                     * - or the surrogate is unmatched
+                     */
+                    toUBytesArray[toUBytesBegin+0]=(byte)(c>>>8);
+                    toUBytesArray[toUBytesBegin+1]=(byte)c;
+                    toULength=2;
+            
+                    if(UTF16.isLeadSurrogate(c)) {
+                        if(length>=2) {
+                            if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
+                                /* output the surrogate pair, will overflow (see conditions comment above) */
+                                sourceArrayIndex+=2;
+                                length-=2;
+                                target.put(c);
+                                if(offsets!=null) {
+                                    offsets.put(sourceIndex);
+                                }
+                                charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                                charErrorBufferLength=1;
+                                toULength=0;
+                                cr = CoderResult.OVERFLOW;
+                            } else {
+                                /* unmatched lead surrogate */
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                        } else {
+                            /* see if the trail surrogate is in the next buffer */
+                        }
+                    } else {
+                        /* unmatched trail surrogate */
+                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                    }
+                }
+            
+                /*
+                 * check for a remaining source byte, but only while no error or
+                 * overflow is pending: otherwise a byte of the following code unit
+                 * would be swallowed or the pending result would be overwritten
+                 */
+                if(cr.isUnderflow() && length>0) {
+                    if(!target.hasRemaining()) {
+                        cr = CoderResult.OVERFLOW;
+                    } else {
+                        /* it must be length==1 because otherwise the above would have copied more */
+                        toUBytesArray[toUBytesBegin+toULength++]=source.get(sourceArrayIndex++);
+                    }
+                }
+           
+                source.position(sourceArrayIndex);
+            }catch(BufferOverflowException ex){
+                /* safety net for an unexpected overflow from target.put() */
+                cr = CoderResult.OVERFLOW;
+            }
+            return cr;
+        }
+        
+    }
+    class CharsetEncoderUTF16 extends CharsetEncoderICU{
+
+        /**
+         * Creates an encoder that uses the enclosing charset's
+         * fromUSubstitution bytes as replacement, then calls implReset()
+         * so that the encoder starts in the "BOM still to be written" state.
+         * @param cs the charset this encoder belongs to
+         */
+        public CharsetEncoderUTF16(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        // fromUnicodeStatus value meaning that encodeLoop still has to emit
+        // the byte order mark (0xFE 0xFF) before any other output.
+        private final static int NEED_TO_WRITE_BOM = 1;
+        
+        /**
+         * Resets the encoder via the superclass and re-arms the byte order
+         * mark, so the next encodeLoop call writes it again.
+         */
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+        
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            char c;
+            /* write the BOM if necessary */
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+                byte bom[]={ (byte)0xfe, (byte)0xff };
+                cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
+                if(cr.isError()){
+                    return cr;
+                }
+                fromUnicodeStatus=0;
+            }
+            
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            
+            int sourceIndex = 0;
+            char trail = 0;
+            int length = source.remaining();
+            
+            try{
+                /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+            
+                if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceIndex)) && target.remaining()>=4) {
+                    /* the last buffer ended with a lead surrogate, output the surrogate pair */
+                    ++sourceIndex;
+                    --length;
+                    target.put((byte)(c>>>8));
+                    target.put((byte)c);
+                    target.put((byte)(trail>>>8));
+                    target.put((byte)trail);
+                    if(offsets!=null && offsets.remaining()>=4) {
+                        offsets.put(-1);
+                        offsets.put(-1);
+                        offsets.put(-1);
+                        offsets.put(-1);
+                    }
+                    sourceIndex=1;
+                    fromUChar32=c=0;
+                }
+                byte overflow[/*4*/] = new byte[4];
+                int sourceArrayIndex = source.position();
+                
+                if(c==0) {
+                    /* copy an even number of bytes for complete UChars */
+                    int count=2*length;
+                    int targetCapacity = target.limit();
+                    if(count>targetCapacity) {
+                        count=targetCapacity&~1;
+                    }           
+                    /* count is even */
+                    targetCapacity-=count;
+                    count>>=1;
+                    length-=count;
+    
+                    if(offsets==null) {
+                        while(count>0) {
+                            c= source.get(sourceArrayIndex++);
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put((byte)(c>>>8));
+                                target.put((byte)c);
+                                
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                                ++sourceArrayIndex;
+                                --count;
+                                target.put((byte)(c>>>8));
+                                target.put((byte)c);
+                                target.put((byte)(trail>>>8));
+                                target.put((byte)trail);
+                            } else {
+                                break;
+                            }
+                            --count;
+                        }
+                    } else {
+                        while(count>0) {
+                            c=source.get(sourceArrayIndex++);
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put((byte)(c>>>8));
+                                target.put((byte)c);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex++);
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                                ++sourceArrayIndex;
+                                --count;
+                                target.put((byte)(c>>>8));
+                                target.put((byte)c);
+                                target.put((byte)(trail>>>8));
+                                target.put((byte)trail);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                sourceIndex+=2;
+                            } else {
+                                break;
+                            }
+                            --count;
+                        }
+                    }
+            
+                    if(count==0) {
+                        /* done with the loop for complete UChars */
+                        if(length>0 && targetCapacity>0) {
+                            /*
+                             * there is more input and some target capacity -
+                             * it must be targetCapacity==1 because otherwise
+                             * the above would have copied more;
+                             * prepare for overflow output
+                             */
+                            if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
+                                overflow[0]=(byte)(c>>>8);
+                                overflow[1]=(byte)c;
+                                length=2; /* 2 bytes to output */
+                                c=0;
+                            /* } else { keep c for surrogate handling, length will be set there */
+                            }
+                        } else {
+                            length=0;
+                            c=0;
+                        }
+                    } else {
+                        /* keep c for surrogate handling, length will be set there */
+                        targetCapacity+=2*count;
+                    }
+                } else {
+                    length=0; /* from here on, length counts the bytes in overflow[] */
+                }
+                
+                if(c!=0) {
+                    /*
+                     * c is a surrogate, and
+                     * - source or target too short
+                     * - or the surrogate is unmatched
+                     */
+                    length=0;
+                    if(UTF16.isLeadSurrogate(c)) {
+                        if(sourceArrayIndex<source.limit()) {
+                            if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                                /* output the surrogate pair, will overflow (see conditions comment above) */
+                                ++sourceArrayIndex;
+                                overflow[0]=(byte)(c>>>8);
+                                overflow[1]=(byte)c;
+                                overflow[2]=(byte)(trail>>>8);
+                                overflow[3]=(byte)trail;
+                                length=4; /* 4 bytes to output */
+                                c=0;
+                            } else {
+                                /* unmatched lead surrogate */
+                                //pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                        } else {
+                            /* see if the trail surrogate is in the next buffer */
+                        }
+                    } else {
+                        /* unmatched trail surrogate */
+                        //pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
+                    }
+                    fromUChar32=c;
+                }
+                source.position(sourceArrayIndex);
+                if(length>0) {
+                    /* output length bytes with overflow (length>targetCapacity>0) */
+                    fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
+                }
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }
+            return cr;
+        }
+    }
+    /**
+     * Creates a decoder for this charset.
+     *
+     * @return a new <code>CharsetDecoderUTF16</code> constructed with this charset
+     */
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new CharsetDecoderUTF16(this);
+        return decoder;
+    }
+
+    /**
+     * Creates an encoder for this charset.
+     *
+     * @return a new <code>CharsetEncoderUTF16</code> constructed with this charset
+     */
+    public CharsetEncoder newEncoder() {
+        final CharsetEncoder encoder = new CharsetEncoderUTF16(this);
+        return encoder;
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/CharsetUTF16LE.java
+++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF16LE.java
@ -0,0 +1,449 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.CharsetDecoderICU;
+import com.ibm.icu.charset.CharsetEncoderICU;
+import com.ibm.icu.charset.CharsetICU;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * @author Niti Hantaweepant
+ */
+public class CharsetUTF16LE extends CharsetICU {
+    protected byte[] fromUSubstitution = new byte[]{(byte)0xfd, (byte)0xff};
+    /**
+     * Creates the UTF-16LE charset and records its size limits: between 2 and 4
+     * bytes per char and at most one char per byte.
+     *
+     * @param icuCanonicalName  forwarded unchanged to the superclass constructor
+     * @param javaCanonicalName forwarded unchanged to the superclass constructor
+     * @param aliases           forwarded unchanged to the superclass constructor
+     */
+    public CharsetUTF16LE(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        /* a single code unit takes 2 bytes, a surrogate pair takes 4 */
+        minBytesPerChar = 2;
+        maxBytesPerChar = 4;
+        maxCharsPerByte = 1;
+    }
+    class CharsetDecoderUTF16LE extends CharsetDecoderICU{
+        /**
+         * Creates a decoder for <code>cs</code>; all initialisation is left to the
+         * <code>CharsetDecoderICU</code> constructor.
+         *
+         * @param cs the charset that owns this decoder
+         */
+        public CharsetDecoderUTF16LE(CharsetICU cs) {
+            super(cs);
+        }
+        /**
+         * Decodes little-endian UTF-16 bytes from <code>source</code> into chars in
+         * <code>target</code> (each code unit is built as second byte &lt;&lt; 8 | first byte).
+         * <p>
+         * The work is done in four steps: (1) bytes of an incomplete code unit or
+         * surrogate pair kept in <code>toUBytesArray</code> / <code>toULength</code> (or a
+         * single byte kept in <code>toUnicodeStatus</code>) are completed from the new
+         * input; (2) complete code units are copied in bulk; (3) a surrogate that the
+         * bulk loop could not finish is paired, reported or kept for the next call;
+         * (4) a single trailing source byte is stored in <code>toUBytesArray</code>.
+         *
+         * @param source  bytes to decode; its position is set to the consumed index at the
+         *                end of the method, but not when a
+         *                <code>BufferOverflowException</code> is caught
+         * @param target  receives the decoded chars
+         * @param offsets if not <code>null</code>, receives one index per char written:
+         *                -1 for chars completed in step (1), the running
+         *                <code>sourceIndex</code> otherwise
+         * @return <code>UNDERFLOW</code>, <code>OVERFLOW</code>, or a malformed-input result
+         *         for an unmatched surrogate
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining() && toUnicodeStatus==0) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            /* sourceIndex feeds offsets; sourceArrayIndex is the absolute read index into source; length counts unread source bytes */
+            int sourceIndex=0, count=0, length, sourceArrayIndex;
+            char c=0, trail;
+            length = source.remaining();
+            sourceArrayIndex = source.position();
+            try{
+                /* complete a partial UChar or pair from the last call */
+                if(toUnicodeStatus!=0) {
+                    /*
+                     * special case: single byte from a previous buffer,
+                     * where the byte turned out not to belong to a trail surrogate
+                     * and the preceding, unmatched lead surrogate was put into toUBytes[]
+                     * for error handling
+                     */
+                    toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
+                    toULength=1;
+                    toUnicodeStatus=0;
+                }
+                if((count=toULength)!=0) {
+                    byte[] pArray=toUBytesArray;
+                    int pArrayIndex = toUBytesBegin;
+                    do {
+                        pArray[count++]=source.get(sourceArrayIndex++); /* NOTE(review): written without pArrayIndex, unlike the reads below - equivalent only if toUBytesBegin is 0; confirm */
+                        ++sourceIndex;
+                        --length;
+                        if(count==2) {
+                            c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            if(!UTF16.isSurrogate(c)) {
+                                /* output the BMP code point */
+                                target.put(c);
+                                if(offsets!=null) {
+                                    offsets.put(-1);
+                                }
+                                count=0;
+                                c=0;
+                                break;
+                            } else if(UTF16.isLeadSurrogate(c)) {
+                                /* continue collecting bytes for the trail surrogate */
+                                c=0; /* avoid unnecessary surrogate handling below */
+                            } else {
+                                /* fall through to error handling for an unmatched trail surrogate */
+                                break;
+                            }
+                        } else if(count==4) {
+                            c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            trail=(char)(((pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            if(UTF16.isTrailSurrogate(trail)) {
+                                /* output the surrogate pair */
+                                target.put(c);
+                                if(target.remaining()>=1) {
+                                    target.put(trail);
+                                    if(offsets!=null) {
+                                        offsets.put(-1);
+                                        offsets.put(-1);
+                                    }
+                                } else /* targetCapacity==1 */ {
+                                    charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                                    charErrorBufferLength=1;
+                                    throw new BufferOverflowException(); /* caught at the end of this method, which skips the source.position(...) write-back */
+                                }
+                                count=0;
+                                c=0;
+                                break;
+                            } else {
+                                /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+            
+                                /* back out reading the code unit after it
+                                 * NOTE(review): source.position() is not advanced before the end of
+                                 * this method while sourceArrayIndex only grows from it, so this
+                                 * difference is never positive and the first branch looks
+                                 * unreachable - the operands may be swapped; confirm */
+                                if((source.position()-sourceArrayIndex)>=2) {
+                                    sourceArrayIndex-=2;
+                                } else {
+                                    /*
+                                     * if the trail unit's first byte was in a previous buffer, then
+                                     * we need to put it into a special place because toUBytes[] will be
+                                     * used for the lead unit's bytes
+                                     */
+                                    toUnicodeStatus=0x100|pArray[pArrayIndex+2];
+                                    --sourceArrayIndex;
+                                }
+                                toULength=2;
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);; /* NOTE(review): nothing leaves the loop here, so it keeps reading while length>0 and toULength is overwritten after the loop; the argument is a buffer index, not a byte count - confirm both */
+                            }
+                        }
+                    } while(length>0);
+                    toULength=(byte)count; /* 0 after a completed unit or pair, otherwise the number of bytes collected so far */
+                }
+            
+                /* copy an even number of bytes for complete UChars */
+                count=2*target.remaining();
+                if(count>length) {
+                    count=length&~1;
+                }
+                if(c==0 && count>0) {
+                    length-=count;
+                    count>>=1;
+                    //targetCapacity-=count;
+                    if(offsets==null) {
+                        do {
+                            c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            sourceArrayIndex+=2;
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put(c);
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                                      UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                                     ) {
+                                sourceArrayIndex+=2;
+                                --count;
+                                target.put(c);
+                                target.put(trail);
+                            } else {
+                                break;
+                            }
+                        } while(--count>0);
+                    } else {
+                        do {
+                            c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                            sourceArrayIndex+=2;
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put(c);
+                                offsets.put(sourceIndex);
+                                sourceIndex+=2;
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                                      UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                            ) {
+                                sourceArrayIndex+=2;
+                                --count;
+                                target.put(c);
+                                target.put(trail);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                sourceIndex+=4;
+                            } else {
+                                break;
+                            }
+                        } while(--count>0);
+                    }
+            
+                    if(count==0) {
+                        /* done with the loop for complete UChars */
+                        c=0;
+                    } else {
+                        /* keep c for surrogate handling, trail will be set there */
+                        length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+                    }
+                }
+            
+                if(c!=0) {
+                    /*
+                     * c is a surrogate, and
+                     * - source or target too short
+                     * - or the surrogate is unmatched
+                     */
+                    toUBytesArray[toUBytesBegin+0]=(byte)c;
+                    toUBytesArray[toUBytesBegin+1]=(byte)(c>>>8);
+                    toULength=2;
+            
+                    if(UTF16.isLeadSurrogate(c)) {
+                        if(length>=2) {
+                            if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
+                                /* output the surrogate pair, will overflow (see conditions comment above) */
+                                sourceArrayIndex+=2;
+                                length-=2;
+                                target.put(c);
+                                if(offsets!=null) {
+                                    offsets.put(sourceIndex);
+                                }
+                                charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                                charErrorBufferLength=1;
+                                toULength=0;
+                                cr = CoderResult.OVERFLOW;
+                            } else {
+                                /* unmatched lead surrogate */
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                        } else {
+                            /* see if the trail surrogate is in the next buffer */
+                        }
+                    } else {
+                        /* unmatched trail surrogate */
+                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                    }
+                }
+            
+               
+                /* check for a remaining source byte */
+                if(length>0) {
+                    if(!target.hasRemaining()) {
+                        cr = CoderResult.OVERFLOW;
+                    } else {
+                        /* it must be length==1 because otherwise the above would have copied more */
+                        toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
+                    }
+                }
+                /* publish how far the source was consumed (all reads above used absolute indexes) */
+                source.position(sourceArrayIndex);
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }
+            return cr;
+        }
+        
+    }
+    class CharsetEncoderUTF16LE extends CharsetEncoderICU{
+
+        /**
+         * Creates an encoder for <code>cs</code>, passing the
+         * <code>fromUSubstitution</code> bytes on to the superclass.
+         *
+         * @param cs the charset that owns this encoder
+         */
+        public CharsetEncoderUTF16LE(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            /* enter the initial state: the byte order mark is still to be written */
+            this.implReset();
+        }
+
+        private final static int NEED_TO_WRITE_BOM = 1;
+        /**
+         * Resets the encoder: runs the superclass reset and then sets
+         * <code>fromUnicodeStatus</code> to <code>NEED_TO_WRITE_BOM</code>, the value
+         * that <code>encodeLoop</code> tests before it emits the byte order mark.
+         */
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+        
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            char c;
+            /* write the BOM if necessary */
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+                byte bom[]={ (byte)0xff, (byte)0xfe };
+                cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
+                if(cr.isError()){
+                    return cr;
+                }
+                fromUnicodeStatus=0;
+            }
+            
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            
+            int sourceIndex = 0;
+            char trail = 0;
+            int length = source.remaining();
+            
+            try{
+                /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+            
+                if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceIndex)) && target.remaining()>=4) {
+                    /* the last buffer ended with a lead surrogate, output the surrogate pair */
+                    ++sourceIndex;
+                    --length;
+                    target.put((byte)c);
+                    target.put((byte)(c>>>8));
+                    target.put((byte)trail);
+                    target.put((byte)(trail>>>8));
+                    if(offsets!=null && offsets.remaining()>=4) {
+                        offsets.put(-1);
+                        offsets.put(-1);
+                        offsets.put(-1);
+                        offsets.put(-1);
+                    }
+                    sourceIndex=1;
+                    fromUChar32=c=0;
+                }
+                byte overflow[/*4*/] = new byte[4];
+                int sourceArrayIndex = source.position();
+                
+                if(c==0) {
+                    /* copy an even number of bytes for complete UChars */
+                    int count=2*length;
+                    int targetCapacity = target.limit();
+                    if(count>targetCapacity) {
+                        count=targetCapacity&~1;
+                    }           
+                    /* count is even */
+                    targetCapacity-=count;
+                    count>>=1;
+                    length-=count;
+    
+                    if(offsets==null) {
+                        while(count>0) {
+                            c= source.get(sourceArrayIndex++);
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put((byte)c);
+                                target.put((byte)(c>>>8));
+                                
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                                ++sourceArrayIndex;
+                                --count;
+                                target.put((byte)c);
+                                target.put((byte)(c>>>8));
+                                target.put((byte)trail);
+                                target.put((byte)(trail>>>8));
+                            } else {
+                                break;
+                            }
+                            --count;
+                        }
+                    } else {
+                        while(count>0) {
+                            c=source.get(sourceArrayIndex++);
+                            if(!UTF16.isSurrogate(c)) {
+                                target.put((byte)c);
+                                target.put((byte)(c>>>8));
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex++);
+                            } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                                ++sourceArrayIndex;
+                                --count;
+                                target.put((byte)c);
+                                target.put((byte)(c>>>8));
+                                target.put((byte)trail);
+                                target.put((byte)(trail>>>8));
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                offsets.put(sourceIndex);
+                                sourceIndex+=2;
+                            } else {
+                                break;
+                            }
+                            --count;
+                        }
+                    }
+            
+                    if(count==0) {
+                        /* done with the loop for complete UChars */
+                        if(length>0 && targetCapacity>0) {
+                            /*
+                             * there is more input and some target capacity -
+                             * it must be targetCapacity==1 because otherwise
+                             * the above would have copied more;
+                             * prepare for overflow output
+                             */
+                            if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
+                                overflow[0]=(byte)c;
+                                overflow[1]=(byte)(c>>>8);
+                                length=2; /* 2 bytes to output */
+                                c=0;
+                            /* } else { keep c for surrogate handling, length will be set there */
+                            }
+                        } else {
+                            length=0;
+                            c=0;
+                        }
+                    } else {
+                        /* keep c for surrogate handling, length will be set there */
+                        targetCapacity+=2*count;
+                    }
+                } else {
+                    length=0; /* from here on, length counts the bytes in overflow[] */
+                }
+                
+                if(c!=0) {
+                    /*
+                     * c is a surrogate, and
+                     * - source or target too short
+                     * - or the surrogate is unmatched
+                     */
+                    length=0;
+                    if(UTF16.isLeadSurrogate(c)) {
+                        if(sourceArrayIndex<source.limit()) {
+                            if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                                /* output the surrogate pair, will overflow (see conditions comment above) */
+                                ++sourceArrayIndex;
+                                overflow[0]=(byte)c;
+                                overflow[1]=(byte)(c>>>8);
+                                overflow[2]=(byte)trail;
+                                overflow[3]=(byte)(trail>>>8);
+                                length=4; /* 4 bytes to output */
+                                c=0;
+                            } else {
+                                /* unmatched lead surrogate */
+                                //pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                        } else {
+                            /* see if the trail surrogate is in the next buffer */
+                        }
+                    } else {
+                        /* unmatched trail surrogate */
+                        //pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
+                    }
+                    fromUChar32=c;
+                }
+                source.position(sourceArrayIndex);
+                if(length>0) {
+                    /* output length bytes with overflow (length>targetCapacity>0) */
+                    fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
+                }
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }
+            return cr;
+        }
+    }
+    /**
+     * Creates a decoder for this charset.
+     *
+     * @return a new <code>CharsetDecoderUTF16LE</code> constructed with this charset
+     */
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new CharsetDecoderUTF16LE(this);
+        return decoder;
+    }
+
+    /**
+     * Creates an encoder for this charset.
+     *
+     * @return a new <code>CharsetEncoderUTF16LE</code> constructed with this charset
+     */
+    public CharsetEncoder newEncoder() {
+        final CharsetEncoder encoder = new CharsetEncoderUTF16LE(this);
+        return encoder;
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/CharsetUTF32.java
+++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF32.java
@ -0,0 +1,318 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.CharsetDecoderICU;
+import com.ibm.icu.charset.CharsetEncoderICU;
+import com.ibm.icu.charset.CharsetICU;
+import com.ibm.icu.text.UTF16;
+/**
+ * @author Niti Hantaweepant
+ */
+public class CharsetUTF32 extends CharsetICU {
+    /** Substitution byte sequence 00 00 FF FD, handed to the encoder's super constructor. */
+    protected byte[] fromUSubstitution = { 0, 0, (byte)0xff, (byte)0xfd };
+
+    /**
+     * Builds the charset and fixes its size ratios: every encoded unit is
+     * exactly four bytes, and at most one char is produced per byte.
+     * @param icuCanonicalName  forwarded unchanged to the CharsetICU constructor
+     * @param javaCanonicalName forwarded unchanged to the CharsetICU constructor
+     * @param aliases           forwarded unchanged to the CharsetICU constructor
+     */
+    public CharsetUTF32(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        minBytesPerChar = 4;
+        maxBytesPerChar = 4;
+        maxCharsPerByte = 1;
+    }
+    /**
+     * Decoder that assembles big-endian 4-byte units and writes them to the
+     * target as one char or as a surrogate pair.
+     */
+    class CharsetDecoderUTF32 extends CharsetDecoderICU{
+
+        public CharsetDecoderUTF32(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Tells whether a completely assembled 32-bit unit may be emitted.
+         * The test is made on the whole int: narrowing to char first would
+         * report supplementary code points whose low 16 bits lie in
+         * D800..DFFF (for example U+1D800) as surrogates, and a unit whose
+         * top bit is set is negative in Java and would otherwise slip past
+         * the upper-bound comparison.
+         * @param ch the assembled unit
+         * @return true when ch is non-negative, not above
+         *         UConverterSharedData.MAXIMUM_UTF and not in D800..DFFF
+         */
+        private boolean isDecodable(int ch) {
+            return ch >= 0
+                && ch <= UConverterSharedData.MAXIMUM_UTF
+                && (ch & 0xFFFFF800) != 0xD800;
+        }
+
+        /**
+         * Writes one accepted code point to the target, as a single char when
+         * it is not above UConverterSharedData.MAXIMUM_UCS2 and as a
+         * lead/trail pair otherwise. The caller guarantees room for one char.
+         * @throws BufferOverflowException when the trail surrogate does not
+         *         fit; it is parked in charErrorBufferArray before the throw
+         */
+        private void putCodePoint(int ch, CharBuffer target) {
+            if (ch <= UConverterSharedData.MAXIMUM_UCS2) {
+                /* fits in 16 bits */
+                target.put((char)ch);
+                return;
+            }
+            /* write out the surrogates */
+            target.put(UTF16.getLeadSurrogate(ch));
+            int trail = UTF16.getTrailSurrogate(ch);
+            if (target.hasRemaining()) {
+                target.put((char)trail);
+            }
+            else {
+                /* Put in overflow buffer (not handled here) */
+                charErrorBufferArray[0] = (char) trail;
+                charErrorBufferLength = 1;
+                throw new BufferOverflowException();
+            }
+        }
+
+        /**
+         * Decodes as many complete 4-byte units as source and target allow.
+         * A unit cut off by the end of the source is kept in toUnicodeStatus
+         * (value + 1, so that 0 means "nothing pending") and toULength, and is
+         * resumed on the next call.
+         * @param source  bytes to decode; its position is advanced past every
+         *                byte consumed, including on overflow
+         * @param target  receives the decoded chars
+         * @param offsets not used by this method
+         * @return UNDERFLOW, OVERFLOW, or a malformed-input result
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            int sourceArrayIndex = source.position();
+
+            try{
+                /* true until the unit left unfinished by a previous call has been picked up */
+                boolean resuming = (toUnicodeStatus != 0);
+
+                donefornow:
+                while (target.hasRemaining() && (resuming || sourceArrayIndex < source.limit())) {
+                    int ch, i;
+                    if (resuming) {
+                        i = toULength;       /* restore # of bytes consumed */
+                        ch = (int)(toUnicodeStatus - 1); /* partially assembled unit from the previous call */
+                        toUnicodeStatus = 0;
+                        resuming = false;
+                    }
+                    else {
+                        i = 0;
+                        ch = 0;
+                    }
+
+                    while (i < 4) {
+                        if (sourceArrayIndex >= source.limit()) {
+                            /* stores a partially calculated target*/
+                            /* + 1 to make 0 a valid character */
+                            toUnicodeStatus = ch + 1;
+                            toULength = (byte) i;
+                            break donefornow;
+                        }
+                        byte b = source.get(sourceArrayIndex++);
+                        ch = (ch << 8) | (b & UConverterConstants.UNSIGNED_BYTE_MASK);
+                        toUBytesArray[i++] = b;
+                    }
+
+                    if (!isDecodable(ch)) {
+                        toULength = (byte)i;
+                        /* NOTE(review): the argument is a buffer index, not the length of the
+                           malformed sequence; kept as in the original - confirm against the
+                           base class before changing. */
+                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                        break donefornow;
+                    }
+                    putCodePoint(ch, target);
+                }
+
+                if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+                    /* End of target buffer */
+                    cr = CoderResult.OVERFLOW;
+                }
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }finally{
+                /* Must also run on the overflow path: the unit whose trail surrogate was parked
+                   has been consumed and would otherwise be decoded a second time. */
+                source.position(sourceArrayIndex);
+            }
+            return cr;
+        }
+    }
+    
+    class CharsetEncoderUTF32 extends CharsetEncoderICU{
+
+        public CharsetEncoderUTF32(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        private final static int NEED_TO_WRITE_BOM = 1;
+        
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+        
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            
+            /* write the BOM if necessary */
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+                byte[] bom={ 0, 0, (byte)0xfe, (byte)0xff };
+                cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
+                if(cr.isError()){
+                    return cr;
+                }
+                fromUnicodeStatus=0;
+            }
+            
+            int ch, ch2;
+            int indexToWrite;
+            byte temp[] = new byte[4];
+            temp[0] = 0;
+            int sourceArrayIndex = source.position();
+            
+            try{
+                boolean doloop = true;
+                if (fromUChar32 != 0) {
+                    ch = fromUChar32;
+                    fromUChar32 = 0;
+                    //lowsurogate:
+                    if (sourceArrayIndex < source.limit()) {
+                        ch2 = source.get(sourceArrayIndex);
+                        if (UTF16.isTrailSurrogate((char)ch2)) {
+                            ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
+                            sourceArrayIndex++;
+                        }
+                        else {
+                            /* this is an unmatched trail code unit (2nd surrogate) */
+                            /* callback(illegal) */
+                            fromUChar32 = ch;
+                            cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            doloop = false;
+                        }
+                    }
+                    else {
+                        /* ran out of source */
+                        fromUChar32 = ch;
+                        if (flush) {
+                            /* this is an unmatched trail code unit (2nd surrogate) */
+                            /* callback(illegal) */
+                            cr = CoderResult.malformedForLength(sourceArrayIndex);
+                        }
+                        doloop = false;
+                    }
+                    
+                    /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+                    temp[1] = (byte) (ch >>> 16 & 0x1F);
+                    temp[2] = (byte) (ch >>> 8);  /* unsigned cast implicitly does (ch & FF) */
+                    temp[3] = (byte) (ch);       /* unsigned cast implicitly does (ch & FF) */
+            
+                    for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
+                        if (target.hasRemaining()) {
+                            target.put(temp[indexToWrite]);
+                        }
+                        else {
+                            errorBuffer[errorBufferLength++] = temp[indexToWrite];
+                            cr = CoderResult.OVERFLOW;
+                        }
+                    }
+                }
+            
+                if(doloop) {
+                    while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
+                        ch = source.get(sourceArrayIndex++);
+                
+                        if (UTF16.isSurrogate((char)ch)) {
+                            if (UTF16.isLeadSurrogate((char)ch)) {
+                                //lowsurogate:
+                                if (sourceArrayIndex < source.limit()) {
+                                    ch2 = source.get(sourceArrayIndex);
+                                    if (UTF16.isTrailSurrogate((char)ch2)) {
+                                        ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
+                                        sourceArrayIndex++;
+                                    }
+                                    else {
+                                        /* this is an unmatched trail code unit (2nd surrogate) */
+                                        /* callback(illegal) */
+                                        fromUChar32 = ch;
+                                        cr = CoderResult.OVERFLOW;
+                                        break;
+                                    }
+                                }
+                                else {
+                                    /* ran out of source */
+                                    fromUChar32 = ch;
+                                    if (flush) {
+                                        /* this is an unmatched trail code unit (2nd surrogate) */
+                                        /* callback(illegal) */
+                                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                                    }
+                                    break;
+                                }
+                            }
+                            else {
+                                fromUChar32 = ch;
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                                break;
+                            }
+                        }
+                
+                        /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+                        temp[1] = (byte) (ch >>> 16 & 0x1F);
+                        temp[2] = (byte) (ch >>> 8);  /* unsigned cast implicitly does (ch & FF) */
+                        temp[3] = (byte) (ch);       /* unsigned cast implicitly does (ch & FF) */
+                
+                        for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
+                            if (target.hasRemaining()) {
+                                target.put(temp[indexToWrite]);
+                            }
+                            else {
+                                errorBuffer[errorBufferLength++] = temp[indexToWrite];
+                                cr = CoderResult.OVERFLOW;
+                            }
+                        }
+                    }
+                }
+            
+                if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+                    cr = CoderResult.OVERFLOW;
+                }
+                source.position(sourceArrayIndex);
+                
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }
+            return cr;
+        }
+    }
+    /**
+     * Creates a fresh decoder for this charset.
+     * @return a new CharsetDecoderUTF32 constructed with this charset
+     */
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new CharsetDecoderUTF32(this);
+        return decoder;
+    }
+
+    /**
+     * Creates a fresh encoder for this charset.
+     * @return a new CharsetEncoderUTF32 constructed with this charset
+     */
+    public CharsetEncoder newEncoder() {
+        final CharsetEncoder encoder = new CharsetEncoderUTF32(this);
+        return encoder;
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/CharsetUTF32LE.java
+++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF32LE.java
@ -0,0 +1,318 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.CharsetDecoderICU;
+import com.ibm.icu.charset.CharsetEncoderICU;
+import com.ibm.icu.charset.CharsetICU;
+import com.ibm.icu.text.UTF16;
+/**
+ * @author Niti Hantaweepant
+ */
+public class CharsetUTF32LE extends CharsetICU {
+    /** Substitution byte sequence FD FF 00 00, handed to the encoder's super constructor. */
+    protected byte[] fromUSubstitution = { (byte)0xfd, (byte)0xff, 0, 0 };
+
+    /**
+     * Builds the charset and fixes its size ratios: every encoded unit is
+     * exactly four bytes, and at most one char is produced per byte.
+     * @param icuCanonicalName  forwarded unchanged to the CharsetICU constructor
+     * @param javaCanonicalName forwarded unchanged to the CharsetICU constructor
+     * @param aliases           forwarded unchanged to the CharsetICU constructor
+     */
+    public CharsetUTF32LE(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        minBytesPerChar = 4;
+        maxBytesPerChar = 4;
+        maxCharsPerByte = 1;
+    }
+    /**
+     * Decoder that assembles little-endian 4-byte units and writes them to
+     * the target as one char or as a surrogate pair.
+     */
+    class CharsetDecoderUTF32LE extends CharsetDecoderICU{
+
+        public CharsetDecoderUTF32LE(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Tells whether a completely assembled 32-bit unit may be emitted.
+         * The test is made on the whole int: narrowing to char first would
+         * report supplementary code points whose low 16 bits lie in
+         * D800..DFFF (for example U+1D800) as surrogates, and a unit whose
+         * top bit is set is negative in Java and would otherwise slip past
+         * the upper-bound comparison.
+         * @param ch the assembled unit
+         * @return true when ch is non-negative, not above
+         *         UConverterSharedData.MAXIMUM_UTF and not in D800..DFFF
+         */
+        private boolean isDecodable(int ch) {
+            return ch >= 0
+                && ch <= UConverterSharedData.MAXIMUM_UTF
+                && (ch & 0xFFFFF800) != 0xD800;
+        }
+
+        /**
+         * Writes one accepted code point to the target, as a single char when
+         * it is not above UConverterSharedData.MAXIMUM_UCS2 and as a
+         * lead/trail pair otherwise. The caller guarantees room for one char.
+         * @throws BufferOverflowException when the trail surrogate does not
+         *         fit; it is parked in charErrorBufferArray before the throw
+         */
+        private void putCodePoint(int ch, CharBuffer target) {
+            if (ch <= UConverterSharedData.MAXIMUM_UCS2) {
+                /* fits in 16 bits */
+                target.put((char)ch);
+                return;
+            }
+            /* write out the surrogates */
+            target.put(UTF16.getLeadSurrogate(ch));
+            int trail = UTF16.getTrailSurrogate(ch);
+            if (target.hasRemaining()) {
+                target.put((char)trail);
+            }
+            else {
+                /* Put in overflow buffer (not handled here) */
+                charErrorBufferArray[0] = (char) trail;
+                charErrorBufferLength = 1;
+                throw new BufferOverflowException();
+            }
+        }
+
+        /**
+         * Decodes as many complete 4-byte units as source and target allow.
+         * A unit cut off by the end of the source is kept in toUnicodeStatus
+         * (value + 1, so that 0 means "nothing pending") and toULength, and is
+         * resumed on the next call.
+         * @param source  bytes to decode; its position is advanced past every
+         *                byte consumed, including on overflow
+         * @param target  receives the decoded chars
+         * @param offsets not used by this method
+         * @return UNDERFLOW, OVERFLOW, or a malformed-input result
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            int sourceArrayIndex = source.position();
+
+            try{
+                /* true until the unit left unfinished by a previous call has been picked up */
+                boolean resuming = (toUnicodeStatus != 0);
+
+                donefornow:
+                while (target.hasRemaining() && (resuming || sourceArrayIndex < source.limit())) {
+                    int ch, i;
+                    if (resuming) {
+                        i = toULength;       /* restore # of bytes consumed */
+                        ch = (int)(toUnicodeStatus - 1); /* partially assembled unit from the previous call */
+                        toUnicodeStatus = 0;
+                        resuming = false;
+                    }
+                    else {
+                        i = 0;
+                        ch = 0;
+                    }
+
+                    while (i < 4) {
+                        if (sourceArrayIndex >= source.limit()) {
+                            /* stores a partially calculated target*/
+                            /* + 1 to make 0 a valid character */
+                            toUnicodeStatus = ch + 1;
+                            toULength = (byte) i;
+                            break donefornow;
+                        }
+                        byte b = source.get(sourceArrayIndex++);
+                        /* little-endian: byte number i supplies bits 8*i .. 8*i+7 */
+                        ch |= (b & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
+                        toUBytesArray[i++] = b;
+                    }
+
+                    if (!isDecodable(ch)) {
+                        toULength = (byte)i;
+                        /* NOTE(review): the argument is a buffer index, not the length of the
+                           malformed sequence; kept as in the original - confirm against the
+                           base class before changing. */
+                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                        break donefornow;
+                    }
+                    putCodePoint(ch, target);
+                }
+
+                if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+                    /* End of target buffer */
+                    cr = CoderResult.OVERFLOW;
+                }
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }finally{
+                /* Must also run on the overflow path: the unit whose trail surrogate was parked
+                   has been consumed and would otherwise be decoded a second time. */
+                source.position(sourceArrayIndex);
+            }
+            return cr;
+        }
+    }
+    
+    /**
+     * Encoder that writes a little-endian BOM (FF FE 00 00) before the first
+     * output and then four little-endian bytes per code point.
+     */
+    class CharsetEncoderUTF32LE extends CharsetEncoderICU{
+
+        public CharsetEncoderUTF32LE(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        /* fromUnicodeStatus value meaning that the BOM has not been written yet */
+        private final static int NEED_TO_WRITE_BOM = 1;
+
+        /** Resets the base-class state and arms the BOM for the next encodeLoop call. */
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+
+        /** Combines a lead and a trail surrogate into the code point they encode. */
+        private int combineSurrogates(int lead, int trail) {
+            return ((lead - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT)
+                    + trail + UConverterSharedData.SURROGATE_LOW_BASE;
+        }
+
+        /**
+         * Writes the four little-endian bytes of ch; bytes that do not fit in
+         * the target are appended to errorBuffer.
+         * @param temp scratch array of length 4 whose element 3 is already 0
+         * @return true when at least one byte went to errorBuffer
+         */
+        private boolean writeUnit(int ch, byte[] temp, ByteBuffer target) {
+            boolean overflowed = false;
+            /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+            temp[2] = (byte) (ch >>> 16 & 0x1F);
+            temp[1] = (byte) (ch >>> 8);  /* the byte cast keeps only the low 8 bits */
+            temp[0] = (byte) (ch);
+
+            for (int k = 0; k <= 3; k++) {
+                if (target.hasRemaining()) {
+                    target.put(temp[k]);
+                }
+                else {
+                    errorBuffer[errorBufferLength++] = temp[k];
+                    overflowed = true;
+                }
+            }
+            return overflowed;
+        }
+
+        /**
+         * Encodes chars from source into target. A lead surrogate that cannot
+         * be paired yet is kept in fromUChar32 and nothing is written for it.
+         * @param source  chars to encode; its position is advanced past the
+         *                chars consumed
+         * @param target  receives the encoded bytes
+         * @param offsets passed through to fromUWriteBytes for the BOM only
+         * @return UNDERFLOW, OVERFLOW, or a malformed-input result for an
+         *         unpaired surrogate
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return cr;
+            }
+
+            /* write the BOM if necessary */
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+                byte[] bom={ (byte)0xff, (byte)0xfe, 0, 0 };
+                cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
+                if(cr.isError()){
+                    return cr;
+                }
+                fromUnicodeStatus=0;
+            }
+
+            int ch, ch2;
+            byte temp[] = new byte[4];
+            temp[3] = 0;
+            int sourceArrayIndex = source.position();
+
+            try{
+                boolean doloop = true;
+                if (fromUChar32 != 0) {
+                    /* a lead surrogate is pending from an earlier call */
+                    ch = fromUChar32;
+                    fromUChar32 = 0;
+                    if (sourceArrayIndex < source.limit()) {
+                        ch2 = source.get(sourceArrayIndex);
+                        if (UTF16.isTrailSurrogate((char)ch2)) {
+                            ch = combineSurrogates(ch, ch2);
+                            sourceArrayIndex++;
+                            if (writeUnit(ch, temp, target)) {
+                                cr = CoderResult.OVERFLOW;
+                            }
+                        }
+                        else {
+                            /* unmatched lead surrogate: report it and write nothing for it */
+                            fromUChar32 = ch;
+                            /* malformedForLength rejects non-positive values and the index can
+                               be 0 here. NOTE(review): the argument is a buffer index as
+                               elsewhere in this class, not a length - confirm the convention. */
+                            cr = CoderResult.malformedForLength(Math.max(sourceArrayIndex, 1));
+                            doloop = false;
+                        }
+                    }
+                    else {
+                        /* ran out of source */
+                        fromUChar32 = ch;
+                        if (flush) {
+                            /* unmatched lead surrogate at the end of the input */
+                            cr = CoderResult.malformedForLength(Math.max(sourceArrayIndex, 1));
+                        }
+                        doloop = false;
+                    }
+                }
+
+                if(doloop) {
+                    while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
+                        ch = source.get(sourceArrayIndex++);
+
+                        if (UTF16.isSurrogate((char)ch)) {
+                            if (UTF16.isLeadSurrogate((char)ch)) {
+                                if (sourceArrayIndex < source.limit()) {
+                                    ch2 = source.get(sourceArrayIndex);
+                                    if (UTF16.isTrailSurrogate((char)ch2)) {
+                                        ch = combineSurrogates(ch, ch2);
+                                        sourceArrayIndex++;
+                                    }
+                                    else {
+                                        /* unmatched lead surrogate: malformed input, not overflow */
+                                        fromUChar32 = ch;
+                                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                                        break;
+                                    }
+                                }
+                                else {
+                                    /* ran out of source */
+                                    fromUChar32 = ch;
+                                    if (flush) {
+                                        /* unmatched lead surrogate at the end of the input */
+                                        cr = CoderResult.malformedForLength(sourceArrayIndex);
+                                    }
+                                    break;
+                                }
+                            }
+                            else {
+                                /* unmatched trail surrogate */
+                                fromUChar32 = ch;
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                                break;
+                            }
+                        }
+
+                        if (writeUnit(ch, temp, target)) {
+                            cr = CoderResult.OVERFLOW;
+                        }
+                    }
+                }
+
+                if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+                    cr = CoderResult.OVERFLOW;
+                }
+                source.position(sourceArrayIndex);
+
+            }catch(BufferOverflowException ex){
+                cr = CoderResult.OVERFLOW;
+            }
+            return cr;
+        }
+    }
+    /**
+     * Creates a fresh decoder for this charset.
+     * @return a new CharsetDecoderUTF32LE constructed with this charset
+     */
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new CharsetDecoderUTF32LE(this);
+        return decoder;
+    }
+
+    /**
+     * Creates a fresh encoder for this charset.
+     * @return a new CharsetEncoderUTF32LE constructed with this charset
+     */
+    public CharsetEncoder newEncoder() {
+        final CharsetEncoder encoder = new CharsetEncoderUTF32LE(this);
+        return encoder;
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java
+++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java
@ -0,0 +1,508 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+
+package com.ibm.icu.impl;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.CharsetDecoderICU;
+import com.ibm.icu.charset.CharsetEncoderICU;
+import com.ibm.icu.charset.CharsetICU;
+import com.ibm.icu.text.UTF16;
+/**
+ * @author Niti Hantaweepant
+ */
+public class CharsetUTF8 extends CharsetICU {
+    /**
+     * Substitution bytes passed by CharsetEncoderUTF8 to its superclass
+     * constructor: EF BF BD, i.e. the UTF-8 form of U+FFFD.
+     */
+    protected byte[] fromUSubstitution = new byte[]{(byte)0xef, (byte)0xbf, (byte)0xbd};
+    /**
+     * Creates the UTF-8 charset and records its per-character size bounds
+     * (1 to 4 bytes per char, at most 1 char per byte).
+     * @param icuCanonicalName  forwarded unchanged to the CharsetICU constructor
+     * @param javaCanonicalName forwarded unchanged to the CharsetICU constructor
+     * @param aliases           forwarded unchanged to the CharsetICU constructor
+     */
+    public CharsetUTF8(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        maxBytesPerChar = 4;
+        minBytesPerChar = 1;
+        maxCharsPerByte = 1;
+    }
+    
+    /* UTF-8 Conversion DATA
+     *   for more information see Unicode Standard 2.0 , Transformation Formats Appendix A-9
+     */       
+    /*
+     * Indexed by the sequence length in bytes (1..6; slot 0 is padding).
+     * The decoder accumulates a sequence as ch = (ch << 6) + nextByte and then
+     * subtracts the entry for that length to remove the lead/trail marker bits
+     * (e.g. 0x3080 == (0xC0 << 6) + 0x80 for two-byte sequences).
+     */
+    private static final long OFFSETS_FROM_UTF8[] = {0,
+  	  0x00000000L, 0x00003080L, 0x000E2080L,
+  	  0x03C82080L, 0xFA082080L, 0x82082080L};
+    
+    /*
+     * Indexed by the unsigned value (0..255) of the first byte of a sequence;
+     * yields the total sequence length in bytes. 0 marks values that cannot
+     * start a sequence: 0x80..0xBF and 0xFE, 0xFF.
+     */
+    private static final byte BYTES_FROM_UTF8[] = 
+    {
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
+    };
+    
+    /*
+	 * Starting with Unicode 3.0.1:
+	 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
+	 * byte sequences with more than 4 bytes are illegal in UTF-8,
+	 * which is tested with impossible values for them
+	 * (0xffffffff can never be reached by the int code point the decoder compares against it).
+	 */
+	private static final long UTF8_MIN_CHAR32[] = { 0L, 0L, 0x80L, 0x800L, 0x10000L, 0xffffffffL, 0xffffffffL };
+
+    /**
+     * Decoder that converts UTF-8 bytes into UTF-16 chars.
+     * <p>
+     * State kept between calls (fields used unqualified below): toUnicodeStatus
+     * holds the partially accumulated code point of an incomplete sequence,
+     * mode the total number of bytes that sequence needs, toULength the number
+     * of its bytes consumed so far, and toUBytesArray the raw bytes of the
+     * sequence currently being decoded.
+     */
+    class CharsetDecoderUTF8 extends CharsetDecoderICU{
+
+        public CharsetDecoderUTF8(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Decodes as many bytes as possible from source into target.
+         *
+         * @param source  bytes to decode; read by absolute index starting at its
+         *                position, and repositioned to the first unconsumed byte
+         *                on every exit path, including target overflow
+         * @param target  receives the decoded UTF-16 chars; a low surrogate that
+         *                does not fit is parked in charErrorBufferArray
+         * @param offsets not used by this implementation
+         * @return UNDERFLOW when the input is exhausted (a trailing partial
+         *         sequence is saved in the decoder state), OVERFLOW when target
+         *         is full, or a malformed-input result whose length is the
+         *         number of bytes collected for the offending sequence
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+
+            int sourceArrayIndex = source.position();
+
+            // Todo: CESU8 implementation
+            // boolean isCESU8 = args.converter.sharedData == _CESU8Data;
+            boolean isCESU8 = (UConverterSharedData._CESU8Data != null);
+            int ch, ch2 = 0;
+            int i, inBytes;
+
+            try{
+                donefornow:
+                {
+                    if (toUnicodeStatus!=0 && target.hasRemaining())
+                    {
+                        inBytes = mode;             /* restore # of bytes to consume */
+                        i = toULength;              /* restore # of bytes consumed */
+
+                        ch = toUnicodeStatus;       /* partial code point saved by a previous call */
+                        toUnicodeStatus = 0;
+
+                        while (i < inBytes)
+                        {
+                            if (sourceArrayIndex < source.limit())
+                            {
+                                toUBytesArray[i] = (byte) (ch2 = source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                                if (!isTrail((byte)ch2))
+                                {
+                                    break; /* i < inBytes */
+                                }
+                                ch = (ch << 6) + ch2;
+                                ++sourceArrayIndex;
+                                i++;
+                            }
+                            else
+                            {
+                                /* stores a partially calculated target*/
+                                toUnicodeStatus = ch;
+                                mode = inBytes;
+                                toULength = (byte) i;
+                                break donefornow;
+                            }
+                        }
+
+                        /* Remove the accumulated high bits */
+                        ch -= OFFSETS_FROM_UTF8[inBytes];
+
+                        if (isLegalSequence(ch, i, inBytes, isCESU8))
+                        {
+                            /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
+                            toULength = 0;
+                            putCodePoint(ch, target);
+                        }
+                        else
+                        {
+                            toULength = (byte)i;
+                            /*
+                             * The length argument is a byte count, not a buffer index;
+                             * passing the index could be 0 here, which CoderResult rejects.
+                             */
+                            cr = CoderResult.malformedForLength(i);
+                            break donefornow;
+                        }
+                    }
+
+                    while (sourceArrayIndex < source.limit() && target.hasRemaining())
+                    {
+                        ch = source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK;
+                        if (ch < 0x80)        /* Simple case */
+                        {
+                            target.put((char)ch);
+                        }
+                        else
+                        {
+                            /* store the first char */
+                            toUBytesArray[0] = (byte)ch;
+                            inBytes = BYTES_FROM_UTF8[(int)ch]; /* lookup current sequence length */
+                            i = 1;
+
+                            while (i < inBytes)
+                            {
+                                if (sourceArrayIndex < source.limit())
+                                {
+                                    toUBytesArray[i] = (byte) (ch2 = source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                                    if (!isTrail((byte)ch2))
+                                    {
+                                        break; /* i < inBytes */
+                                    }
+                                    ch = (ch << 6) + ch2;
+                                    ++sourceArrayIndex;
+                                    i++;
+                                }
+                                else
+                                {
+                                    /* stores a partially calculated target*/
+                                    toUnicodeStatus = ch;
+                                    mode = inBytes;
+                                    toULength = (byte) i;
+                                    break donefornow;
+                                }
+                            }
+
+                            /* Remove the accumulated high bits */
+                            ch -= OFFSETS_FROM_UTF8[inBytes];
+
+                            if (isLegalSequence(ch, i, inBytes, isCESU8))
+                            {
+                                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
+                                toULength = 0;
+                                putCodePoint(ch, target);
+                            }
+                            else
+                            {
+                                toULength = (byte)i;
+                                cr = CoderResult.malformedForLength(i);
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                if (sourceArrayIndex < source.limit() && !target.hasRemaining())
+                {
+                    /* End of target buffer */
+                    cr = CoderResult.OVERFLOW;
+                }
+
+            }catch(BufferOverflowException ex){
+                /* thrown by putCodePoint after parking the low surrogate */
+                cr = CoderResult.OVERFLOW;
+            }finally{
+                /*
+                 * Always publish how far we read. Doing this only on the normal
+                 * path would leave the position untouched on overflow, so the
+                 * bytes of the split surrogate pair would be decoded a second time.
+                 */
+                source.position(sourceArrayIndex);
+            }
+            return cr;
+        }
+
+        /**
+         * Checks the rules for a legal UTF-8 sequence (Unicode 3.0.1 and up):
+         * only trail bytes after the lead byte and the right number of them
+         * (i == inBytes), a code point no larger than MAXIMUM_UTF, the shortest
+         * possible form (UTF8_MIN_CHAR32), at most 4 bytes (the table holds
+         * unreachable minimums for 5 and 6), and, starting with Unicode 3.2, no
+         * encoded surrogate code points. In CESU-8 only sequences of up to
+         * 3 bytes are accepted instead, so surrogates are allowed and
+         * supplementary code points are not.
+         * <p>
+         * The surrogate test is done on the full 32-bit value; narrowing to char
+         * first would misclassify supplementary code points whose low 16 bits
+         * fall in D800..DFFF (for example U+1D800).
+         */
+        private boolean isLegalSequence(int ch, int i, int inBytes, boolean isCESU8) {
+            return i == inBytes
+                && ch <= UConverterSharedData.MAXIMUM_UTF
+                && ch >= UTF8_MIN_CHAR32[i]
+                && (isCESU8 ? i <= 3 : (ch & 0xFFFFF800) != 0xD800);
+        }
+
+        /**
+         * Writes one decoded code point to target, as a single char or as a
+         * surrogate pair. The caller guarantees room for at least one char.
+         *
+         * @throws BufferOverflowException after storing the low surrogate in
+         *         charErrorBufferArray when only the high surrogate fits
+         */
+        private void putCodePoint(int ch, CharBuffer target) {
+            if (ch <= UConverterSharedData.MAXIMUM_UCS2)
+            {
+                /* fits in 16 bits */
+                target.put((char)ch);
+                return;
+            }
+
+            /* write out the surrogates */
+            ch -= UConverterSharedData.HALF_BASE;
+            target.put((char) ((ch >>> UConverterSharedData.HALF_SHIFT) + UConverterSharedData.SURROGATE_HIGH_START));
+            char low = (char) ((ch & UConverterSharedData.HALF_MASK) + UConverterSharedData.SURROGATE_LOW_START);
+            if (target.hasRemaining())
+            {
+                target.put(low);
+            }
+            else
+            {
+                /* targetCapacity==1: keep the low surrogate for the next call */
+                charErrorBufferArray[charErrorBufferBegin+0]=low;
+                charErrorBufferLength=1;
+                throw new BufferOverflowException();
+            }
+        }
+
+    }
+    /**
+     * Encoder that converts UTF-16 chars into UTF-8 bytes.
+     * <p>
+     * State kept between calls (fields used unqualified below): fromUChar32
+     * holds a surrogate whose partner has not been seen yet; errorBuffer and
+     * errorBufferLength hold output bytes that did not fit into the target.
+     */
+    class CharsetEncoderUTF8 extends CharsetEncoderICU{
+
+        public CharsetEncoderUTF8(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        protected void implReset() {
+            super.implReset();
+        }
+
+        /**
+         * Encodes as many chars as possible from source into target.
+         *
+         * @param source  chars to encode; read by absolute index starting at its
+         *                position, and repositioned to the first unconsumed char
+         *                on every exit path, including target overflow
+         * @param target  receives the UTF-8 bytes; bytes of a sequence that do
+         *                not fit are parked in errorBuffer
+         * @param offsets not used by this implementation
+         * @return UNDERFLOW when the input is exhausted (a trailing lead
+         *         surrogate is saved in fromUChar32), OVERFLOW when target is
+         *         full, or a malformed-input result of length 1 for an unpaired
+         *         surrogate (which is also left in fromUChar32)
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            CoderResult cr = CoderResult.UNDERFLOW;
+
+            int sourceArrayIndex = source.position();
+
+            // Todo: CESU8 implementation
+            // boolean isCESU8 = args.converter.sharedData == _CESU8Data;
+            boolean isCESU8 = (UConverterSharedData._CESU8Data != null);
+
+            int ch;
+            byte temp[] = new byte[4];
+            boolean doloop = true;
+
+            try{
+
+                if (fromUChar32 != 0 && target.hasRemaining())
+                {
+                    /* resume with the surrogate left over from the previous call */
+                    ch = fromUChar32;
+                    fromUChar32 = 0;
+
+                    if (sourceArrayIndex < source.limit()) {
+                        /* test the following code unit */
+                        char trail = source.get(sourceArrayIndex);
+                        if(UTF16.isTrailSurrogate(trail)) {
+                            ++sourceArrayIndex;
+                            ch = UTF16.getCodePoint((char)ch, trail);
+                            /* convert this supplementary code point */
+                            if (putMultiByte(ch, target, temp)) {
+                                cr = CoderResult.OVERFLOW;
+                            }
+                        } else {
+                            /* this is an unmatched lead code unit (1st surrogate) */
+                            /* callback(illegal); nothing is written for it */
+                            fromUChar32 = ch;
+                            cr = CoderResult.malformedForLength(1);
+                            doloop = false;
+                        }
+                    } else {
+                        /* no more input: keep waiting for the trail, write nothing */
+                        fromUChar32 = ch;
+                        doloop = false;
+                    }
+                }
+
+                if(doloop) {
+                    while (sourceArrayIndex < source.limit() && target.hasRemaining())
+                    {
+                        ch = source.get(sourceArrayIndex++);
+
+                        if (ch < 0x80)        /* Single byte */
+                        {
+                            target.put((byte)ch);
+                        }
+                        else if (ch < 0x800)  /* Double byte */
+                        {
+                            target.put((byte) ((ch >>> 6) | 0xc0));
+                            if (target.hasRemaining())
+                            {
+                                target.put((byte) ((ch & 0x3f) | 0x80));
+                            }
+                            else
+                            {
+                                errorBuffer[0] = (byte) ((ch & 0x3f) | 0x80);
+                                errorBufferLength = 1;
+                                throw new BufferOverflowException();
+                            }
+                        }
+                        else
+                        /* Check for surrogates */
+                        {
+                            if(UTF16.isSurrogate((char)ch) && !isCESU8) {
+                                if(UTF16.isLeadSurrogate((char)ch)) {
+
+                                    if (sourceArrayIndex < source.limit()) {
+                                        /* test the following code unit */
+                                        char trail = source.get(sourceArrayIndex);
+                                        if(UTF16.isTrailSurrogate(trail)) {
+                                            ++sourceArrayIndex;
+                                            ch = UTF16.getCodePoint((char)ch, trail);
+                                            /* convert this supplementary code point */
+                                            /* exit this condition tree */
+                                        }
+                                        else {
+                                            /* this is an unmatched lead code unit (1st surrogate) */
+                                            /* callback(illegal) */
+                                            fromUChar32 = ch;
+                                            cr = CoderResult.malformedForLength(1);
+                                            break;
+                                        }
+                                    }
+                                    else {
+                                        /* no more input */
+                                        fromUChar32 = ch;
+                                        break;
+                                    }
+                                }
+                                else {
+                                    /* unmatched trail code unit (2nd surrogate) */
+                                    fromUChar32 = ch;
+                                    cr = CoderResult.malformedForLength(1);
+                                    break;
+                                }
+                            }
+
+                            if (putMultiByte(ch, target, temp)) {
+                                cr = CoderResult.OVERFLOW;
+                            }
+                        }
+                    }
+                }
+
+                if (sourceArrayIndex < source.limit() && !target.hasRemaining())
+                {
+                    cr = CoderResult.OVERFLOW;
+                }
+
+            }catch(BufferOverflowException ex){
+                /* thrown above after parking the second byte of a 2-byte form */
+                cr = CoderResult.OVERFLOW;
+            }finally{
+                /*
+                 * Always publish how far we read. Doing this only on the normal
+                 * path would leave the position untouched on overflow, so the
+                 * char whose bytes were split would be encoded a second time.
+                 */
+                source.position(sourceArrayIndex);
+            }
+            return cr;
+        }
+
+        /**
+         * Writes the 3-byte (ch below SUPPLEMENTARY_MIN_VALUE) or 4-byte form of
+         * ch. Bytes are prepared in temp in reverse order and written from the
+         * lead byte down; whatever no longer fits in target is appended to
+         * errorBuffer.
+         *
+         * @param temp scratch array of at least 4 bytes
+         * @return true if at least one byte had to go to errorBuffer
+         */
+        private boolean putMultiByte(int ch, ByteBuffer target, byte[] temp) {
+            int indexToWrite;
+            if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE)
+            {
+                indexToWrite = 2;
+                temp[2] = (byte) ((ch >>> 12) | 0xe0);
+            }
+            else
+            {
+                indexToWrite = 3;
+                temp[3] = (byte) ((ch >>> 18) | 0xf0);
+                temp[2] = (byte) (((ch >>> 12) & 0x3f) | 0x80);
+            }
+            temp[1] = (byte) (((ch >>> 6) & 0x3f) | 0x80);
+            temp[0] = (byte) ((ch & 0x3f) | 0x80);
+
+            boolean overflowed = false;
+            for (; indexToWrite >= 0; indexToWrite--)
+            {
+                if (target.hasRemaining())
+                {
+                    target.put(temp[indexToWrite]);
+                }
+                else
+                {
+                    errorBuffer[errorBufferLength++] = temp[indexToWrite];
+                    overflowed = true;
+                }
+            }
+            return overflowed;
+        }
+    }
+    
+    /* single-code point definitions -------------------------------------------- */
+
+    /**
+     * Tells whether a UTF-8 code unit stands for a code point on its own,
+     * i.e. whether its top bit is clear (US-ASCII 0..0x7f).
+     * @param c 8-bit code unit (byte)
+     * @return TRUE or FALSE
+     * @stable ICU 2.4
+     */
+    public static boolean isSingle(byte c) {
+        /* a Java byte is signed: the top bit is clear exactly when it is non-negative */
+        return c >= 0;
+    }
+
+    /**
+     * Tells whether a UTF-8 code unit is a lead byte: its distance above 0xc0,
+     * masked with UNSIGNED_BYTE_MASK, must be below 0x3e (0xc0..0xfd when the
+     * mask is 0xff - presumably; the constant is defined elsewhere).
+     * @param c 8-bit code unit (byte)
+     * @return TRUE or FALSE
+     * @stable ICU 2.4
+     */
+    public static boolean isLead(byte c) {
+        final int distanceFromFirstLead = (c - 0xc0) & UConverterConstants.UNSIGNED_BYTE_MASK;
+        return distanceFromFirstLead < 0x3e;
+    }
+
+    /**
+     * Tells whether a UTF-8 code unit is a trail byte, i.e. whether its two
+     * top bits are 10 (0x80..0xbf).
+     * @param c 8-bit code unit (byte)
+     * @return TRUE or FALSE
+     * @stable ICU 2.4
+     */
+    public static boolean isTrail(byte c) {
+        final int topTwoBits = c & 0xc0;
+        return topTwoBits == 0x80;
+    }
+
+    /**
+     * Computes the number of code units (bytes) the UTF-8 encoding of a
+     * Unicode code point takes. The argument is treated as unsigned by masking
+     * it with UNSIGNED_INT_MASK before the range checks.
+     * @param c 32-bit code point
+     * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+     * @stable ICU 2.4
+     */
+    public static final int length(int c)
+    {
+        final long codePoint = c & UConverterConstants.UNSIGNED_INT_MASK;
+        if (codePoint <= 0x7f) {
+            return 1;
+        }
+        if (codePoint <= 0x7ff) {
+            return 2;
+        }
+        if (codePoint <= 0xd7ff) {
+            return 3;
+        }
+        /* surrogates (d800..dfff) and values beyond U+10ffff have no UTF-8 form */
+        if (codePoint <= 0xdfff || codePoint > 0x10ffff) {
+            return 0;
+        }
+        if (codePoint <= 0xffff) {
+            return 3;
+        }
+        return 4;
+    }
+    
+    /**
+     * Creates a decoder for this charset.
+     * @return a new CharsetDecoderUTF8 constructed with this charset instance
+     */
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoderUTF8(this);
+    }
+
+    /**
+     * Creates an encoder for this charset.
+     * @return a new CharsetEncoderUTF8 constructed with this charset instance
+     */
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoderUTF8(this);
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/InvalidFormatException.java
+++ b/icu4j/src/com/ibm/icu/impl/InvalidFormatException.java
@ -0,0 +1,16 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+/**
+ * Checked exception with an optional detail message.
+ */
+public class InvalidFormatException extends Exception {
+    /** Creates the exception without a detail message. */
+    public InvalidFormatException(){}
+    /**
+     * Creates the exception with a detail message.
+     * @param message the detail message, passed to Exception
+     */
+    public InvalidFormatException(String message){
+        super(message);
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/UConverterAlias.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterAlias.java
@ -0,0 +1,789 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.io.IOException;
+import java.io.BufferedInputStream;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.charset.CharsetICU;
+
+public final class UConverterAlias {
+    /** The largest value a 32 bit unsigned integer can hold @draft ICU 3.6 */
+    public static final long UINT32_MAX = 4294967295L;
+
+    /** Bit tested on gUntaggedConvArrayArray entries by findConverter to flag an ambiguous alias. */
+    public static final int AMBIGUOUS_ALIAS_MAP_BIT = 0x8000;
+
+    /** Mask applied to gUntaggedConvArrayArray entries by findConverter to get the converter index. */
+    public static final int CONVERTER_INDEX_MASK = 0xFFF;
+
+    public static final int NUM_RESERVED_TAGS = 2;
+
+    public static final int NUM_HIDDEN_TAGS = 1;
+
+    /*
+     * Alias tables. Each g...Array below is allocated and filled by
+     * haveAliasData(); the matching g...Size further down is taken from the
+     * table of contents read there. The g...ArrayIndex companions are not
+     * assigned in the part of the class shown next to these declarations.
+     */
+    static int[] gConverterListArray = null;
+
+    static int gConverterListArrayIndex;
+
+    static int[] gTagListArray = null;
+
+    static int gTagListArrayIndex;
+
+    static int[] gAliasListArray = null;
+
+    static int gAliasListArrayIndex;
+
+    static int[] gUntaggedConvArrayArray = null;
+
+    static int gUntaggedConvArrayArrayIndex;
+
+    static int[] gTaggedAliasArrayArray = null;
+
+    static int gTaggedAliasArrayArrayIndex;
+
+    static int[] gTaggedAliasListsArray = null;
+
+    static int gTaggedAliasListsArrayIndex;
+
+    /** Raw string table; GET_STRING(idx) decodes the NUL-terminated string starting at byte 2 * idx. */
+    static byte[] gStringTableArray = null;
+
+    static int gStringTableArrayIndex;
+
+    /* Element counts of the int tables, as read from the table of contents. */
+    static long gConverterListSize;
+
+    static long gTagListSize;
+
+    static long gAliasListSize;
+
+    static long gUntaggedConvArraySize;
+
+    static long gTaggedAliasArraySize;
+
+    static long gTaggedAliasListsSize;
+
+    /** Size of the string table in bytes (twice the table-of-contents entry). */
+    static long gStringTableSize;
+
+    /**
+     * Decodes the NUL-terminated entry of gStringTableArray that begins at
+     * byte offset 2 * idx, using the platform default charset.
+     * @param idx index into the string table, in 2-byte units
+     * @return the decoded string
+     */
+    static final String GET_STRING(int idx) {
+        final int byteOffset = 2 * idx;
+        final int byteCount = strlen(gStringTableArray, byteOffset);
+        return new String(gStringTableArray, byteOffset, byteCount);
+    }
+
+    /**
+     * Counts the bytes of the string that starts at sBegin and ends before the
+     * first 0 byte, or at the end of the array when there is no terminator.
+     * <p>
+     * The terminator is tested before advancing, so an unterminated tail is
+     * counted in full and a start index at the end of the array yields 0
+     * rather than -1.
+     *
+     * @param sArray the bytes to scan
+     * @param sBegin index of the first byte of the string
+     * @return the number of bytes before the terminator or the array end
+     */
+    public static final int strlen(byte[] sArray, int sBegin)
+    {
+        int i = sBegin;
+        while (i < sArray.length && sArray[i] != 0) {
+            i++;
+        }
+        return i > sBegin ? i - sBegin : 0;
+    }
+
+    /*
+     * Positions of the entries in the table of contents of the alias data.
+     * haveAliasData() reads the same positions 0..8 of the array returned by
+     * readToc, using literal indices.
+     */
+    public static final int tocLengthIndex = 0;
+
+    public static final int converterListIndex = 1;
+
+    public static final int tagListIndex = 2;
+
+    public static final int aliasListIndex = 3;
+
+    public static final int untaggedConvArrayIndex = 4;
+
+    public static final int taggedAliasArrayIndex = 5;
+
+    public static final int taggedAliasListsIndex = 6;
+
+    public static final int reservedIndex1 = 7;
+
+    public static final int stringTableIndex = 8;
+
+    public static final int minTocLength = 8; /*
+                                                 * min. tocLength in the file,
+                                                 * does not count the
+                                                 * tocLengthIndex!
+                                                 */
+
+    public static final int offsetsCount = minTocLength + 1; /*
+                                                                 * length of the
+                                                                 * swapper's
+                                                                 * temporary
+                                                                 * offsets[]
+                                                                 */
+
+    /** Null until haveAliasData() has loaded the alias tables; then an empty placeholder buffer. */
+    static ByteBuffer gAliasData = null;
+
+    /**
+     * Checks that an alias argument is usable for a lookup.
+     * @param alias the alias to check
+     * @return false for the empty string, true for any other non-null string
+     * @throws IllegalArgumentException if alias is null
+     */
+    private static final boolean isAlias(String alias) {
+        if (alias == null) {
+            throw new IllegalArgumentException("Alias param is null!");
+        }
+        return alias.length() != 0;
+    }
+
+    /** Resource name of the alias data file opened by haveAliasData(). */
+    private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu";
+
+    /**
+     * Default buffer size of datafile; used as the BufferedInputStream buffer
+     * size in haveAliasData().
+     */
+    private static final int CNVALIAS_DATA_BUFFER_SIZE = 25000;
+
+    /**
+     * Loads the converter alias tables from the alias data file on first use.
+     * <p>
+     * The method is synchronized, so the check of gAliasData and the load
+     * cannot interleave with another caller. gAliasData is only set after the
+     * whole file has been read, so a failed load is retried by the next call.
+     * The input stream is closed on both the success and the failure path.
+     *
+     * @return always true once the data is available
+     * @throws IOException if the table of contents is shorter than
+     *         minTocLength, or if reading or closing the stream fails
+     */
+    private static final synchronized boolean haveAliasData() 
+                                               throws IOException{
+        /* load converter alias data from file if necessary */
+        if (gAliasData != null) {
+            return true;
+        }
+
+        // agljport:fix data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME,
+        // isAcceptable, NULL, pErrorCode);
+        InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME);
+        BufferedInputStream b = new BufferedInputStream(i, CNVALIAS_DATA_BUFFER_SIZE);
+        try {
+            UConverterAliasDataReader reader = new UConverterAliasDataReader(b);
+            long[] tableArray = reader.readToc(offsetsCount);
+
+            long tableStart = tableArray[0];
+            if (tableStart < minTocLength) {
+                throw new IOException("Invalid data format.");
+            }
+            gConverterListSize = tableArray[1];
+            gTagListSize = tableArray[2];
+            gAliasListSize = tableArray[3];
+            gUntaggedConvArraySize = tableArray[4];
+            gTaggedAliasArraySize = tableArray[5];
+            gTaggedAliasListsSize = tableArray[6];
+            /* the last two entries are doubled to obtain sizes in bytes */
+            long reservedSize1 = tableArray[7] * 2;
+            gStringTableSize = tableArray[8] * 2;
+
+            gConverterListArray = new int[(int) gConverterListSize];
+            gTagListArray = new int[(int) gTagListSize];
+            gAliasListArray = new int[(int) gAliasListSize];
+            gUntaggedConvArrayArray = new int[(int) gUntaggedConvArraySize];
+            gTaggedAliasArrayArray = new int[(int) gTaggedAliasArraySize];
+            gTaggedAliasListsArray = new int[(int) gTaggedAliasListsSize];
+            byte[] reservedBytes = new byte[(int) reservedSize1];
+            gStringTableArray = new byte[(int) gStringTableSize];
+
+            reader.read(gConverterListArray, gTagListArray,
+                    gAliasListArray, gUntaggedConvArrayArray,
+                    gTaggedAliasArrayArray, gTaggedAliasListsArray,
+                    reservedBytes, gStringTableArray);
+        } finally {
+            /* closing the buffered stream also closes the underlying resource stream */
+            b.close();
+        }
+
+        /* dummy UDataMemory object in absence of memory mapping; marks the data as loaded */
+        // agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO,
+        // io_cleanup);
+        gAliasData = ByteBuffer.allocate(0);
+
+        return true;
+    }
+
+    // U_CFUNC const char * io_getConverterName(const char *alias, UErrorCode
+    // *pErrorCode)
+    /**
+     * Looks up the converter name registered for an alias.
+     * @param alias the alias to resolve
+     * @return the converter name, or null when the alias is empty or unknown
+     * @throws IOException if the alias data cannot be loaded
+     * @throws IllegalArgumentException if alias is null
+     */
+    public static final String io_getConverterName(String alias)
+                                    throws IOException{
+        if (!haveAliasData() || !isAlias(alias)) {
+            return null;
+        }
+        final boolean[] ambiguity = new boolean[1];
+        final long converterNumber = findConverter(alias, ambiguity);
+        if (converterNumber >= gConverterListSize) {
+            /* converter not found */
+            return null;
+        }
+        return GET_STRING(gConverterListArray[(int) converterNumber]);
+    }
+
+    /*
+     * Binary search for an alias in gAliasList. Returns the converter number
+     * (an index into gConverterList), or Long.MAX_VALUE when the alias is not
+     * found. isAmbigous[0] is set to true when the matching entry carries
+     * AMBIGUOUS_ALIAS_MAP_BIT; it is never reset here.
+     */
+    // static U_INLINE uint32_t findConverter(const char *alias, UErrorCode
+    // *pErrorCode)
+    private static final long findConverter(String alias, boolean[] isAmbigous) {
+        long low = 0;
+        long high = gUntaggedConvArraySize;
+        long previousProbe = UINT32_MAX;
+
+        while (true) {
+            final long probe = (low + high) / 2;
+            if (probe == previousProbe) {
+                /* We haven't moved, and it wasn't found. */
+                return Long.MAX_VALUE;
+            }
+            previousProbe = probe;
+
+            final int slot = (int) probe;
+            final long order = compareNames(alias, GET_STRING(gAliasListArray[slot]));
+            if (order < 0) {
+                high = probe;
+                continue;
+            }
+            if (order > 0) {
+                low = probe;
+                continue;
+            }
+
+            /*
+             * Since the gencnval tool folds duplicates into one entry, this
+             * alias in gAliasList is unique, but different standards may
+             * map an alias to different converters.
+             */
+            final int entry = gUntaggedConvArrayArray[slot];
+            if ((entry & AMBIGUOUS_ALIAS_MAP_BIT) != 0) {
+                isAmbigous[0]=true;
+            }
+            return entry & CONVERTER_INDEX_MASK;
+        }
+    }
+
+    /**
+     * Appends a normalized form of a converter name to a buffer: dashes,
+     * underscores and spaces are dropped and every remaining character is
+     * lower-cased.
+     *
+     * @param dst
+     *            The buffer the normalized name is appended to.
+     * @param name
+     *            The converter name or alias to normalize.
+     * @return the destination buffer.
+     */
+    public static final StringBuffer io_stripForCompare(StringBuffer dst, String name) {
+        return io_stripASCIIForCompare(dst, name);
+    }
+
+    /**
+     * Implementation of {@link #io_stripForCompare}; uses the same delimiter
+     * set as compareNames.
+     *
+     * Nothing is ever removed from <code>dst</code>, so an empty name leaves
+     * the existing buffer content untouched.
+     *
+     * @param dst
+     *            The buffer the normalized name is appended to.
+     * @param name
+     *            The converter name or alias to normalize.
+     * @return the destination buffer.
+     */
+    private static final StringBuffer io_stripASCIIForCompare(StringBuffer dst, String name) {
+        final int length = name.length();
+        for (int i = 0; i < length; i++) {
+            final char c = name.charAt(i);
+            if (c == 0) {
+                /* keep the C-style contract: a NUL character ends the name */
+                break;
+            }
+            /* Ignore delimiters '-', '_', and ' ' */
+            if (c == 0x2d || c == 0x5f || c == 0x20) {
+                continue;
+            }
+            /* lowercase for case-insensitive comparison */
+            dst.append(Character.toLowerCase(c));
+        }
+        return dst;
+    }
+
+    /**
+     * Fuzzy comparison of two converter/alias names. Case is ignored, and so
+     * are the characters '-', '_' and ' ' (dash, underscore and space), which
+     * makes "UTF-8", "utf_8" and "Utf 8" compare as equal.
+     *
+     * The alias list is binary-searched with this function, so the list must
+     * be sorted with the same ordering.
+     *
+     * @param name1
+     *            a converter name or alias
+     * @param name2
+     *            a converter name or alias
+     * @return 0 if the names match, a negative value if name1 sorts before
+     *         name2, or a positive value if name1 sorts after name2.
+     *
+     * @see #io_stripForCompare
+     */
+    public static int compareNames(String name1, String name2){
+        final int len1 = name1.length();
+        final int len2 = name2.length();
+        int pos1 = 0;
+        int pos2 = 0;
+
+        for (;;) {
+            // Next significant character of each name; 0 marks the end.
+            char c1;
+            do {
+                c1 = (pos1 < len1) ? name1.charAt(pos1++) : 0;
+            } while (c1 == '-' || c1 == '_' || c1 == ' ');
+
+            char c2;
+            do {
+                c2 = (pos2 < len2) ? name2.charAt(pos2++) : 0;
+            } while (c2 == '-' || c2 == '_' || c2 == ' ');
+
+            // Both names are exhausted at the same time: they match.
+            if (c1 == 0 && c2 == 0) {
+                return 0;
+            }
+
+            // Case-insensitive comparison of the current pair.
+            final int difference = Character.toLowerCase(c1) - Character.toLowerCase(c2);
+            if (difference != 0) {
+                return difference;
+            }
+        }
+    }
+
+    /**
+     * Counts the aliases recorded for the converter that an alias maps to,
+     * using the list stored under the last tag (the ALL tag).
+     *
+     * @param alias the alias (or converter name) to look up
+     * @return the count stored at the head of the converter's alias list, or
+     *         0 when the data is unavailable, the alias is not accepted, the
+     *         converter is not found, or no list is recorded
+     * @throws IOException if one of the called helpers reports it
+     */
+    public static int io_countAliases(String alias) 
+                        throws IOException{
+        if (!haveAliasData() || !isAlias(alias)) {
+            return 0;
+        }
+
+        final boolean[] ambiguityFlag = new boolean[1];
+        final long converterIndex = findConverter(alias, ambiguityFlag);
+        if (!(converterIndex < gConverterListSize)) {
+            /* converter not found */
+            return 0;
+        }
+
+        /* tagListNum - 1 is the ALL tag */
+        final long allTagRow = (gTagListSize - 1) * gConverterListSize;
+        final int listStart = gTaggedAliasArrayArray[(int) (allTagRow + converterIndex)];
+
+        /* a zero offset shouldn't happen here. internal program error */
+        return (listStart != 0) ? gTaggedAliasListsArray[listStart] : 0;
+    }
+
+    /**
+     * Return the number of all aliases (and converter names).
+     * 
+     * @return the size of the alias list, or 0 when the alias data is not
+     *         available
+     * @throws IOException if haveAliasData reports it
+     */
+    // U_CFUNC uint16_t io_countTotalAliases(UErrorCode *pErrorCode);
+    public static int io_countTotalAliases() throws IOException{
+        return haveAliasData() ? (int) gAliasListSize : 0;
+    }
+
+    // U_CFUNC const char * io_getAlias(const char *alias, uint16_t n,
+    // UErrorCode *pErrorCode)
+    /**
+     * Returns the n-th alias of the converter that an alias maps to, taken
+     * from the list stored under the last tag (the ALL tag).
+     *
+     * The index is validated against the count stored at the head of the
+     * list, so an out-of-range index can no longer read entries that belong
+     * to a neighbouring list.
+     *
+     * @param alias the alias (or converter name) to look up
+     * @param n     zero-based index into the converter's alias list
+     * @return the n-th alias, or <code>null</code> when the data is
+     *         unavailable, the alias is not accepted, the converter is not
+     *         found, no list is recorded, or n is out of range
+     * @throws IOException if one of the called helpers reports it
+     */
+    public static String io_getAlias(String alias, int n) throws IOException{
+        if (!haveAliasData() || !isAlias(alias)) {
+            return null;
+        }
+
+        boolean[] isAmbigous = new boolean[1];
+        long convNum = findConverter(alias, isAmbigous);
+        if (convNum >= gConverterListSize) {
+            /* converter not found */
+            return null;
+        }
+
+        /* tagListNum - 1 is the ALL tag */
+        int listOffset = gTaggedAliasArrayArray[(int) ((gTagListSize - 1)
+                * gConverterListSize + convNum)];
+        if (listOffset == 0) {
+            /* this shouldn't happen. internal program error */
+            return null;
+        }
+
+        int listCount = gTaggedAliasListsArray[listOffset];
+        if (n < 0 || n >= listCount) {
+            /* index outside this converter's list */
+            return null;
+        }
+
+        /* +1 to skip listCount */
+        return GET_STRING(gTaggedAliasListsArray[listOffset + 1 + n]);
+    }
+
+    // U_CFUNC uint16_t io_countStandards(UErrorCode *pErrorCode) {
+    /**
+     * Returns the number of standards (tags), not counting the hidden ones.
+     *
+     * @return gTagListSize minus NUM_HIDDEN_TAGS, or 0 when the alias data is
+     *         not available
+     * @throws IOException if haveAliasData reports it
+     */
+    public static int io_countStandards() throws IOException{
+        return haveAliasData() ? (int) (gTagListSize - NUM_HIDDEN_TAGS) : 0;
+    }
+
+    // U_CAPI const char * U_EXPORT2getStandard(uint16_t n, UErrorCode
+    // *pErrorCode)
+    /**
+     * Returns the name of the n-th standard (tag).
+     *
+     * Only indexes below the count reported by io_countStandards are served,
+     * so hidden tags are not exposed and a bad index yields
+     * <code>null</code> instead of an ArrayIndexOutOfBoundsException.
+     *
+     * @param n zero-based index of the standard
+     * @return the standard's name, or <code>null</code> when the alias data
+     *         is unavailable or n is out of range
+     * @throws IOException if haveAliasData reports it
+     */
+    public static String getStandard(int n) throws IOException{
+        if (haveAliasData()
+                && 0 <= n && n < gTagListSize - NUM_HIDDEN_TAGS) {
+            return GET_STRING(gTagListArray[n]);
+        }
+        return null;
+    }
+
+    // U_CAPI const char * U_EXPORT2 getStandardName(const char *alias, const
+    // char *standard, UErrorCode *pErrorCode)
+    /**
+     * Returns the name a standard uses for the converter an alias refers to,
+     * i.e. the first entry of the alias list found for that alias and standard.
+     *
+     * The emptiness check looks at the first entry of the list that was found
+     * (the slot right after the list's count). The previous code inspected
+     * element 0 of the whole table, which is unrelated to the list in question.
+     *
+     * @param alias    the alias (or converter name) to look up
+     * @param standard the name of the standard (tag)
+     * @return the standard's name for the converter, or <code>null</code> when
+     *         the data is unavailable, the alias is not accepted, or the
+     *         standard has no name for it
+     * @throws IOException if one of the called helpers reports it
+     */
+    public static final String getStandardName(String alias, String standard)throws IOException {
+        if (haveAliasData() && isAlias(alias)) {
+            long listOffset = findTaggedAliasListsOffset(alias, standard);
+
+            if (0 < listOffset && listOffset < gTaggedAliasListsSize) {
+                /* +1 to skip the list's count */
+                int firstAlias = gTaggedAliasListsArray[(int) listOffset + 1];
+                if (firstAlias != 0) {
+                    return GET_STRING(firstAlias);
+                }
+            }
+        }
+        return null;
+    }
+
+    // U_CAPI uint16_t U_EXPORT2 countAliases(const char *alias, UErrorCode
+    // *pErrorCode)
+    /**
+     * Public entry point that forwards to {@link #io_countAliases}.
+     *
+     * @param alias the alias (or converter name) to look up
+     * @return whatever io_countAliases returns for the alias
+     * @throws IOException if io_countAliases reports it
+     */
+    public static int countAliases(String alias) throws IOException{
+        return io_countAliases(alias);
+    }
+
+    // U_CAPI const char* U_EXPORT2 getAlias(const char *alias, uint16_t n,
+    // UErrorCode *pErrorCode)
+    /**
+     * Public entry point that forwards to {@link #io_getAlias}.
+     *
+     * @param alias the alias (or converter name) to look up
+     * @param n     index into the converter's alias list
+     * @return whatever io_getAlias returns for the arguments
+     * @throws IOException if io_getAlias reports it
+     */
+    public static String getAlias(String alias, int n) throws IOException{
+        return io_getAlias(alias, n);
+    }
+
+    // U_CFUNC uint16_t countStandards(void)
+    /**
+     * Public entry point that forwards to {@link #io_countStandards}.
+     *
+     * @return whatever io_countStandards returns
+     * @throws IOException if io_countStandards reports it
+     */
+    public static int countStandards()throws IOException{
+        return io_countStandards();
+    }
+    
+    /**
+     * Returns a single name from the list of available converters.
+     *
+     * @param n index of the converter; only values in 0..0xffff are looked up
+     * @return the name reported by bld_getAvailableConverter, or
+     *         <code>null</code> when n is outside 0..0xffff or the lookup
+     *         throws an IOException
+     */
+    public static String getAvailableName (int n){
+        if (n < 0 || n > 0xffff) {
+            return null;
+        }
+        try{
+            return bld_getAvailableConverter(n);
+        }catch(IOException ex){
+            //throw away exception
+            return null;
+        }
+    }
+    // U_CAPI const char * U_EXPORT2 getCanonicalName(const char *alias, const
+    // char *standard, UErrorCode *pErrorCode) {
+    /**
+     * Returns the converter name that an alias stands for within a standard.
+     *
+     * @param alias    the alias to look up
+     * @param standard the name of the standard (tag)
+     * @return the string referenced by the converter-list entry found by
+     *         findTaggedConverterNum, or <code>null</code> when the data is
+     *         unavailable, the alias is not accepted, or nothing is found
+     * @throws IOException if one of the called helpers reports it
+     */
+    public static String getCanonicalName(String alias, String standard) throws IOException{
+        if (!haveAliasData() || !isAlias(alias)) {
+            return null;
+        }
+
+        final long converterIndex = findTaggedConverterNum(alias, standard);
+        return (converterIndex < gConverterListSize)
+                ? GET_STRING(gConverterListArray[(int) converterIndex])
+                : null;
+    }
+    /**
+     * Returns the number of available converters.
+     *
+     * @return the value of bld_countAvailableConverters, or -1 when that call
+     *         throws an IOException
+     */
+    public static int countAvailable (){
+        int available = -1;
+        try{
+            available = bld_countAvailableConverters();
+        }catch(IOException ex){
+            //throw away exception
+        }
+        return available;
+    }
+        
+    // U_CAPI UEnumeration * U_EXPORT2 openStandardNames(const char *convName,
+    // const char *standard, UErrorCode *pErrorCode)
+    /**
+     * Creates an enumeration over the alias list found for a converter name
+     * and standard.
+     *
+     * @param convName the converter name or alias
+     * @param standard the name of the standard (tag)
+     * @return an enumeration positioned at the start of the list (possibly an
+     *         empty one, when the list offset is 0), or <code>null</code> when
+     *         the data is unavailable, the name is not accepted, or the
+     *         converter or tag is not found
+     * @throws IOException if one of the called helpers reports it
+     */
+    public static final UConverterAliasesEnumeration openStandardNames(String convName, String standard)throws IOException {
+        if (!haveAliasData() || !isAlias(convName)) {
+            return null;
+        }
+
+        final long listStart = findTaggedAliasListsOffset(convName, standard);
+        if (!(listStart < gTaggedAliasListsSize)) {
+            /* converter or tag not found */
+            return null;
+        }
+
+        /*
+         * A listStart of 0 is let through on purpose: the converter name and
+         * standard are okay, but there is nothing to enumerate.
+         */
+        final UConverterAliasesEnumeration.UAliasContext cursor =
+                new UConverterAliasesEnumeration.UAliasContext(listStart, 0);
+        final UConverterAliasesEnumeration result = new UConverterAliasesEnumeration();
+        result.setContext(cursor);
+        return result;
+    }
+
+    // static uint32_t getTagNumber(const char *tagname)
+    /**
+     * Looks up the index of a tag (standard) by exact name match.
+     *
+     * @param tagName the tag name to find
+     * @return the index of the first matching entry of the tag list, or
+     *         UINT32_MAX when the tag list is not loaded or nothing matches
+     */
+    private static long getTagNumber(String tagName) {
+        if (gTagListArray == null) {
+            return UINT32_MAX;
+        }
+
+        long candidate = 0;
+        while (candidate < gTagListSize) {
+            final String knownTag = GET_STRING(gTagListArray[(int) candidate]);
+            if (tagName.equals(knownTag)) {
+                return candidate;
+            }
+            candidate++;
+        }
+        return UINT32_MAX;
+    }
+
+    // static uint32_t findTaggedAliasListsOffset(const char *alias, const char
+    // *standard, UErrorCode *pErrorCode)
+    /**
+     * Finds the offset, within gTaggedAliasListsArray, of the alias list
+     * recorded for an alias under a given standard.
+     *
+     * @param alias    the alias to look up
+     * @param standard the standard (tag) name, resolved through getTagNumber
+     * @return the offset of a list whose first entry is non-zero; 0 when the
+     *         tag and converter are both valid but no such list exists;
+     *         UINT32_MAX when the tag number is not below
+     *         gTagListSize - NUM_HIDDEN_TAGS or no converter was found
+     */
+    private static long findTaggedAliasListsOffset(String alias, String standard) {
+        long idx;
+        long listOffset;
+        long convNum;
+        long tagNum = getTagNumber(standard);
+        boolean[] isAmbigous = new boolean[1];
+        /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
+        convNum = findConverter(alias, isAmbigous);
+
+        if (tagNum < (gTagListSize - NUM_HIDDEN_TAGS)
+                && convNum < gConverterListSize) {
+            /* the table is indexed as [tag * converterCount + converter] */
+            listOffset = gTaggedAliasArrayArray[(int) (tagNum
+                    * gConverterListSize + convNum)];
+            /* listOffset + 1 is the first entry after the list's count */
+            if (listOffset != 0
+                    && gTaggedAliasListsArray[(int) listOffset + 1] != 0) {
+                return listOffset;
+            }
+            if (isAmbigous[0]==true) {
+                /*
+                 * Uh Oh! They used an ambiguous alias. We have to search the
+                 * whole swiss cheese starting at the highest standard affinity.
+                 * This may take a while.
+                 */
+
+                for (idx = 0; idx < gTaggedAliasArraySize; idx++) {
+                    listOffset = gTaggedAliasArrayArray[(int) idx];
+                    if (listOffset != 0 && isAliasInList(alias, listOffset)) {
+                        /* split the flat index back into tag row and converter column */
+                        long currTagNum = idx / gConverterListSize;
+                        long currConvNum = (idx - currTagNum
+                                * gConverterListSize);
+                        /* same converter column, but in the requested tag's row */
+                        long tempListOffset = gTaggedAliasArrayArray[(int) (tagNum
+                                * gConverterListSize + currConvNum)];
+                        if (tempListOffset != 0
+                                && gTaggedAliasListsArray[(int) tempListOffset + 1] != 0) {
+                            return tempListOffset;
+                        }
+                        /*
+                         * else keep on looking We could speed this up by
+                         * starting on the next row because an alias is unique
+                         * per row, right now. This would change if alias
+                         * versioning appears.
+                         */
+                    }
+                }
+                /* The standard doesn't know about the alias */
+            }
+            /* else no default name */
+            return 0;
+        }
+        /* else converter or tag not found */
+
+        return UINT32_MAX;
+    }
+
+    /* Return the canonical name */
+    // static uint32_t findTaggedConverterNum(const char *alias, const char
+    // *standard, UErrorCode *pErrorCode)
+    /**
+     * Finds the index, in the converter list, of the converter that an alias
+     * refers to under a given standard.
+     *
+     * @param alias    the alias to look up
+     * @param standard the standard (tag) name, resolved through getTagNumber
+     * @return the converter index when the alias is present in a list of the
+     *         requested tag; UINT32_MAX otherwise
+     */
+    private static long findTaggedConverterNum(String alias, String standard) {
+        long idx;
+        long listOffset;
+        long convNum;
+        long tagNum = getTagNumber(standard);
+        boolean[] isAmbigous = new boolean[1];
+        
+        /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
+        convNum = findConverter(alias, isAmbigous);        
+
+        if (tagNum < (gTagListSize - NUM_HIDDEN_TAGS)
+                && convNum < gConverterListSize) {
+            /* the table is indexed as [tag * converterCount + converter] */
+            listOffset = gTaggedAliasArrayArray[(int) (tagNum
+                    * gConverterListSize + convNum)];
+            if (listOffset != 0 && isAliasInList(alias, listOffset)) {
+                return convNum;
+            }
+            if (isAmbigous[0] == true) {
+                /*
+                 * Uh Oh! They used an ambiguous alias. We have to search one
+                 * slice of the swiss cheese. We search only in the requested
+                 * tag, not the whole thing. This may take a while.
+                 */
+                long convStart = (tagNum) * gConverterListSize;
+                long convLimit = (tagNum + 1) * gConverterListSize;
+                for (idx = convStart; idx < convLimit; idx++) {
+                    listOffset = gTaggedAliasArrayArray[(int) idx];
+                    if (listOffset != 0 && isAliasInList(alias, listOffset)) {
+                        /* position within the tag's row is the converter index */
+                        return idx - convStart;
+                    }
+                }
+                /* The standard doesn't know about the alias */
+            }
+            /* else no canonical name */
+        }
+        /* else converter or tag not found */
+
+        return UINT32_MAX;
+    }
+
+    // static U_INLINE UBool isAliasInList(const char *alias, uint32_t
+    // listOffset)
+    /**
+     * Tells whether an alias occurs in one alias list of
+     * gTaggedAliasListsArray, comparing names with compareNames.
+     *
+     * @param alias      the alias to search for
+     * @param listOffset offset of the list (its count slot); 0 means no list
+     * @return <code>true</code> if a non-zero entry of the list names the alias
+     */
+    private static boolean isAliasInList(String alias, long listOffset) {
+        if (listOffset == 0) {
+            return false;
+        }
+
+        final int[] lists = gTaggedAliasListsArray;
+        final long entryCount = lists[(int) listOffset];
+        /* +1 to skip listCount */
+        final long firstEntry = listOffset + 1;
+
+        for (long k = 0; k < entryCount; k++) {
+            final int stringRef = lists[(int) (k + firstEntry)];
+            if (stringRef == 0) {
+                continue;
+            }
+            if (compareNames(alias, GET_STRING(stringRef)) == 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // begin bld.c
+    /* Converter names, filled in lazily by haveAvailableConverterList(). */
+    static String[] gAvailableConverters = null;
+
+    /* Number of entries of gAvailableConverters that are in use. */
+    static int gAvailableConverterCount = 0;
+
+    /*
+     * NOTE(review): no assignment to this buffer is visible in this part of
+     * the file; getDefaultName() reads its length - confirm it is initialized.
+     */
+    static byte[] gDefaultConverterNameBuffer; // [MAX_CONVERTER_NAME_LENGTH +
+                                                // 1]; /* +1 for NULL */
+
+    /* Cached default converter name, read by getDefaultName(). */
+    static String gDefaultConverterName = null;
+
+    // static UBool haveAvailableConverterList(UErrorCode *pErrorCode)
+    /**
+     * Makes sure the list of available converter names has been built,
+     * building it from the converter list on first use.
+     *
+     * @return <code>true</code> when the list is available,
+     *         <code>false</code> when the alias data could not be obtained
+     * @throws IOException if haveAliasData reports it
+     */
+    static boolean haveAvailableConverterList() throws IOException{
+        if (gAvailableConverters != null) {
+            return true;
+        }
+        if (!haveAliasData()) {
+            return false;
+        }
+
+        /* We can't have more than "*converterTable" converters to open */
+        final String[] names = new String[(int) gConverterListSize];
+        int filled = 0;
+
+        for (int slot = 0; slot < gConverterListSize; slot++) {
+            //UConverter cnv = UConverter.open(converterName);
+            //TODO: Fix me
+            final String converterName = GET_STRING(gConverterListArray[slot]);
+            names[filled] = converterName;
+            filled++;
+        }
+
+        // agljport:todo umtx_lock(NULL);
+        if (gAvailableConverters == null) {
+            /* haveData should have already registered the cleanup function */
+            gAvailableConverters = names;
+            gAvailableConverterCount = filled;
+        }
+        // agljport:todo umtx_unlock(NULL);
+
+        return true;
+    }
+
+    // U_CFUNC uint16_t bld_countAvailableConverters(UErrorCode *pErrorCode)
+    /**
+     * Returns the number of entries in the list of available converters.
+     *
+     * @return gAvailableConverterCount, or 0 when the list could not be built
+     * @throws IOException if haveAvailableConverterList reports it
+     */
+    public static int bld_countAvailableConverters() throws IOException{
+        return haveAvailableConverterList() ? gAvailableConverterCount : 0;
+    }
+
+    // U_CFUNC const char * bld_getAvailableConverter(uint16_t n, UErrorCode
+    // *pErrorCode)
+    /**
+     * Returns the n-th name of the list of available converters.
+     *
+     * A negative index is rejected like any other out-of-range index. The C
+     * signature takes an unsigned index, but the Java int can be negative and
+     * previously caused an ArrayIndexOutOfBoundsException.
+     *
+     * @param n zero-based index into the list
+     * @return the converter name, or <code>null</code> when the list could
+     *         not be built or n is out of range
+     * @throws IOException if haveAvailableConverterList reports it
+     */
+    public static String bld_getAvailableConverter(int n) throws IOException{
+        if (haveAvailableConverterList()) {
+            if (0 <= n && n < gAvailableConverterCount) {
+                return gAvailableConverters[n];
+            }
+        }
+        return null;
+    }
+
+    /* default converter name --------------------------------------------------- */
+
+    /*
+     * In order to be really thread-safe, the get function would have to take
+     * a buffer parameter and copy the current string inside a mutex block.
+     * This implementation only tries to be really thread-safe while
+     * setting the name.
+     * It assumes that setting a pointer is atomic.
+     */
+
+    // U_CFUNC const char * getDefaultName()
+    /**
+     * Returns the name of the default converter, determining and caching it
+     * on the first call.
+     *
+     * The name comes from CharsetICU.getDefaultCharsetName(); when that gives
+     * null or an empty string, "US-ASCII" is used. The result is stored in
+     * gDefaultConverterName. The previous code assigned in the opposite
+     * direction, discarding the computed name and returning null.
+     *
+     * The former length check against gDefaultConverterNameBuffer was removed:
+     * its length operand was always 0, and it dereferenced a buffer that has
+     * no visible initializer.
+     *
+     * @return the default converter name
+     */
+    public static final synchronized String getDefaultName() {
+        String name = gDefaultConverterName;
+
+        if (name == null) {
+            name = CharsetICU.getDefaultCharsetName();
+
+            // TODO: fix me - once converters can be opened here, open the
+            // converter for this name and use its canonical name instead.
+
+            if (name == null || name.length() == 0) {
+                /* Panic time, let's use a fallback. */
+                name = "US-ASCII";
+            }
+
+            /* remember the result for later calls */
+            gDefaultConverterName = name;
+        }
+
+        return name;
+    }
+
+    //end bld.c
+}
--- a/icu4j/src/com/ibm/icu/impl/UConverterAliasDataReader.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterAliasDataReader.java
@ -0,0 +1,218 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+
+package com.ibm.icu.impl;
+import java.io.*;
+import com.ibm.icu.impl.ICUDebug;
+
+/* Format of cnvalias.icu -----------------------------------------------------
+ *
+ * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
+ * This binary form contains several tables. All indexes are to uint16_t
+ * units, and not to the bytes (uint8_t units). Addressing everything on
+ * 16-bit boundaries allows us to store more information with small index
+ * numbers, which are also 16-bit in size. The majority of the table (except
+ * the string table) are 16-bit numbers.
+ *
+ * First there is the size of the Table of Contents (TOC). The TOC
+ * entries contain the size of each section. In order to find the offset
+ * you just need to sum up the previous offsets.
+ * The TOC length and entries are an array of uint32_t values.
+ * The first section after the TOC starts immediately after the TOC.
+ *
+ * 1) This section contains a list of converters. This list contains indexes
+ * into the string table for the converter name. The index of this list is
+ * also used by other sections, which are mentioned later on.
+ * This list is not sorted.
+ *
+ * 2) This section contains a list of tags. This list contains indexes
+ * into the string table for the tag name. The index of this list is
+ * also used by other sections, which are mentioned later on.
+ * This list is in priority order of standards.
+ *
+ * 3) This section contains a list of sorted unique aliases. This
+ * list contains indexes into the string table for the alias name. The
+ * index of this list is also used by other sections, like the 4th section.
+ * The index for the 3rd and 4th section is used to get the
+ * alias -> converter name mapping. Section 3 and 4 form a two column table.
+ *
+ * 4) This section contains a list of mapped converter names. Consider this
+ * as a table that maps the 3rd section to the 1st section. This list contains
+ * indexes into the 1st section. The index of this list is the same index in
+ * the 3rd section. There is also some extra information in the high bits of
+ * each converter index in this table. Currently it's only used to say that
+ * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
+ * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
+ * the predigested form of the 5th section so that an alias lookup can be fast.
+ *
+ * 5) This section contains a 2D array with indexes to the 6th section. This
+ * section is the full form of all alias mappings. The column index is the
+ * index into the converter list (column header). The row index is the index
+ * to tag list (row header). This 2D array is the top part of a 3D array. The
+ * third dimension is in the 6th section.
+ *
+ * 6) This is a blob of variable-length arrays. Each array starts with a size,
+ * and is followed by indexes to alias names in the string table. This is
+ * the third dimension to the section 5. No other section should be referencing
+ * this section.
+ *
+ * 7) Reserved at this time (There is no information). This _usually_ has a
+ * size of 0. Future versions may add more information here.
+ *
+ * 8) This is the string table. All strings are indexed on an even address.
+ * There are two reasons for this. First many chip architectures locate strings
+ * faster on even address boundaries. Second, since all indexes are 16-bit
+ * numbers, this string table can be 128KB in size instead of 64KB when we
+ * only have strings starting on an even address.
+ *
+ *
+ * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
+ * has a unique alias among all converters. That same alias can
+ * be mentioned in other standards on different converters,
+ * but only one alias per tag can be unique.
+ *
+ *
+ *              Converter Names (Usually in TR22 form)
+ *           -------------------------------------------.
+ *     T    /                                          /|
+ *     a   /                                          / |
+ *     g  /                                          /  |
+ *     s /                                          /   |
+ *      /                                          /    |
+ *      ------------------------------------------/     |
+ *    A |                                         |     |
+ *    l |                                         |     |
+ *    i |                                         |    /
+ *    a |                                         |   /
+ *    s |                                         |  /
+ *    e |                                         | /
+ *    s |                                         |/
+ *      -------------------------------------------
+ *
+ *
+ *
+ * Here is what it really looks like. It's like swiss cheese.
+ * There are holes. Some converters aren't recognized by
+ * a standard, or they are really old converters that the
+ * standard doesn't recognize anymore.
+ *
+ *              Converter Names (Usually in TR22 form)
+ *           -------------------------------------------.
+ *     T    /##########################################/|
+ *     a   /     #            #                       /#
+ *     g  /  #      ##     ##     ### # ### ### ### #/
+ *     s / #             #####  ####        ##  ## #/#
+ *      / ### # # ##  #  #   #          ### # #   #/##
+ *      ------------------------------------------/# #
+ *    A |### # # ##  #  #   #          ### # #   #|# #
+ *    l |# # #    #     #               ## #     #|# #
+ *    i |# # #    #     #                #       #|#
+ *    a |#                                       #|#
+ *    s |                                        #|#
+ *    e
+ *    s
+ *
+ */
+
+/**
+ * Reads the sections of the converter alias data (data format "CvAl") from an
+ * input stream: first the table of contents, then the 16-bit tables, the
+ * reserved bytes and the string table.
+ */
+final class UConverterAliasDataReader implements ICUBinary.Authenticate {
+    private final static boolean debug = ICUDebug.enabled("UConverterAliasDataReader");
+
+    /**
+     * <p>Protected constructor. Reads and checks the data header, then wraps
+     * the stream for reading the sections that follow.</p>
+     * @param inputStream converter alias data file input stream
+     * @exception IOException throw if data file fails authentication
+     * @draft 2.1
+     */
+    protected UConverterAliasDataReader(InputStream inputStream)
+                                        throws IOException{
+        if(debug) System.out.println("Bytes in inputStream " + inputStream.available());
+
+        unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
+
+        if(debug) System.out.println("Bytes left in inputStream " +inputStream.available());
+
+        dataInputStream = new DataInputStream(inputStream);
+
+        if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available());
+    }
+
+    // protected methods -------------------------------------------------
+
+    /**
+     * Reads n table-of-contents entries.
+     * @param n number of 32-bit entries to read
+     * @return the entries, each widened to a non-negative long
+     * @exception IOException if the stream cannot supply n entries
+     */
+    protected long[] readToc(int n)throws IOException
+    {
+        long[] toc = new long[n];
+        //Read the toc
+        for (int i = 0; i < n ; ++i) {
+            toc[i] = dataInputStream.readInt() & UNSIGNED_INT_MASK;
+        }
+        return toc;
+    }
+
+    /**
+     * Fills the given arrays, in order, from the stream. The length of each
+     * array determines how much is read for that section; the int arrays
+     * receive unsigned 16-bit values.
+     *
+     * The two byte sections are read with readFully, because a plain read()
+     * may return after filling only part of the array.
+     *
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void read(int[] convList, int[] tagList, int[] aliasList, int[]untaggedConvArray, int[] taggedAliasArray, int[] taggedAliasLists, byte[] reservedBytes, byte[] stringTable) throws IOException{
+        readUnsignedShorts(convList);
+        readUnsignedShorts(tagList);
+        readUnsignedShorts(aliasList);
+        readUnsignedShorts(untaggedConvArray);
+        readUnsignedShorts(taggedAliasArray);
+        readUnsignedShorts(taggedAliasLists);
+
+        dataInputStream.readFully(reservedBytes);
+        dataInputStream.readFully(stringTable);
+    }
+
+    /** Fills dest with consecutive unsigned 16-bit values from the stream. */
+    private void readUnsignedShorts(int[] dest) throws IOException {
+        for (int i = 0; i < dest.length; ++i) {
+            dest[i] = dataInputStream.readUnsignedShort();
+        }
+    }
+
+    /** @return the data format version this reader accepts */
+    public byte[] getDataFormatVersion(){
+        return DATA_FORMAT_VERSION;
+    }
+
+    /**
+     * Accepts a data file when its first version byte equals the first byte
+     * of DATA_FORMAT_VERSION.
+     */
+    public boolean isDataVersionAcceptable(byte version[])
+    {
+        return version[0] == DATA_FORMAT_VERSION[0];
+    }
+
+    /** @return the value returned by ICUBinary.readHeader for this stream */
+    public byte[] getUnicodeVersion(){
+        return unicodeVersion;
+    }
+    // private data members -------------------------------------------------
+
+
+    /**
+    * ICU data file input stream
+    */
+    private final DataInputStream dataInputStream;
+
+    private final byte[] unicodeVersion;
+
+    /**
+    * File format version that this class understands.
+    * No guarantees are made if a older version is used
+    */
+    // DATA_FORMAT_ID_ values taken from icu4c isAcceptable (ucnv_io.c)
+    private static final byte DATA_FORMAT_ID[] = {(byte)0x43, (byte)0x76, (byte)0x41, (byte)0x6c}; // dataFormat="CvAl"
+    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x3};
+
+    //private static final int UNSIGNED_SHORT_MASK = 0xffff;
+    private static final long UNSIGNED_INT_MASK = 0xffffffffL;
+
+}
--- a/icu4j/src/com/ibm/icu/impl/UConverterAliasesEnumeration.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterAliasesEnumeration.java
@ -0,0 +1,83 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.util.Enumeration;
+
+
+/**
+ * Enumeration for Converter Aliases
+ */
+
+public class UConverterAliasesEnumeration implements Enumeration {
+
+    /** Cursor into the tagged alias lists; must be supplied through setContext before use. */
+    private UAliasContext context;
+
+    /**
+     * Sets the alias context that this enumeration walks.
+     * @param context list offset and current index to enumerate from
+     */
+    public void setContext(UAliasContext context){
+        this.context = context;
+    }
+
+    /**
+     * Returns the number of aliases in the current list.
+     * @return the value stored in UConverterAlias.gTaggedAliasListsArray at the
+     *         context's list offset, or 0 when the list offset is 0
+     */
+    public int count() {
+        return currentListCount();
+    }
+
+    /**
+     * Tells whether nextElement() can return another alias.
+     * A list offset of 0 means there is no list, exactly as in count() and
+     * nextElement(); previously this method read the array slot at offset 0
+     * in that case and could report elements that nextElement() then refused
+     * to return.
+     * @return true if the list offset is non-zero and the current index is
+     *         below the list's count
+     */
+    public boolean hasMoreElements() {
+        return context.listOffset != 0 && context.listIdx < currentListCount();
+    }
+
+    /**
+     * Returns the next alias of the current list and advances the index.
+     * The list's entries follow its count slot in gTaggedAliasListsArray and
+     * each entry is resolved through UConverterAlias.GET_STRING.
+     * @return the next alias as a String
+     * @throws IndexOutOfBoundsException if the list offset is 0 or the list
+     *         is exhausted (this exception type is kept, rather than
+     *         NoSuchElementException, for the benefit of existing callers)
+     */
+    public Object nextElement() {
+        if (context.listOffset != 0 && context.listIdx < currentListCount()) {
+            int[] lists = UConverterAlias.gTaggedAliasListsArray;
+            // the first entry sits right after the count slot
+            long firstEntry = context.listOffset + 1;
+            String alias = UConverterAlias.GET_STRING(lists[(int)(context.listIdx + firstEntry)]);
+            context.listIdx++;
+            return alias;
+        }
+        /* Either we accessed a zero length list, or we enumerated too far. */
+        throw new IndexOutOfBoundsException();
+    }
+
+    /**
+     * Restarts the enumeration at the first alias of the current list.
+     */
+    public void reset() {
+        context.listIdx = 0;
+    }
+
+    /**
+     * Reads the count slot of the current list.
+     * @return the stored count, or 0 when the list offset is 0
+     */
+    private int currentListCount() {
+        if (context.listOffset == 0) {
+            return 0;
+        }
+        return UConverterAlias.gTaggedAliasListsArray[(int)context.listOffset];
+    }
+
+    /**
+     * Class to store context for alias: the offset of a list inside
+     * UConverterAlias.gTaggedAliasListsArray and the index of the next entry
+     * to return from that list.
+     */
+    public static class UAliasContext{
+        private long listOffset;
+        private long listIdx;
+
+        /**
+         * @param listOffset offset of the list's count slot; 0 means no list
+         * @param listIdx index of the next entry to return
+         */
+        public UAliasContext(long listOffset, long listIdx){
+            this.listOffset = listOffset;
+            this.listIdx = listIdx;
+        }
+
+        /** @return the offset of the list's count slot */
+        public long getListOffset(){
+            return listOffset;
+        }
+
+        /** @return the index of the next entry to return */
+        public long getListIdx(){
+            return listIdx;
+        }
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/UConverterConstants.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterConstants.java
@ -0,0 +1,156 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+public interface UConverterConstants {
+
+    // Fields of an interface are implicitly public, static and final, and
+    // member classes are implicitly public and static, so those modifiers
+    // are not repeated on the declarations below.
+
+    short UNSIGNED_BYTE_MASK = 0xff;
+    int UNSIGNED_SHORT_MASK = 0xffff;
+    long UNSIGNED_INT_MASK = 0xffffffffL;
+
+    int U_IS_BIG_ENDIAN = 0;
+
+    /**
+     * Useful constant for the maximum size of the whole locale ID
+     * (including the terminating NULL).
+     * @draft ICU 3.6
+     */
+    int ULOC_FULLNAME_CAPACITY = 56;
+
+    /**
+     * This value is intended for sentinel values for APIs that
+     * (take or) return single code points (UChar32).
+     * It is outside of the Unicode code point range 0..0x10ffff.
+     *
+     * For example, a "done" or "error" value in a new API
+     * could be indicated with U_SENTINEL.
+     *
+     * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+     * values, mostly 0xffff.
+     * Those may need to be distinguished from
+     * actual U+ffff text contents by calling functions like
+     * CharacterIterator::hasNext() or UnicodeString::length().
+     * @draft ICU 2.4
+     */
+    int U_SENTINEL = -1;
+
+    //end utf.h
+
+    //begin ucnv.h
+    /**
+     * Character that separates converter names from options and options from each other.
+     * @see open
+     * @draft ICU 3.6
+     */
+    byte OPTION_SEP_CHAR = ',';
+
+    /** Maximum length of a converter name including the terminating NULL @draft ICU 3.6 */
+    int MAX_CONVERTER_NAME_LENGTH = 60;
+    /** Maximum length of a converter name including path and terminating NULL @draft ICU 3.6 */
+    int MAX_FULL_FILE_NAME_LENGTH = 600 + MAX_CONVERTER_NAME_LENGTH;
+
+    /** Shift in for EBCDIC_STATEFUL and iso2022 states @draft ICU 3.6 */
+    int SI = 0x0F;
+    /** Shift out for EBCDIC_STATEFUL and iso2022 states @draft ICU 3.6 */
+    int SO = 0x0E;
+
+    //end ucnv.h
+
+    // begin bld.h
+    /* size of the overflow buffers in UConverter, enough for escaping callbacks */
+    int ERROR_BUFFER_LENGTH = 32;
+
+    /* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
+    int MAX_SUBCHAR_LEN = 4;
+
+    /* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
+    int MAX_CHAR_LEN = 8;
+
+    /* converter options bits */
+    int OPTION_VERSION = 0xf;
+    int OPTION_SWAP_LFNL = 0x10;
+    int OPTION_MAC = 0x20; //agljport:comment added for Mac ISCII encodings
+
+    /* values for the unicodeMask */
+    int HAS_SUPPLEMENTARY = 1;
+    int HAS_SURROGATES = 2;
+    // end bld.h
+
+    // begin cnv.h
+    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
+    int missingCharMarker = 0xFFFF;
+
+    /** Values that select which direction(s) of a converter a reset applies to. */
+    final class UConverterResetChoice {
+        public static final int RESET_BOTH = 0;
+        public static final int RESET_TO_UNICODE = 1;
+        public static final int RESET_FROM_UNICODE = 2;
+    }
+
+    // begin utf16.h
+    /**
+     * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+     * The value is 2.
+     * @draft ICU 2.4
+     */
+    int U16_MAX_LENGTH = 2;
+    // end utf16.h
+
+    // begin err.h
+    /**
+     * FROM_U, TO_U context options for sub callback
+     * @draft ICU 3.6
+     */
+    byte[] SUB_STOP_ON_ILLEGAL = {'i'};
+
+    /**
+     * FROM_U, TO_U context options for skip callback
+     * @draft ICU 3.6
+     */
+    byte[] SKIP_STOP_ON_ILLEGAL = {'i'};
+
+    /**
+     * The process condition code to be used with the callbacks.
+     * Codes which are greater than IRREGULAR should be
+     * passed on to any chained callbacks.
+     * @draft ICU 3.6
+     */
+    final class UConverterCallbackReason {
+        /**
+         * The code point is unassigned.
+         * The error code U_INVALID_CHAR_FOUND will be set.
+         */
+        public static final int UNASSIGNED = 0;
+
+        /**
+         * The code point is illegal. For example,
+         * \x81\x2E is illegal in SJIS because \x2E
+         * is not a valid trail byte for the \x81
+         * lead byte.
+         * Also, starting with Unicode 3.0.1, non-shortest byte sequences
+         * in UTF-8 (like \xC1\xA1 instead of \x61 for U+0061)
+         * are also illegal, not just irregular.
+         * The error code U_ILLEGAL_CHAR_FOUND will be set.
+         */
+        public static final int ILLEGAL = 1;
+
+        /**
+         * The codepoint is not a regular sequence in
+         * the encoding. For example, \xED\xA0\x80..\xED\xBF\xBF
+         * are irregular UTF-8 byte sequences for single surrogate
+         * code points.
+         * The error code U_INVALID_CHAR_FOUND will be set.
+         */
+        public static final int IRREGULAR = 2;
+
+        /**
+         * The callback is called with this reason when a
+         * 'reset' has occurred. Callback should reset all
+         * state.
+         */
+        public static final int RESET = 3;
+
+        /**
+         * Called when the converter is closed. The
+         * callback should release any allocated memory.
+         */
+        public static final int CLOSE = 4;
+
+        /**
+         * Called when safeClone() is called on the
+         * converter. The pointer available as the
+         * 'context' is an alias to the original converter's
+         * context pointer. If the context must be owned
+         * by the new converter, the callback must clone
+         * the data and call setFromUCallback
+         * (or setToUCallback) with the correct pointer.
+         * @draft ICU 2.2
+         */
+        public static final int CLONE = 5;
+    }
+    //end err.h
+}
--- a/icu4j/src/com/ibm/icu/impl/UConverterDataReader.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterDataReader.java
@ -0,0 +1,552 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+ 
+package com.ibm.icu.impl;
+
+import com.ibm.icu.impl.ICUDebug;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.nio.ByteBuffer;
+
+/**
+ * ucnvmbcs.h
+ *
+ * ICU conversion (.cnv) data file structure, following the usual UDataInfo
+ * header.
+ *
+ * Format version: 6.2
+ *
+ * struct UConverterStaticData -- struct containing the converter name, IBM CCSID,
+ *                                min/max bytes per character, etc.
+ *                                see ucnv_bld.h
+ *
+ * --------------------
+ *
+ * The static data is followed by conversionType-specific data structures.
+ * At the moment, there are only variations of MBCS converters. They all have
+ * the same toUnicode structures, while the fromUnicode structures for SBCS
+ * differ from those for other MBCS-style converters.
+ *
+ * _MBCSHeader.version 4.2 adds an optional conversion extension data structure.
+ * If it is present, then an ICU version reading header versions 4.0 or 4.1
+ * will be able to use the base table and ignore the extension.
+ *
+ * The unicodeMask in the static data is part of the base table data structure.
+ * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the
+ * fromUnicode stage 1 array.
+ * The static data unicodeMask refers only to the base table's properties if
+ * a base table is included.
+ * In an extension-only file, the static data unicodeMask is 0.
+ * The extension data indexes have a separate field with the unicodeMask flags.
+ *
+ * MBCS-style data structure following the static data.
+ * Offsets are counted in bytes from the beginning of the MBCS header structure.
+ * Details about usage in comments in ucnvmbcs.c.
+ *
+ * struct _MBCSHeader (see the definition in this header file below)
+ * contains 32-bit fields as follows:
+ * 8 values:
+ *  0   uint8_t[4]  MBCS version in UVersionInfo format (currently 4.2.0.0)
+ *  1   uint32_t    countStates
+ *  2   uint32_t    countToUFallbacks
+ *  3   uint32_t    offsetToUCodeUnits
+ *  4   uint32_t    offsetFromUTable
+ *  5   uint32_t    offsetFromUBytes
+ *  6   uint32_t    flags, bits:
+ *                      31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher
+ *                                                0 for older versions and if
+ *                                                there is no extension structure
+ *                       7.. 0 outputType
+ *  7   uint32_t    fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher
+ *                  counts bytes in fromUBytes[]
+ *
+ * if(outputType==MBCS_OUTPUT_EXT_ONLY) {
+ *     -- base table name for extension-only table
+ *     char baseTableName[variable]; -- with NUL plus padding for 4-alignment
+ *
+ *     -- all _MBCSHeader fields except for version and flags are 0
+ * } else {
+ *     -- normal base table with optional extension
+ *
+ *     int32_t stateTable[countStates][256];
+ *    
+ *     struct _MBCSToUFallback { (fallbacks are sorted by offset)
+ *         uint32_t offset;
+ *         UChar32 codePoint;
+ *     } toUFallbacks[countToUFallbacks];
+ *    
+ *     uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2];
+ *                  (padded to an even number of units)
+ *    
+ *     -- stage 1 tables
+ *     if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+ *         -- stage 1 table for all of Unicode
+ *         uint16_t fromUTable[0x440]; (32-bit-aligned)
+ *     } else {
+ *         -- BMP-only tables have a smaller stage 1 table
+ *         uint16_t fromUTable[0x40]; (32-bit-aligned)
+ *     }
+ *    
+ *     -- stage 2 tables
+ *        length determined by top of stage 1 and bottom of stage 3 tables
+ *     if(outputType==MBCS_OUTPUT_1) {
+ *         -- SBCS: pure indexes
+ *         uint16_t stage 2 indexes[?];
+ *     } else {
+ *         -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
+ *         uint32_t stage 2 flags and indexes[?];
+ *     }
+ *    
+ *     -- stage 3 tables with byte results
+ *     if(outputType==MBCS_OUTPUT_1) {
+ *         -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
+ *         uint16_t fromUBytes[fromUBytesLength/2];
+ *     } else {
+ *         -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c
+ *         uint8_t fromUBytes[fromUBytesLength]; or
+ *         uint16_t fromUBytes[fromUBytesLength/2]; or
+ *         uint32_t fromUBytes[fromUBytesLength/4];
+ *     }
+ * }
+ *
+ * -- extension table, details see ucnv_ext.h
+ * int32_t indexes[>=32]; ...
+ */
+/*
+ * ucnv_ext.h
+ *
+ * See icuhtml/design/conversion/conversion_extensions.html
+ *
+ * Conversion extensions serve two purposes:
+ * 1. They support m:n mappings.
+ * 2. They support extension-only conversion files that are used together
+ *    with the regular conversion data in base files.
+ *
+ * A base file may contain an extension table (explicitly requested or
+ * implicitly generated for m:n mappings), but its extension table is not
+ * used when an extension-only file is used.
+ *
+ * It is an error if a base file contains any regular (not extension) mapping
+ * from the same sequence as a mapping in the extension file
+ * because the base mapping would hide the extension mapping.
+ *
+ *
+ * Data for conversion extensions:
+ *
+ * One set of data structures per conversion direction (to/from Unicode).
+ * The data structures are sorted by input units to allow for binary search.
+ * Input sequences of more than one unit are handled like contraction tables
+ * in collation:
+ * The lookup value of a unit points to another table that is to be searched
+ * for the next unit, recursively.
+ *
+ * For conversion from Unicode, the initial code point is looked up in
+ * a 3-stage trie for speed,
+ * with an additional table of unique results to save space.
+ *
+ * Long output strings are stored in separate arrays, with length and index
+ * in the lookup tables.
+ * Output results also include a flag distinguishing roundtrip from
+ * (reverse) fallback mappings.
+ *
+ * Input Unicode strings must not begin or end with unpaired surrogates
+ * to avoid problems with matches on parts of surrogate pairs.
+ *
+ * Mappings from multiple characters (code points or codepage state
+ * table sequences) must be searched preferring the longest match.
+ * For this to work and be efficient, the variable-width table must contain
+ * all mappings that contain prefixes of the multiple characters.
+ * If an extension table is built on top of a base table in another file
+ * and a base table entry is a prefix of a multi-character mapping, then
+ * this is an error.
+ *
+ *
+ * Implementation note:
+ *
+ * Currently, the parser and several checks in the code limit the number
+ * of UChars or bytes in a mapping to
+ * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively,
+ * which are output value limits in the data structure.
+ *
+ * For input, this is not strictly necessary - it is a hard limit only for the
+ * buffers in UConverter that are used to store partial matches.
+ *
+ * Input sequences could otherwise be arbitrarily long if partial matches
+ * need not be stored (i.e., if a sequence does not span several buffers with too
+ * many units before the last buffer), although then results would differ
+ * depending on whether partial matches exceed the limits or not,
+ * which depends on the pattern of buffer sizes.
+ *
+ *
+ * Data structure:
+ *
+ * int32_t indexes[>=32];
+ *
+ *   Array of indexes and lengths etc. The length of the array is at least 32.
+ *   The actual length is stored in indexes[0] to be forward compatible.
+ *
+ *   Each index to another array is the number of bytes from indexes[].
+ *   Each length of an array is the number of array base units in that array.
+ *
+ *   Some of the structures may not be present, in which case their indexes
+ *   and lengths are 0.
+ *
+ *   Usage of indexes[i]:
+ *   [0]  length of indexes[]
+ *
+ *   // to Unicode table
+ *   [1]  index of toUTable[] (array of uint32_t)
+ *   [2]  length of toUTable[]
+ *   [3]  index of toUUChars[] (array of UChar)
+ *   [4]  length of toUUChars[]
+ *
+ *   // from Unicode table, not for the initial code point
+ *   [5]  index of fromUTableUChars[] (array of UChar)
+ *   [6]  index of fromUTableValues[] (array of uint32_t)
+ *   [7]  length of fromUTableUChars[] and fromUTableValues[]
+ *   [8]  index of fromUBytes[] (array of char)
+ *   [9]  length of fromUBytes[]
+ *
+ *   // from Unicode trie for initial-code point lookup
+ *   [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2)
+ *   [11] length of stage 1 portion of fromUStage12[]
+ *   [12] length of fromUStage12[]
+ *   [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[])
+ *   [14] length of fromUStage3[]
+ *   [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[])
+ *   [16] length of fromUStage3b[]
+ *
+ *   [17] Bit field containing numbers of bytes:
+ *        31..24 reserved, 0
+ *        23..16 maximum input bytes
+ *        15.. 8 maximum output bytes
+ *         7.. 0 maximum bytes per UChar
+ *
+ *   [18] Bit field containing numbers of UChars:
+ *        31..24 reserved, 0
+ *        23..16 maximum input UChars
+ *        15.. 8 maximum output UChars
+ *         7.. 0 maximum UChars per byte
+ *
+ *   [19] Bit field containing flags:
+ *               (extension table unicodeMask)
+ *         1     UCNV_HAS_SURROGATES flag for the extension table
+ *         0     UCNV_HAS_SUPPLEMENTARY flag for the extension table
+ *
+ *   [20]..[30] reserved, 0
+ *   [31] number of bytes for the entire extension structure
+ *   [>31] reserved; there are indexes[0] indexes
+ *
+ *
+ * uint32_t toUTable[];
+ *
+ *   Array of byte/value pairs for lookups for toUnicode conversion.
+ *   The array is partitioned into sections like collation contraction tables.
+ *   Each section contains one word with the number of following words and
+ *   a default value for when the lookup in this section yields no match.
+ *
+ *   A section is sorted in ascending order of input bytes,
+ *   allowing for fast linear or binary searches.
+ *   The builder may store entries for a contiguous range of byte values
+ *   (compare difference between the first and last one with count),
+ *   which then allows for direct array access.
+ *   The builder should always do this for the initial table section.
+ *
+ *   Entries may have 0 values, see below.
+ *   No two entries in a section have the same byte values.
+ *
+ *   Each uint32_t contains an input byte value in bits 31..24 and the
+ *   corresponding lookup value in bits 23..0.
+ *   Interpret the value as follows:
+ *     if(value==0) {
+ *       no match, see below
+ *     } else if(value<0x1f0000) {
+ *       partial match - use value as index to the next toUTable section
+ *       and match the next unit; (value indexes toUTable[value])
+ *     } else {
+ *       if(bit 23 set) {
+ *         roundtrip;
+ *       } else {
+ *         fallback;
+ *       }
+ *       unset value bit 23;
+ *       if(value<=0x2fffff) {
+ *         (value-0x1f0000) is a code point; (BMP: value<=0x1fffff)
+ *       } else {
+ *         bits 17..0 (value&0x3ffff) is an index to
+ *           the result UChars in toUUChars[]; (0 indexes toUUChars[0])
+ *         length of the result=((value>>18)-12); (length=0..19)
+ *       }
+ *     }
+ *
+ *   The first word in a section contains the number of following words in the
+ *   input byte position (bits 31..24, number=1..0xff).
+ *   The value of the initial word is used when the current byte is not found
+ *   in this section.
+ *   If the value is not 0, then it represents a result as above.
+ *   If the value is 0, then the search has to return a shorter match with an
+ *   earlier default value as the result, or result in "unmappable" even for the
+ *   initial bytes.
+ *   If the value is 0 for the initial toUTable entry, then the initial byte
+ *   does not start any mapping input.
+ *
+ *
+ * UChar toUUChars[];
+ *
+ *   Contains toUnicode mapping results, stored as sequences of UChars.
+ *   Indexes and lengths stored in the toUTable[].
+ *
+ *
+ * UChar fromUTableUChars[];
+ * uint32_t fromUTableValues[];
+ *
+ *   The fromUTable is split into two arrays, but works otherwise much like
+ *   the toUTable. The array is partitioned into sections like collation
+ *   contraction tables and toUTable.
+ *   A row in the table consists of same-index entries in fromUTableUChars[]
+ *   and fromUTableValues[].
+ *
+ *   Interpret a value as follows:
+ *     if(value==0) {
+ *       no match, see below
+ *     } else if(value<=0xffffff) { (bits 31..24 are 0)
+ *       partial match - use value as index to the next fromUTable section
+ *       and match the next unit; (value indexes fromUTable[value])
+ *     } else {
+ *       if(value==0x80000001) {
+ *         return no mapping, but request for <subchar1>;
+ *       }
+ *       if(bit 31 set) {
+ *         roundtrip;
+ *       } else {
+ *         fallback;
+ *       }
+ *       // bits 30..29 reserved, 0
+ *       length=(value>>24)&0x1f; (bits 28..24)
+ *       if(length==1..3) {
+ *         bits 23..0 contain 1..3 bytes, padded with 00s on the left;
+ *       } else {
+ *         bits 23..0 (value&0xffffff) is an index to
+ *           the result bytes in fromUBytes[]; (0 indexes fromUBytes[0])
+ *       }
+ *     }
+ *       
+ *   The first pair in a section contains the number of following pairs in the
+ *   UChar position (16 bits, number=1..0xffff).
+ *   The value of the initial pair is used when the current UChar is not found
+ *   in this section.
+ *   If the value is not 0, then it represents a result as above.
+ *   If the value is 0, then the search has to return a shorter match with an
+ *   earlier default value as the result, or result in "unmappable" even for the
+ *   initial UChars.
+ *
+ *   If the from Unicode trie is present, then the from Unicode search tables
+ *   are not used for initial code points.
+ *   In this case, the first entries (index 0) in the tables are not used
+ *   (reserved, set to 0) because a value of 0 is used in trie results
+ *   to indicate no mapping.
+ *
+ *
+ * uint16_t fromUStage12[];
+ *
+ *   Stages 1 & 2 of a trie that maps an initial code point.
+ *   Indexes in stage 1 are all offset by the length of stage 1 so that the
+ *   same array pointer can be used for both stages.
+ *   If (c>>10)>=(length of stage 1) then c does not start any mapping.
+ *   Same bit distribution as for regular conversion tries.
+ *
+ *
+ * uint16_t fromUStage3[];
+ * uint32_t fromUStage3b[];
+ *
+ *   Stage 3 of the trie. The first array simply contains indexes to the second,
+ *   which contains words in the same format as fromUTableValues[].
+ *   Use a stage 3 granularity of 4, which allows for 256k stage 3 entries,
+ *   and 16-bit entries in stage 3 allow for 64k stage 3b entries.
+ *   The stage 3 granularity means that the stage 2 entry needs to be left-shifted.
+ *
+ *   Two arrays are used because it is expected that more than half of the stage 3
+ *   entries will be zero. The 16-bit index stage 3 array saves space even
+ *   considering storing a total of 6 bytes per non-zero entry in both arrays
+ *   together.
+ *   Using a stage 3 granularity of >1 diminishes the compactability in that stage
+ *   but provides a larger effective addressing space in stage 2.
+ *   All but the final result stage use 16-bit entries to save space.
+ *
+ *   fromUStage3b[] contains a zero for "no mapping" at its index 0,
+ *   and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping"
+ *   (i.e., "no mapping" with preference for <subchar1> rather than <subchar>),
+ *   and all other items are unique non-zero results.
+ *
+ *   The default value of a fromUTableValues[] section that is referenced
+ *   _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1,
+ *   but this value must not occur anywhere else in fromUTableValues[]
+ *   because "no mapping" is always a property of a single code point,
+ *   never of multiple.
+ *
+ *
+ * char fromUBytes[];
+ *
+ *   Contains fromUnicode mapping results, stored as sequences of chars.
+ *   Indexes and lengths stored in the fromUTableValues[].
+ */
+
+/**
+ * Reader for the contents of an ICU conversion (.cnv, dataFormat "cnvt") data stream.
+ * The constructor consumes and checks the ICU data header; every read method
+ * then consumes bytes from the same underlying stream, so the methods have to
+ * be called in the order in which the structures appear in the data.
+ * All fixed-size reads use readFully, so a truncated stream is reported as an
+ * IOException instead of silently leaving fields partially filled.
+ */
+public final class UConverterDataReader implements ICUBinary.Authenticate {
+    private final static boolean debug = ICUDebug.enabled("UConverterDataReader");
+
+    /**
+     * Slot of the extension indexes[] array that holds the number of bytes of
+     * the entire extension structure; the array must be longer than this.
+     */
+    private static final int EXT_SIZE_INDEX = 31;
+
+   /**
+    * <p>Protected constructor.</p>
+    * Reads the data header through ICUBinary.readHeader and keeps the version
+    * bytes it returns; the rest of the stream is left for the read methods.
+    * @param inputStream ICU converter data input stream
+    * @exception IOException throw if data file fails authentication
+    * @draft 2.1
+    */
+    protected UConverterDataReader(InputStream inputStream)
+                                        throws IOException{
+        if(debug) System.out.println("Bytes in inputStream " + inputStream.available());
+
+        unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
+
+        if(debug) System.out.println("Bytes left in inputStream " +inputStream.available());
+
+        dataInputStream = new DataInputStream(inputStream);
+
+        if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available());
+    }
+
+    // protected methods -------------------------------------------------
+
+    /**
+     * Reads the static data structure into the given object, field by field
+     * in stream order.
+     * The name field is always MAX_CONVERTER_NAME_LENGTH bytes long and is
+     * decoded as US-ASCII independently of the platform default charset; all
+     * bytes of the field, including any padding, end up in sd.name.
+     * @param sd object to fill; its subChar and reserved arrays must already
+     *           be allocated because they determine how many bytes are read
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void readStaticData(UConverterStaticData sd) throws IOException
+    {
+        sd.structSize = dataInputStream.readInt();
+        byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH];
+        // read(byte[]) may stop short or return -1; readFully fills the whole field
+        dataInputStream.readFully(name);
+        sd.name = new String(name, 0, name.length, "US-ASCII");
+        sd.codepage = dataInputStream.readInt();
+        sd.platform = dataInputStream.readByte();
+        sd.conversionType = dataInputStream.readByte();
+        sd.minBytesPerChar = dataInputStream.readByte();
+        sd.maxBytesPerChar = dataInputStream.readByte();
+        dataInputStream.readFully(sd.subChar);
+        sd.subCharLen = dataInputStream.readByte();
+        sd.hasToUnicodeFallback = dataInputStream.readByte();
+        sd.hasFromUnicodeFallback = dataInputStream.readByte();
+        sd.unicodeMask = (short)dataInputStream.readUnsignedByte();
+        sd.subChar1 = dataInputStream.readByte();
+        dataInputStream.readFully(sd.reserved);
+    }
+
+    /**
+     * Reads the MBCS header: the version bytes followed by seven 32-bit fields.
+     * @param h header object to fill; h.version must already be allocated
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void readMBCSHeader(UConverterSharedData.MBCSHeader h) throws IOException
+    {
+        dataInputStream.readFully(h.version);
+        h.countStates = dataInputStream.readInt();
+        h.countToUFallbacks = dataInputStream.readInt();
+        h.offsetToUCodeUnits = dataInputStream.readInt();
+        h.offsetFromUTable = dataInputStream.readInt();
+        h.offsetFromUBytes = dataInputStream.readInt();
+        h.flags = dataInputStream.readInt();
+        h.fromUBytesLength = dataInputStream.readInt();
+    }
+
+    /**
+     * Fills the MBCS tables in stream order. The caller sizes every array;
+     * the array lengths determine how much is read.
+     * @param stateTableArray state table rows, read as 32-bit values
+     * @param toUFallbacksArray fallback entries (offset and code point each);
+     *                          the elements must already be allocated
+     * @param unicodeCodeUnitsArray toUnicode code units, read as 16-bit values
+     * @param fromUnicodeTableArray fromUnicode table, read as 16-bit values
+     * @param fromUnicodeBytesArray fromUnicode result bytes
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void readMBCSTable(int[][] stateTableArray, UConverterSharedData.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException
+    {
+        int i, j;
+        for(i = 0; i < stateTableArray.length; ++i)
+            for(j = 0; j < stateTableArray[i].length; ++j)
+                stateTableArray[i][j] = dataInputStream.readInt();
+        for(i = 0; i < toUFallbacksArray.length; ++i) {
+            toUFallbacksArray[i].offset = dataInputStream.readInt();
+            toUFallbacksArray[i].codePoint = dataInputStream.readInt();
+        }
+        for(i = 0; i < unicodeCodeUnitsArray.length; ++i)
+            unicodeCodeUnitsArray[i] = dataInputStream.readChar();
+        for(i = 0; i < fromUnicodeTableArray.length; ++i)
+            fromUnicodeTableArray[i] = dataInputStream.readChar();
+        // one bulk read instead of a byte-at-a-time loop
+        dataInputStream.readFully(fromUnicodeBytesArray);
+    }
+
+    /**
+     * Reads a NUL-terminated name, one byte per character.
+     * Only the bytes up to and including the NUL are consumed; any alignment
+     * padding after it is left in the stream.
+     * @return the characters read before the terminating NUL
+     * @exception IOException if the stream ends before a NUL is found
+     */
+    protected String readBaseTableName() throws IOException
+    {
+        char c;
+        StringBuffer name = new StringBuffer();
+        while((c = (char)dataInputStream.readByte()) !=  0)
+            name.append(c);
+        return name.toString();
+    }
+
+    /**
+     * Skips to the extension structure and reads all of it into one buffer.
+     * The buffer starts with the indexes[] array (indexes[0] is its length)
+     * and is sized from indexes[31], the byte count of the whole structure.
+     * On return the buffer's position is just past the index array; its
+     * limit and capacity equal the size of the structure.
+     * @param skip number of bytes to skip before the extension structure
+     * @return buffer holding the complete extension structure
+     * @exception IOException if the stream ends early, or if the index array
+     *            has fewer than 32 entries or declares a structure size
+     *            smaller than the index array itself
+     */
+    protected ByteBuffer readExtIndexes(int skip) throws IOException
+    {
+        skipFully(skip);
+
+        int n = dataInputStream.readInt();
+        if(n <= EXT_SIZE_INDEX) {
+            throw new IOException("Invalid conversion extension data: index array length " + n);
+        }
+        int[] indexes = new int[n];
+        indexes[0] = n;
+        for(int i = 1; i < n; ++i) {
+            indexes[i] = dataInputStream.readInt();
+        }
+
+        int totalBytes = indexes[EXT_SIZE_INDEX];
+        // the structure has to hold at least its own n 4-byte indexes
+        if(totalBytes / 4 < n) {
+            throw new IOException("Invalid conversion extension data: structure size " + totalBytes);
+        }
+
+        ByteBuffer b = ByteBuffer.allocate(totalBytes);
+        for(int i = 0; i < n; ++i) {
+            b.putInt(indexes[i]);
+        }
+        // fill the rest of the backing array; this does not move the position
+        dataInputStream.readFully(b.array(), b.position(), b.remaining());
+        return b;
+    }
+
+    /**
+     * Reads a block of extension table bytes.
+     * @param n number of bytes to read
+     * @return a new array holding exactly n bytes from the stream
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected byte[] readExtTables(int n) throws IOException
+    {
+        byte[] tables = new byte[n];
+        dataInputStream.readFully(tables);
+        return tables;
+    }
+
+    /**
+     * Returns the data format version that this class understands.
+     * @return the DATA_FORMAT_VERSION array itself, not a copy
+     */
+    public byte[] getDataFormatVersion(){
+        return DATA_FORMAT_VERSION;
+    }
+
+    /**
+     * Accepts a data file if the first byte of its format version matches
+     * the first byte of DATA_FORMAT_VERSION.
+     * @param version format version bytes from the data file header
+     * @return true if the first version byte matches
+     */
+    public boolean isDataVersionAcceptable(byte version[])
+    {
+        return version[0] == DATA_FORMAT_VERSION[0];
+    }
+
+    /**
+     * Returns the version bytes that ICUBinary.readHeader produced in the constructor.
+     * @return the stored array itself, not a copy
+     */
+    public byte[] getUnicodeVersion(){
+        return unicodeVersion;
+    }
+
+    // private methods ------------------------------------------------------
+
+    /**
+     * Skips exactly count bytes; a count of 0 or less skips nothing.
+     * skipBytes may skip fewer bytes than asked, so it is retried, and when
+     * it makes no progress a single byte is read instead, which raises
+     * EOFException at the end of the stream.
+     */
+    private void skipFully(int count) throws IOException
+    {
+        int remaining = count;
+        while(remaining > 0) {
+            int skipped = dataInputStream.skipBytes(remaining);
+            if(skipped <= 0) {
+                dataInputStream.readByte();
+                skipped = 1;
+            }
+            remaining -= skipped;
+        }
+    }
+
+    // private data members -------------------------------------------------
+
+    /**
+    * ICU data file input stream
+    */
+    private final DataInputStream dataInputStream;
+
+    /** Version bytes returned by ICUBinary.readHeader. */
+    private final byte[] unicodeVersion;
+
+    /**
+    * File format ID and version that this class understands.
+    * No guarantees are made if an older version is used.
+    */
+    // DATA_FORMAT_ID values taken from icu4c isCnvAcceptable (ucnv_bld.c)
+    private static final byte DATA_FORMAT_ID[] = {(byte)0x63, (byte)0x6e, (byte)0x76, (byte)0x74}; // dataFormat="cnvt"
+    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6};
+
+}
+
--- a/icu4j/src/com/ibm/icu/impl/UConverterSharedData.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterSharedData.java
@ -0,0 +1,545 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+import java.nio.ByteBuffer;
+
+
+/*
+ * Defines the UConverterSharedData struct,
+ * the immutable, shared part of UConverter.
+ */
+public class UConverterSharedData {
+    // Instance fields. Each Java field is preceded by the C declaration it was ported from.
+
+    //uint32_t structSize;            /* Size of this structure */
+	public int structSize;            /* Size of this structure */
+    //uint32_t referenceCounter;      /* used to count number of clients, 0xffffffff for static SharedData */
+	// NOTE: as a Java int, the C value 0xffffffff is represented as -1.
+	public int referenceCounter;      /* used to count number of clients, 0xffffffff for static SharedData */
+	/* Length of the version byte arrays (used by MBCSHeader below). */
+	public static final int MAX_VERSION_LENGTH=4;
+	//agljport:todo const void *dataMemory;         /* from udata_openChoice() - for cleanup */
+	//agljport:todo void *table;                    /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
+
+    //const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
+	public UConverterStaticData staticData; /* pointer to the static (non changing) data. */
+
+    //UBool                sharedDataCached;   /* TRUE:  shared data is in cache, don't destroy on close() if 0 ref.  FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
+	public boolean                sharedDataCached;   /* TRUE:  shared data is in cache, don't destroy on close() if 0 ref.  FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
+	/*UBool               staticDataOwned;   TRUE if static data owned by shared data & should be freed with it, NEVER true for udata() loaded statics. This ignored variable was removed to make space for sharedDataCached.   */
+
+    //const UConverterImpl *impl;     /* vtable-style struct of mostly function pointers */
+	//public UConverterImpl impl;     /* vtable-style struct of mostly function pointers */
+
+	/*initial values of some members of the mutable part of object */
+    //uint32_t toUnicodeStatus;
+	// Declared long so the full unsigned 32-bit range of the C field fits.
+	public long toUnicodeStatus;
+
+	/*
+	 * Shared data structures currently come in two flavors:
+	 * - readonly for built-in algorithmic converters
+	 * - allocated for MBCS, with a pointer to an allocated UConverterTable
+	 *   which always has a UConverterMBCSTable
+	 *
+	 * To eliminate one allocation, I am making the UConverterMBCSTable
+	 * a member of the shared data. It is the last member so that static
+	 * definitions of UConverterSharedData work as before.
+	 * The table field above also remains to avoid updating all static
+	 * definitions, but is now unused.
+	 *
+	 * markus 2003-nov-07
+	 */
+	// In this Java port the table is created by the no-argument constructor.
+	public UConverterMBCSTable mbcs;
+
+	/**
+	 * Creates shared data with a fresh, empty MBCS table; every other field keeps
+	 * its Java default value.
+	 */
+	public UConverterSharedData()
+	{
+		this.mbcs = new UConverterMBCSTable();
+	}
+	
+	/**
+	 * Creates shared data with a fresh MBCS table and the given field values.
+	 * (The C struct's impl pointer has no counterpart here; see the commented-out field.)
+	 *
+	 * @param structSize_ value for structSize
+	 * @param referenceCounter_ value for referenceCounter
+	 * @param staticData_ value for staticData (stored by reference)
+	 * @param sharedDataCached_ value for sharedDataCached
+	 * @param toUnicodeStatus_ value for toUnicodeStatus
+	 */
+	public UConverterSharedData(int structSize_, int referenceCounter_, UConverterStaticData staticData_, boolean sharedDataCached_, long toUnicodeStatus_)
+	{
+		this();
+		this.structSize = structSize_;
+		this.referenceCounter = referenceCounter_;
+		this.staticData = staticData_;
+		this.sharedDataCached = sharedDataCached_;
+		this.toUnicodeStatus = toUnicodeStatus_;
+	}
+
+	/**
+	 * UConverterImpl contains all the data and functions for a converter type.
+	 * Its function pointers work much like a C++ vtable.
+	 * Many converter types need to define only a subset of the functions;
+	 * when a function pointer is NULL, then a default action will be performed.
+	 *
+	 * Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
+	 * otherwise the converter may crash.
+	 * Every converter type that has variable-length codepage sequences should
+	 * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
+	 * correct offset handling.
+	 * All other functions may or may not be implemented - it depends only on
+	 * whether the converter type needs them.
+	 *
+	 * When open() fails, then close() will be called, if present.
+	 */
+	//public class UConverterImpl {
+	    //UConverterType type;
+	    //UConverterToUnicode toUnicode;
+/*	protected void doToUnicode(UConverterToUnicodeArgs args, int[] pErrorCode)
+	{
+	}
+	
+	public final void toUnicode(UConverterToUnicodeArgs args, int[] pErrorCode)
+	{
+		doToUnicode(args, pErrorCode);
+	}
+	
+	//UConverterFromUnicode fromUnicode;
+	protected void doFromUnicode(UConverterFromUnicodeArgs args, int[] pErrorCode)
+	{
+	}
+	
+	public final void fromUnicode(UConverterFromUnicodeArgs args, int[] pErrorCode)
+	{
+		doFromUnicode(args, pErrorCode);
+	}
+	
+	protected int doGetNextUChar(UConverterToUnicodeArgs args, int[] pErrorCode)
+	{
+		return 0;
+	}
+	
+	//UConverterGetNextUChar getNextUChar;
+	public final int getNextUChar(UConverterToUnicodeArgs args, int[] pErrorCode)
+	{
+		return doGetNextUChar(args, pErrorCode);
+	}
+	
+	//public interface UConverterImplLoadable extends UConverterImpl
+	protected void doLoad(UConverterLoadArgs pArgs, short[] raw, int[] pErrorCode)
+	{
+	}
+	
+*/	
+	/*
+	 * Empty hook: the base implementation does nothing.
+	 * NOTE(review): presumably meant to be overridden to release converter data,
+	 * like the other (commented-out) do* methods around it - confirm.
+	 */
+	protected void doUnload()
+	{
+	}
+
+	/*
+	//public interface UConverterImplOpenable extends UConverterImpl
+	protected void doOpen(UConverter cnv, String name, String locale, long options, int[] pErrorCode)
+	{
+	}
+    
+	//UConverterOpen open;
+	public final void open(UConverter cnv, String name, String locale, long options, int[] pErrorCode)
+	{
+		doOpen(cnv, name, locale, options, pErrorCode);
+	}
+	
+	protected void doClose(UConverter cnv)
+	{
+	}
+	
+    //UConverterClose close;
+	public final void close(UConverter cnv)
+	{
+		doClose(cnv);
+	}
+	
+	protected void doReset(UConverter cnv, int choice)
+	{
+	}
+	
+	//typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
+	//UConverterReset reset;
+	public final void reset(UConverter cnv, int choice)
+	{
+		doReset(cnv, choice);
+	}
+
+	//public interface UConverterImplVariableLength extends UConverterImpl
+	protected void doToUnicodeWithOffsets(UConverterToUnicodeArgs args, int[] pErrorCode)
+	{
+	}
+	
+    //UConverterToUnicode toUnicodeWithOffsets;
+	public final void toUnicodeWithOffsets(UConverterToUnicodeArgs args, int[] pErrorCode)
+	{
+		doToUnicodeWithOffsets(args, pErrorCode);
+	}
+	
+	protected void doFromUnicodeWithOffsets(UConverterFromUnicodeArgs args, int[] pErrorCode)
+	{
+	}
+	
+    //UConverterFromUnicode fromUnicodeWithOffsets;
+	public final void fromUnicodeWithOffsets(UConverterFromUnicodeArgs args, int[] pErrorCode)
+	{
+		doFromUnicodeWithOffsets(args, pErrorCode);
+	}
+
+	//public interface UConverterImplMisc extends UConverterImpl
+	protected void doGetStarters(UConverter converter, boolean starters[], int[] pErrorCode)
+	{
+	}
+	
+    //UConverterGetStarters getStarters;
+	public final void getStarters(UConverter converter, boolean starters[], int[] pErrorCode)
+	{
+		doGetStarters(converter, starters, pErrorCode);
+	}
+	
+	protected String doGetName(UConverter cnv)
+	{
+		return "";
+	}
+	
+    //UConverterGetName getName;
+	public final String getName(UConverter cnv)
+	{
+		return doGetName(cnv);
+	}
+	
+	protected void doWriteSub(UConverterFromUnicodeArgs pArgs, long offsetIndex, int[] pErrorCode)
+	{
+	}
+	
+	//UConverterWriteSub writeSub;
+	public final void writeSub(UConverterFromUnicodeArgs pArgs, long offsetIndex, int[] pErrorCode)
+	{
+		doWriteSub(pArgs, offsetIndex, pErrorCode);
+	}
+	
+	protected UConverter doSafeClone(UConverter cnv, byte[] stackBuffer, int[] pBufferSize, int[] status)
+	{
+		return new UConverter();
+	}
+
+    //UConverterSafeClone safeClone;
+	public final UConverter  safeClone(UConverter cnv, byte[] stackBuffer, int[] pBufferSize, int[] status)
+	{
+		return doSafeClone(cnv, stackBuffer, pBufferSize, status);
+	}
+	
+	protected void doGetUnicodeSet(UConverter cnv, UnicodeSet /*USetAdder* / sa, int /*UConverterUnicodeSet* / which, int[] pErrorCode)
+	{
+	}
+	
+    //UConverterGetUnicodeSet getUnicodeSet;
+	//public final void getUnicodeSet(UConverter cnv, UnicodeSet /*USetAdder* / sa, int /*UConverterUnicodeSet* / which, int[] pErrorCode)
+	//{
+	//	doGetUnicodeSet(cnv, sa, which, pErrorCode);
+	//}
+
+	//}
+
+	static final String DATA_TYPE = "cnv";
+	private static final int CNV_DATA_BUFFER_SIZE = 25000;
+	public static final int sizeofUConverterSharedData = 100;
+	
+	//static UDataMemoryIsAcceptable isCnvAcceptable;
+
+	/**
+	 * Load a non-algorithmic converter.
+	 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
+	 
+	// UConverterSharedData * load(UConverterLoadArgs *pArgs, UErrorCode *err)
+	public static final UConverterSharedData load(UConverterLoadArgs pArgs, int[] err)
+	{
+	    UConverterSharedData mySharedConverterData = null;
+	
+	    if(err == null || ErrorCode.isFailure(err[0])) {
+	        return null;
+	    }
+	
+	    if(pArgs.pkg != null && pArgs.pkg.length() != 0) {
+	         application-provided converters are not currently cached 
+	        return UConverterSharedData.createConverterFromFile(pArgs, err);
+	    }
+	
+	    //agljport:fix mySharedConverterData = getSharedConverterData(pArgs.name);
+	    if (mySharedConverterData == null)
+	    {
+	        Not cached, we need to stream it in from file 
+	        mySharedConverterData = UConverterSharedData.createConverterFromFile(pArgs, err);
+	        if (ErrorCode.isFailure(err[0]) || (mySharedConverterData == null))
+	        {
+	            return null;
+	        }
+	        else
+	        {
+	             share it with other library clients 
+	            //agljport:fix shareConverterData(mySharedConverterData);
+	        }
+	    }
+	    else
+	    {
+	         The data for this converter was already in the cache.            
+	         Update the reference counter on the shared data: one more client 
+	        mySharedConverterData.referenceCounter++;
+	    }
+	
+	    return mySharedConverterData;
+	}
+	
+	Takes an alias name gets an actual converter file name
+	 *goes to disk and opens it.
+	 *allocates the memory and returns a new UConverter object
+	 
+	//static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
+	public static final UConverterSharedData createConverterFromFile(UConverterLoadArgs pArgs, int[] err)
+	{
+	    UDataMemory data = null;
+	    UConverterSharedData sharedData = null;
+	
+	    //agljport:todo UTRACE_ENTRY_OC(UTRACE_LOAD);
+	
+	    if (err == null || ErrorCode.isFailure(err[0])) {
+	        //agljport:todo UTRACE_EXIT_STATUS(*err);
+	        return null;
+	    }
+	
+	    //agljport:todo UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
+	
+	    //agljport:fix data = udata_openChoice(pArgs.pkgArray, DATA_TYPE.getBytes(), pArgs.name, isCnvAcceptable, null, err);
+	    if(ErrorCode.isFailure(err[0]))
+	    {
+	        //agljport:todo UTRACE_EXIT_STATUS(*err);
+	        return null;
+	    }
+	
+	    sharedData = data_unFlattenClone(pArgs, data, err);
+	    if(ErrorCode.isFailure(err[0]))
+	    {
+	        //agljport:fix udata_close(data);
+	        //agljport:todo UTRACE_EXIT_STATUS(*err);
+	        return null;
+	    }
+	
+	    
+	     * TODO Store pkg in a field in the shared data so that delta-only converters
+	     * can load base converters from the same package.
+	     * If the pkg name is longer than the field, then either do not load the converter
+	     * in the first place, or just set the pkg field to "".
+	     
+	
+	    return sharedData;
+	}
+*/
+	/* Package-private reader for this converter's data file; null until assigned (not assigned anywhere in this class). */
+	UConverterDataReader dataReader = null;
+	
+
+
+	/*returns a converter type from a string
+	 */
+	// static const UConverterSharedData * getAlgorithmicTypeFromName(const char *realName)
+	public static final UConverterSharedData getAlgorithmicTypeFromName(String realName)
+	{
+	    long mid, start, limit;
+	    long lastMid;
+	    int result;
+	    StringBuffer strippedName = new StringBuffer(UConverterConstants.MAX_CONVERTER_NAME_LENGTH);
+	
+	    /* Lower case and remove ignoreable characters. */
+	    UConverterAlias.io_stripForCompare(strippedName, realName);
+	
+	    /* do a binary search for the alias */
+	    start = 0;
+	    limit = cnvNameType.length;
+	    mid = limit;
+	    lastMid = UConverterAlias.UINT32_MAX;
+	
+	    for (;;) {
+	        mid = (long)((start + limit) / 2);
+	        if (lastMid == mid) {   /* Have we moved? */
+	            break;  /* We haven't moved, and it wasn't found. */
+	        }
+	        lastMid = mid;
+	        result = strippedName.substring(0).compareTo(cnvNameType[(int)mid].name);
+	
+	        if (result < 0) {
+	            limit = mid;
+	        } else if (result > 0) {
+	            start = mid;
+	        } else {
+	            return converterData[cnvNameType[(int)mid].type];
+	        }
+	    }
+	
+	    return null;
+	}
+
+	/**
+	 * Fallbacks to Unicode are stored outside the normal state table and code point structures
+	 * in a vector of items of this type. They are sorted by offset.
+	 * Note: this is an inner (non-static) class, so instances are tied to an
+	 * enclosing UConverterSharedData.
+	 */
+	public final class MBCSToUFallback {
+	    /* sort key of the fallback entry (see class comment) */
+	    int offset;
+	    /* Unicode code point of the fallback mapping */
+	    int codePoint;
+	}
+	
+	/**
+	 * The MBCS part of the C UConverterTable union (a runtime data structure).
+	 * Holds the per-converter data and the references into the loaded mapping tables.
+	 * Inner (non-static) class: each instance belongs to an enclosing UConverterSharedData.
+	 */
+	public final class UConverterMBCSTable {
+	    /* ---- toUnicode ---- */
+	    short countStates;
+	    byte dbcsOnlyState;
+	    boolean stateTableOwned;
+	    int countToUFallbacks;
+
+	    int[][] stateTable;               /* [countStates][256] */
+	    int[][] swapLFNLStateTable;       /* [countStates][256], for swaplfnl */
+	    char[] unicodeCodeUnits;          /* [countUnicodeResults] */
+	    MBCSToUFallback[] toUFallbacks;   /* [countToUFallbacks] */
+
+	    /* ---- fromUnicode ---- */
+	    char[] fromUnicodeTable;
+	    byte[] fromUnicodeBytes;
+	    byte[] swapLFNLFromUnicodeBytes;  /* for swaplfnl */
+	    int fromUBytesLength;
+	    short outputType;
+	    short unicodeMask;
+
+	    /* converter name for swaplfnl */
+	    String swapLFNLName;
+
+	    /* ---- extension data ---- */
+	    UConverterSharedData baseSharedData;
+	    ByteBuffer extIndexes;            // create int[] view etc. as needed
+
+	    /** Creates a table with all fields at their Java default values. */
+	    UConverterMBCSTable()
+	    {
+	    }
+
+	    /**
+	     * Copy constructor. The copy is shallow: arrays, the buffer and the base
+	     * shared data are shared with {@code t}, not duplicated.
+	     */
+	    UConverterMBCSTable(UConverterMBCSTable t)
+	    {
+	        /* toUnicode */
+	        this.countStates = t.countStates;
+	        this.dbcsOnlyState = t.dbcsOnlyState;
+	        this.stateTableOwned = t.stateTableOwned;
+	        this.countToUFallbacks = t.countToUFallbacks;
+	        this.stateTable = t.stateTable;
+	        this.swapLFNLStateTable = t.swapLFNLStateTable;
+	        this.unicodeCodeUnits = t.unicodeCodeUnits;
+	        this.toUFallbacks = t.toUFallbacks;
+
+	        /* fromUnicode */
+	        this.fromUnicodeTable = t.fromUnicodeTable;
+	        this.fromUnicodeBytes = t.fromUnicodeBytes;
+	        this.swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes;
+	        this.fromUBytesLength = t.fromUBytesLength;
+	        this.outputType = t.outputType;
+	        this.unicodeMask = t.unicodeMask;
+
+	        /* swaplfnl name and extension data */
+	        this.swapLFNLName = t.swapLFNLName;
+	        this.baseSharedData = t.baseSharedData;
+	        this.extIndexes = t.extIndexes;
+	    }
+	}
+
+	/**
+	 * In-memory form of the MBCS data header.
+	 * The version array is allocated with MAX_VERSION_LENGTH bytes; all other
+	 * fields start at zero.
+	 */
+	public final class MBCSHeader {
+	    byte[] version;            /* [U_MAX_VERSION_LENGTH] */
+	    int countStates;
+	    int countToUFallbacks;
+	    int offsetToUCodeUnits;
+	    int offsetFromUTable;
+	    int offsetFromUBytes;
+	    int flags;
+	    int fromUBytesLength;
+
+	    public MBCSHeader()
+	    {
+	        this.version = new byte[MAX_VERSION_LENGTH];
+	    }
+	}
+
+	/**
+	 * Integer constants naming the basic converter types (Java stand-in for a C enum).
+	 * The supported types are numbered consecutively from SBCS (0) to MAC_HEBREW (34);
+	 * UNSUPPORTED_CONVERTER is -1.
+	 * @draft ICU 3.6
+	 */
+	public static final class UConverterType {
+	    public static final int UNSUPPORTED_CONVERTER = -1;
+
+	    public static final int SBCS               = 0;
+	    public static final int DBCS               = 1;
+	    public static final int MBCS               = 2;
+	    public static final int LATIN_1            = 3;
+	    public static final int UTF8               = 4;
+	    public static final int UTF16_BigEndian    = 5;
+	    public static final int UTF16_LittleEndian = 6;
+	    public static final int UTF32_BigEndian    = 7;
+	    public static final int UTF32_LittleEndian = 8;
+	    public static final int EBCDIC_STATEFUL    = 9;
+	    public static final int ISO_2022           = 10;
+
+	    /* LMBCS variants: the numbering stays consecutive although the suffixes do not. */
+	    public static final int LMBCS_1  = 11;
+	    public static final int LMBCS_2  = 12;
+	    public static final int LMBCS_3  = 13;
+	    public static final int LMBCS_4  = 14;
+	    public static final int LMBCS_5  = 15;
+	    public static final int LMBCS_6  = 16;
+	    public static final int LMBCS_8  = 17;
+	    public static final int LMBCS_11 = 18;
+	    public static final int LMBCS_16 = 19;
+	    public static final int LMBCS_17 = 20;
+	    public static final int LMBCS_18 = 21;
+	    public static final int LMBCS_19 = 22;
+	    /* Alias for the highest LMBCS value. */
+	    public static final int LMBCS_LAST = LMBCS_19;
+
+	    public static final int HZ           = 23;
+	    public static final int SCSU         = 24;
+	    public static final int ISCII        = 25;
+	    public static final int US_ASCII     = 26;
+	    public static final int UTF7         = 27;
+	    public static final int BOCU1        = 28;
+	    public static final int UTF16        = 29;
+	    public static final int UTF32        = 30;
+	    public static final int CESU8        = 31;
+	    public static final int IMAP_MAILBOX = 32;
+	    public static final int MAC_ARABIC   = 33;
+	    public static final int MAC_HEBREW   = 34;
+
+	    /* Number of converter types for which we have conversion routines. */
+	    public static final int NUMBER_OF_SUPPORTED_CONVERTER_TYPES = MAC_HEBREW + 1;
+
+	}
+	
+	/**
+	 * Enum-style integer constants specifying which platform a converter ID refers to.
+	 * The use of platform/CCSID is not recommended. See openCCSID().
+	 * @draft ICU 3.6
+	 */
+	public static final class UConverterPlatform {
+	    /* platform not known */
+	    public static final int UNKNOWN = -1;
+	    /* IBM is the only platform value defined here */
+	    public static final int IBM = 0;
+	}
+
+	/*
+	 * Per-type shared data placeholders, all initially null. The names inside
+	 * comments are types that currently have no placeholder field.
+	 */
+	static UConverterSharedData _MBCSData = null, /*_Latin1Data = null,*/ /*_UTF8Data = null,*/ /*_UTF16BEData = null,*/ /*_UTF16LEData = null,*/ /*_UTF32BEData = null,*/ /*_UTF32LEData = null,*/  /*_ISO2022Data = null,*/ _LMBCSData1 = null,_LMBCSData2 = null, _LMBCSData3 = null, _LMBCSData4 = null, _LMBCSData5 = null, _LMBCSData6 = null, _LMBCSData8 = null,_LMBCSData11 = null,_LMBCSData16 = null,_LMBCSData17 = null,_LMBCSData18 = null,_LMBCSData19 = null, _HZData = null, _SCSUData = null, /*_ISCIIData = null,*/ /*_ASCIIData = null,*/ _UTF7Data = null, _Bocu1Data = null, /*_UTF16Data = null, _UTF32Data = null,*/ _CESU8Data = null, _IMAPData = null;
+	/*
+	 * Indexed by converter type in getAlgorithmicTypeFromName().
+	 * Has no initializer here, so it is null until assigned elsewhere.
+	 */
+	static UConverterSharedData[] converterData;
+	static class cnvNameTypeClass {
+	  String name;
+		int type;
+		cnvNameTypeClass(String name_, int type_) { name = name_; type = type_; }
+	} 
+	
+	/*
+	 * Name-to-type table searched by getAlgorithmicTypeFromName(); the binary search
+	 * there requires it to be sorted by name. No initializer here: null until assigned.
+	 */
+	static cnvNameTypeClass cnvNameType[];
+	
+    /* Data type string for converter data ("cnv"). */
+    static final String DATA_TYPE = "cnv";
+    static final int CNV_DATA_BUFFER_SIZE = 25000;
+    /* Same value (100) as UConverterStaticData.sizeofUConverterStaticData. */
+    static final int SIZE_OF_UCONVERTER_SHARED_DATA = 100;
+
+	/* Code point range limits and UTF-16 surrogate arithmetic constants. */
+	static final int MAXIMUM_UCS2 =            0x0000FFFF;
+	static final int MAXIMUM_UTF =             0x0010FFFF;
+	static final int MAXIMUM_UCS4 =            0x7FFFFFFF;
+	static final int HALF_SHIFT =              10;
+	static final int HALF_BASE =               0x0010000;
+	static final int HALF_MASK =               0x3FF;
+	static final int SURROGATE_HIGH_START =    0xD800;
+	static final int SURROGATE_HIGH_END =      0xDBFF;
+	static final int SURROGATE_LOW_START =     0xDC00;
+	static final int SURROGATE_LOW_END =       0xDFFF;
+	
+	/* -SURROGATE_LOW_START + HALF_BASE, i.e. 0x10000 - 0xDC00 = 9216 */
+	static final int SURROGATE_LOW_BASE =      9216;
+}
--- a/icu4j/src/com/ibm/icu/impl/UConverterStaticData.java
+++ b/icu4j/src/com/ibm/icu/impl/UConverterStaticData.java
@ -0,0 +1,61 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.impl;
+
+/**
+ * Java counterpart of the C UConverterStaticData struct: the static (non-changing)
+ * description of a converter. The "+offset: size" notes give the layout of the
+ * 100-byte C structure.
+ */
+public final class UConverterStaticData {   /* +offset: size */
+    /** +0: 4  Size of this structure. */
+    public int structSize;
+
+    /** +4: 60  Internal name of the converter - invariant chars. */
+    public String name;
+
+    /** +64: 4  Codepage # (now IBM-$codepage). */
+    public int codepage;
+
+    /** +68: 1  Platform of the converter (only IBM now). */
+    public byte platform;
+    /** +69: 1  Conversion type. */
+    public byte conversionType;
+
+    /** +70: 1  Minimum # bytes per char in this codepage. */
+    public byte minBytesPerChar;
+    /** +71: 1  Maximum # bytes output per UChar in this codepage. */
+    public byte maxBytesPerChar;
+
+    /** +72: 4  Substitution character bytes, UCNV_MAX_SUBCHAR_LEN long [note: 4 and 8 byte boundary]. */
+    public byte[] subChar;
+    /** +76: 1 */
+    public byte subCharLen;
+
+    /** +77: 1  UBool needs to be changed to UBool to be consistent across platform. */
+    public byte hasToUnicodeFallback;
+    /** +78: 1 */
+    public byte hasFromUnicodeFallback;
+    /** +79: 1  bit 0: has supplementary, bit 1: has single surrogates (declared short here). */
+    public short unicodeMask;
+    /** +80: 1  Single-byte substitution character for IBM MBCS (0 if none). */
+    public byte subChar1;
+    /** +81: 19  Padding to round out the structure to a total size of 100. */
+    public byte[] reserved;
+
+    /** Size in bytes of the corresponding C structure. */
+    public static final int sizeofUConverterStaticData = 100;
+
+    /**
+     * Creates an instance with zero-filled subChar and reserved arrays and all
+     * other fields at their Java defaults.
+     */
+    public UConverterStaticData()
+    {
+        subChar = new byte[UConverterConstants.MAX_SUBCHAR_LEN];
+        reserved = new byte[19];
+    }
+
+    /**
+     * Creates an instance from explicit field values.
+     * subChar_ and reserved_ are copied into freshly allocated arrays of fixed
+     * length; input bytes beyond that length are dropped and missing bytes stay zero.
+     */
+    public UConverterStaticData(int structSize_, String name_, int codepage_, byte platform_, byte conversionType_, byte minBytesPerChar_, byte maxBytesPerChar_, byte[] subChar_, byte subCharLen_, byte hasToUnicodeFallback_, byte hasFromUnicodeFallback_, short unicodeMask_, byte subChar1_, byte[] reserved_)
+    {
+        this();   // allocates subChar and reserved
+
+        this.structSize = structSize_;
+        this.name = name_;
+        this.codepage = codepage_;
+        this.platform = platform_;
+        this.conversionType = conversionType_;
+        this.minBytesPerChar = minBytesPerChar_;
+        this.maxBytesPerChar = maxBytesPerChar_;
+        System.arraycopy(subChar_, 0, this.subChar, 0, Math.min(this.subChar.length, subChar_.length));
+        this.subCharLen = subCharLen_;
+        this.hasToUnicodeFallback = hasToUnicodeFallback_;
+        this.hasFromUnicodeFallback = hasFromUnicodeFallback_;
+        this.unicodeMask = unicodeMask_;
+        this.subChar1 = subChar1_;
+        System.arraycopy(reserved_, 0, this.reserved, 0, Math.min(this.reserved.length, reserved_.length));
+    }
+}
+