// java/6 : sun/io/CharToByteCp933.java
//
// CharToByteCp933
// https://openjdk.java.net/
// GPLv2 + Classpath Exception
/*
 * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package sun.io;

import sun.nio.cs.ext.IBM933;

Author: Malcolm Ayres/**
* @author Malcolm Ayres
*/

public class CharToByteCp933 extends CharToByteConverter
{
    private static final char SBase = '\uAC00';
    private static final char LBase = '\u1100';
    private static final char VBase = '\u1161';
    private static final char TBase = '\u11A7';
    private static final int  VCount = 21;
    private static final int  TCount = 28;
    private static final byte G0 = 0;
    private static final byte G1 = 1;
    private static final byte G2 = 2;
    private static final byte G3 = 3;
    private byte   charState = G0;
    private char   l, v, t;

    private int    byteState;
    private byte[] outputByte;
    private static final int SBCS = 0;
    private static final int DBCS = 1;
    private static final byte SO = 0x0e;
    private static final byte SI = 0x0f;

    private char highHalfZoneCode;

    private short[] index1;
    private String index2;
    private String index2a;
    private int  mask1;
    private int  mask2;
    private int  shift;

    private final static IBM933 nioCoder = new IBM933();


    public CharToByteCp933() {
       super();
       byteState = doSBCS()?SBCS:DBCS;
       highHalfZoneCode = 0;
       outputByte = new byte[2];
       mask1 = 0xFFF8;
       mask2 = 0x0007;
       shift = 3;
       index1 = nioCoder.getEncoderIndex1();
       index2 = nioCoder.getEncoderIndex2();
       index2a = nioCoder.getEncoderIndex2a();
       subBytes = new byte[1];
       subBytes[0] = 0x6f;
    }

    flush out any residual data and reset the buffer state
/**
      * flush out any residual data and reset the buffer state
      */
    public int flush(byte[] output, int outStart, int outEnd)
        throws MalformedInputException,
               ConversionBufferFullException
    {
       int bytesOut;

       byteOff = outStart;

       if (highHalfZoneCode != 0) {
           reset();
           badInputLength = 0;
           throw new MalformedInputException();
       }

       if (charState != G0) {
           try {
              unicodeToBuffer(composeHangul() ,output, outEnd);
           }
           catch(UnknownCharacterException e) {
              reset();
              badInputLength = 0;
              throw new MalformedInputException();
           }
           charState = G0;
       }

       if (byteState == DBCS && doSBCS()) {
          if (byteOff >= outEnd)
             throw new ConversionBufferFullException();
          output[byteOff++] = SI;
          byteState = SBCS;
       }

       bytesOut = byteOff - outStart;

       reset();
       return bytesOut;
    }

    Resets converter to its initial state.
/**
     * Resets converter to its initial state.
     */
    public void reset() {
       byteState = doSBCS()?SBCS:DBCS;
       highHalfZoneCode = 0;
       charState = G0;
       charOff = byteOff = 0;
    }

    Returns true if the given character can be converted to the
target character encoding.
/**
     * Returns true if the given character can be converted to the
     * target character encoding.
     */
    public boolean canConvert(char ch) {
       return encodeHangul(ch) != -1;
    }

    Sets the substitution bytes to use when the converter is in
substitution mode.  The given bytes should represent a valid
character in the target character encoding.
/**
     * Sets the substitution bytes to use when the converter is in
     * substitution mode.  The given bytes should represent a valid
     * character in the target character encoding.
     */

    public void setSubstitutionBytes( byte[] newSubBytes )
       throws IllegalArgumentException
    {
       if( newSubBytes.length > 2 || newSubBytes.length == 0) {
           throw new IllegalArgumentException();
       }

       subBytes = new byte[ newSubBytes.length ];
       System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length );

    }

    Character conversion
/**
     * Character conversion
     */

    public int convert(char[] input, int inOff, int inEnd,
                       byte[] output, int outOff, int outEnd)
        throws UnknownCharacterException, MalformedInputException,
               ConversionBufferFullException
    {
       char    inputChar;
       int     inputSize;

       charOff = inOff;
       byteOff = outOff;

       while (charOff < inEnd) {

          if (highHalfZoneCode == 0) {
             inputChar = input[charOff];
             inputSize = 1;
          } else {
             inputChar = highHalfZoneCode;
             inputSize = 0;
             highHalfZoneCode = 0;
          }

          switch (charState) {
          case G0:

             l = LBase;
             v = VBase;
             t = TBase;

             if ( isLeadingC(inputChar) ) {     // Leading Consonant
                l = inputChar;
                charState = G1;
                break;
             }

             if ( isVowel(inputChar) ) {        // Vowel
                v = inputChar;
                charState = G2;
                break;
             }

             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = inputChar;
                charState = G3;
                break;
             }

             break;

          case G1:
             if ( isLeadingC(inputChar) ) {     // Leading Consonant
                l = composeLL(l, inputChar);
                break;
             }

             if ( isVowel(inputChar) ) {        // Vowel
                v = inputChar;
                charState = G2;
                break;
             }

             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = inputChar;
                charState = G3;
                break;
             }

             unicodeToBuffer(composeHangul(), output, outEnd);

             charState = G0;
             break;

          case G2:
             if ( isLeadingC(inputChar) ) {     // Leading Consonant

                unicodeToBuffer(composeHangul(), output, outEnd);

                l = inputChar;
                v = VBase;
                t = TBase;
                charState = G1;
                break;
             }

             if ( isVowel(inputChar) ) {        // Vowel
                v = composeVV(l, inputChar);
                charState = G2;
                break;
             }

             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = inputChar;
                charState = G3;
                break;
             }

             unicodeToBuffer(composeHangul(), output, outEnd);

             charState = G0;

             break;

          case G3:
             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
                t = composeTT(t, inputChar);
                charState = G3;
                break;
             }

             unicodeToBuffer(composeHangul(), output, outEnd);

             charState = G0;

             break;
          }

          if (charState != G0)
             charOff++;
          else {

             // Is this a high surrogate?
             if(inputChar >= '\ud800' && inputChar <= '\udbff') {
                // Is this the last character of the input?
                if (charOff + inputSize >= inEnd) {
                   highHalfZoneCode = inputChar;
                   charOff += inputSize;
                   break;
                }

                // Is there a low surrogate following?
                inputChar = input[charOff + inputSize];
                if (inputChar >= '\udc00' && inputChar <= '\udfff') {
                   // We have a valid surrogate pair.  Too bad we don't do
                   // surrogates.  Is substitution enabled?
                   if (subMode) {
                      if (subBytes.length == 1) {
                         outputByte[0] = 0x00;
                         outputByte[1] = subBytes[0];
                      } else {
                         outputByte[0] = subBytes[0];
                         outputByte[1] = subBytes[1];
                      }

                      bytesToBuffer(outputByte, output, outEnd);
                      inputSize++;
                   } else {
                      badInputLength = 2;
                      throw new UnknownCharacterException();
                   }
                } else {
                   // We have a malformed surrogate pair
                   badInputLength = 1;
                   throw new MalformedInputException();
                }
             }

               // Is this an unaccompanied low surrogate?
             else
                if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
                   badInputLength = 1;
                   throw new MalformedInputException();
                } else {
                   unicodeToBuffer(inputChar, output, outEnd);
                }

             charOff += inputSize;

          }

       }

       return byteOff - outOff;

    }

    private char composeHangul() {
       int lIndex, vIndex, tIndex;

       lIndex = l - LBase;
       vIndex = v - VBase;
       tIndex = t - TBase;

       return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
    }

    private char composeLL(char l1, char l2) {
       return l2;
    }

    private char composeVV(char v1, char v2) {
       return v2;
    }

    private char composeTT(char t1, char t2) {
       return t2;
    }

    private boolean isLeadingC(char c) {
       return (c >= LBase && c <= '\u1159');
    }

    private boolean isVowel(char c) {
       return (c >= VBase && c <= '\u11a2');
    }

    private boolean isTrailingC(char c) {
       return (c >= TBase && c <= '\u11f9');
    }

    returns the maximum number of bytes needed to convert a char
/**
     * returns the maximum number of bytes needed to convert a char
     */
    public int getMaxBytesPerChar() {
       return 4;
    }

    Return the character set ID
/**
     * Return the character set ID
     */
    public String getCharacterEncoding() {
       return "Cp933";
    }

    private function to add the bytes to the output buffer
/**
     * private function to add the bytes to the output buffer
     */
    private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
        throws ConversionBufferFullException,
               UnknownCharacterException {

       int spaceNeeded;

       // Set the output buffer into the correct state

       if (byteState == DBCS && theBytes[0] == 0x00) {
          if (byteOff >= outEnd)
             throw new ConversionBufferFullException();
          byteState = SBCS;
          output[byteOff++] = SI;
       } else
          if (byteState == SBCS && theBytes[0] != 0x00) {
             if (byteOff >= outEnd)
                throw new ConversionBufferFullException();
             byteState = DBCS;
             output[byteOff++] = SO;
          }


       // ensure sufficient space for the bytes(s)

       if (byteState == DBCS)
          spaceNeeded = 2;
       else
          spaceNeeded = 1;

       if (byteOff + spaceNeeded > outEnd)
          throw new ConversionBufferFullException();

       // move the data into the buffer

       if (byteState == SBCS)
          output[byteOff++] = theBytes[1];
       else {
          output[byteOff++] = theBytes[0];
          output[byteOff++] = theBytes[1];
       }
    }

    // return -1 for unmappable character
    protected int encodeHangul(char unicode) {
        int theBytes;
        int index;
        index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
        if (index < 15000)
            theBytes = (int)(index2.charAt(index));
        else
            theBytes = (int)(index2a.charAt(index-15000));

       // The input char is undefined if theBytes is 0 and the char is NOT unicode 0
        if (theBytes == 0 && unicode != '\u0000')
            return -1;
        return theBytes;
    }

    private function to add a unicode character to the output buffer
/**
     * private function to add a unicode character to the output buffer
     */
    private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
        throws ConversionBufferFullException,
               UnknownCharacterException {

       // first we convert the unicode to its byte representation
       int theBytes = encodeHangul(unicode);

       // if the unicode was not mappable - look for the substitution bytes
       if (theBytes == -1) {
          if (subMode) {
             if (subBytes.length == 1) {
                outputByte[0] = 0x00;
                outputByte[1] = subBytes[0];
             } else {
                outputByte[0] = subBytes[0];
                outputByte[1] = subBytes[1];
             }
          } else {
             badInputLength = 1;
             throw new UnknownCharacterException();
          }
       } else {
           outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
           outputByte[1] = (byte)(theBytes & 0x000000ff);
       }

       // now put the bytes in the buffer
       bytesToBuffer(outputByte, output, outEnd);
    }

    //Methods below are for subclass Cp834
    protected boolean doSBCS() {
        return true;
    }
}
// java/6 : sun/io/CharToByteCp933.java