Bom Remover Source Code

IT_Programming/Java

Bom Remover Source Code

JJun ™ 2010. 12. 2. 14:45

package util;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;

import sun.io.ByteToCharConverter;
import sun.io.CharToByteConverter;

/**
 * Command-line utility that strips a leading byte-order mark (U+FEFF) from text files.
 *
 * <p>The BOM is removed by a round trip: the file is first rewritten as 7-bit ASCII with every
 * character above 0x7F turned into a backslash-u escape (the leading BOM is dropped at this
 * point), and the escaped file is then converted back into the original charset.
 *
 * <p>Side effects of the round trip, besides removing the BOM:
 * <ul>
 *   <li>every line, including the last one, ends with the platform line separator afterwards;</li>
 *   <li>backslash-u escapes that were already present in the file are turned into the characters
 *       they denote when the target charset can encode them.</li>
 * </ul>
 */
public class BOMDelete
{
    /** Charset assumed when the caller does not name one. */
    private static final String CHARSET = "UTF-8";

    /** Charset used to read the escaped file; maps every byte to the char with the same value. */
    private static final String ESCAPED_FILE_CHARSET = "ISO-8859-1";

    /** The byte-order mark as a decoded character. */
    private static final char BOM = (char) 0xFEFF;

    /** Highest character that is copied as-is instead of being escaped (DEL). */
    private static final char MAX_ASCII = (char) 0x7F;

    /** Lower-case digits used when writing escapes. */
    private static final String HEX_DIGITS = "0123456789abcdef";

    /**
     * Entry point: {@code java util.BOMDelete <file-or-directory> [charset]}.
     *
     * <p>For a directory, every regular file directly inside it is processed; sub-directories are
     * skipped. The charset defaults to UTF-8.
     *
     * @param args path of a file or directory, optionally followed by the charset name
     */
    public static void main(String [] args)
    {
        if (args == null || args.length == 0)
        {
            System.out.println("usage: java util.BOMDelete [[FilePath or FileName] charset]");
            return;
        }

        String charSet = CHARSET;
        if (args.length >= 2)
        {
            charSet = args[1];
        }

        try
        {
            File inFile = new File(args[0]);
            if (inFile.isDirectory())
            {
                File[] files = inFile.listFiles();
                if (files == null)
                {
                    System.err.println("can not list directory: " + inFile.getPath());
                    return;
                }
                for (int i = 0; i < files.length; i++)
                {
                    if (!files[i].isFile())
                    {
                        continue;
                    }
                    // Use the full path: the bare name would be resolved against the working directory.
                    String path = files[i].getPath();
                    deleteBOM(path, path + ".TMP", charSet);
                }
            }
            else if (inFile.isFile())
            {
                deleteBOM(args[0], args[0] + ".TMP", charSet);
            }
            else
            {
                System.err.println(args[0] + " is neither a file nor a directory.");
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Removes a leading BOM from {@code strInFileName}, rewriting the file in place.
     *
     * <p>{@code strOutFileName} is used as the intermediate escaped file and is deleted afterwards.
     * The back-conversion only runs when the forward conversion succeeded, so a failure in the first
     * step (for example an unknown charset) leaves the input file untouched. Failures are reported
     * with a stack trace on standard error.
     *
     * @param strInFileName  file to clean; it is overwritten with the result
     * @param strOutFileName scratch file that receives the escaped intermediate form
     * @param charSet        charset of the input file, or {@code null} for the platform default
     */
    public static void deleteBOM(String strInFileName, String strOutFileName, String charSet)
    {
        BOMDelete bom = new BOMDelete();
        File tmpFile = new File(strOutFileName);
        try
        {
            bom.convertToAscii(strInFileName, strOutFileName, charSet);
            bom.convertFromAscii(strOutFileName, strInFileName, charSet);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            // Never delete the scratch file when the caller passed the input file itself as scratch.
            if (!strOutFileName.equals(strInFileName) && tmpFile.exists())
            {
                tmpFile.delete();
            }
        }
    }

    /**
     * Converts a UTF-8 resource file into its ASCII-escaped form, like the JDK {@code native2ascii}
     * tool.
     *
     * @param strInFileName  file to convert
     * @param strOutFileName file that receives the escaped text
     */
    public void native2Ascii(String strInFileName, String strOutFileName)
    {
        native2Ascii(strInFileName, strOutFileName, CHARSET);
    }

    /**
     * Converts a resource file in the given charset into its ASCII-escaped form, like the JDK
     * {@code native2ascii} tool. A BOM at the very start of the file is dropped. When both file
     * names are equal the file is converted in place. Failures are reported with a stack trace on
     * standard error.
     *
     * @param strInFileName  file to convert
     * @param strOutFileName file that receives the escaped text
     * @param charSet        charset of the input file, or {@code null} for the platform default
     */
    public void native2Ascii(String strInFileName, String strOutFileName, String charSet)
    {
        try
        {
            convertToAscii(strInFileName, strOutFileName, charSet);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Converts an ASCII-escaped resource file back into UTF-8, like {@code native2ascii -reverse}.
     *
     * @param strInFileName  escaped file to convert
     * @param strOutFileName file that receives the decoded text
     */
    public void reverseNative2Ascii(String strInFileName, String strOutFileName)
    {
        reverseNative2Ascii(strInFileName, strOutFileName, CHARSET);
    }

    /**
     * Converts an ASCII-escaped resource file back into the given charset, like
     * {@code native2ascii -reverse}. Escapes that are malformed, truncated, or not encodable in the
     * target charset are copied verbatim. When both file names are equal the file is converted in
     * place. Failures are reported with a stack trace on standard error.
     *
     * @param strInFileName  escaped file to convert
     * @param strOutFileName file that receives the decoded text
     * @param charSet        charset of the output file, or {@code null} for the platform default
     */
    public void reverseNative2Ascii(String strInFileName, String strOutFileName, String charSet)
    {
        try
        {
            convertFromAscii(strInFileName, strOutFileName, charSet);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /** Forward conversion; same contract as {@code native2Ascii} but failures are thrown. */
    private void convertToAscii(String strInFileName, String strOutFileName, String charSet) throws IOException
    {
        // Resolve the charset before any file is opened so a bad name cannot truncate the output.
        Charset charset = resolveCharset(charSet);
        String lineSeparator = System.getProperty("line.separator");
        boolean inPlace = strInFileName.equals(strOutFileName);
        File checkFile = checkFileName(strInFileName, strOutFileName);

        InputStream inputStream = null;
        OutputStream outputStream = null;
        try
        {
            inputStream = makeInputStream(strInFileName);
            outputStream = makeOutputStream(checkFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, charset));

            // Only a U+FEFF that is the first character seen in the file is treated as a BOM.
            boolean atStartOfFile = true;
            String line;
            while ((line = reader.readLine()) != null)
            {
                for (int i = 0; i < line.length(); i++)
                {
                    char c = line.charAt(i);
                    if (c <= MAX_ASCII)
                    {
                        outputStream.write(c);
                    }
                    else if (!(atStartOfFile && c == BOM))
                    {
                        writeEscape(outputStream, c);
                    }
                    atStartOfFile = false;
                }
                writeAscii(outputStream, lineSeparator);
            }

            // Close explicitly so that a failing flush is reported instead of swallowed.
            outputStream.close();
            outputStream = null;
        }
        finally
        {
            closeQuietly(outputStream);
            closeQuietly(inputStream);
        }

        if (inPlace)
        {
            // Both streams are closed by now, which some platforms require before renaming.
            replaceFile(checkFile, new File(strOutFileName));
            System.out.println( "native2Ascii=" + strInFileName + ", check file = " + checkFile.getName());
        }
    }

    /** Reverse conversion; same contract as {@code reverseNative2Ascii} but failures are thrown. */
    private void convertFromAscii(String strInFileName, String strOutFileName, String charSet) throws IOException
    {
        Charset charset = resolveCharset(charSet);
        // canEncode must not be called on an encoder that is in the middle of an encoding
        // operation, so probing and writing use separate encoders.
        CharsetEncoder probe = charset.newEncoder();
        CharsetEncoder strictEncoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        String lineSeparator = System.getProperty("line.separator");
        boolean inPlace = strInFileName.equals(strOutFileName);
        File checkFile = checkFileName(strInFileName, strOutFileName);

        InputStream inputStream = null;
        OutputStream outputStream = null;
        try
        {
            inputStream = makeInputStream(strInFileName);
            outputStream = makeOutputStream(checkFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, ESCAPED_FILE_CHARSET));
            Writer writer = new OutputStreamWriter(outputStream, strictEncoder);

            String line;
            while ((line = reader.readLine()) != null)
            {
                writer.write(decodeEscapes(line, probe));
                writer.write(lineSeparator);
            }

            // Flushes the encoder and closes the underlying stream; reports unmappable characters.
            writer.close();
            outputStream = null;
        }
        finally
        {
            closeQuietly(outputStream);
            closeQuietly(inputStream);
        }

        if (inPlace)
        {
            replaceFile(checkFile, new File(strOutFileName));
            System.out.println( "reverseNative2Ascii=" + strInFileName + ", check file = " + checkFile.getName());
        }
    }

    /**
     * Replaces every well-formed, encodable backslash-u escape in {@code line} with the character
     * it denotes. A high-surrogate escape is only decoded together with a directly following
     * low-surrogate escape. Everything else is copied unchanged.
     */
    private static String decodeEscapes(String line, CharsetEncoder probe)
    {
        StringBuilder decoded = new StringBuilder(line.length());
        int pos = 0;
        while (pos < line.length())
        {
            int next = -1;
            int end = escapeEnd(line, pos);
            if (end > 0)
            {
                char unit = escapeValue(line, end);
                if (Character.isHighSurrogate(unit))
                {
                    int pairEnd = escapeEnd(line, end);
                    if (pairEnd > 0)
                    {
                        char low = escapeValue(line, pairEnd);
                        if (Character.isLowSurrogate(low) && probe.canEncode("" + unit + low))
                        {
                            decoded.append(unit).append(low);
                            next = pairEnd;
                        }
                    }
                }
                else if (probe.canEncode(unit))
                {
                    decoded.append(unit);
                    next = end;
                }
            }

            if (next < 0)
            {
                // Not an escape we can decode: keep the character literally.
                decoded.append(line.charAt(pos));
                next = pos + 1;
            }
            pos = next;
        }
        return decoded.toString();
    }

    /**
     * Returns the index just past the escape that starts at {@code pos} (a backslash, one or more
     * {@code u} characters and exactly four hex digits), or -1 when no such escape starts there.
     */
    private static int escapeEnd(String line, int pos)
    {
        int length = line.length();
        if (pos + 1 >= length || line.charAt(pos) != '\\' || line.charAt(pos + 1) != 'u')
        {
            return -1;
        }
        int digits = pos + 2;
        while (digits < length && line.charAt(digits) == 'u')
        {
            digits++;
        }
        if (digits + 4 > length)
        {
            return -1;
        }
        for (int i = digits; i < digits + 4; i++)
        {
            if (Character.digit(line.charAt(i), 16) < 0)
            {
                return -1;
            }
        }
        return digits + 4;
    }

    /** Returns the value of the four hex digits that end at {@code end} (as found by escapeEnd). */
    private static char escapeValue(String line, int end)
    {
        return (char) Integer.parseInt(line.substring(end - 4, end), 16);
    }

    /** Writes {@code c} as a backslash, a {@code u} and four lower-case hex digits. */
    private static void writeEscape(OutputStream out, char c) throws IOException
    {
        out.write('\\');
        out.write('u');
        for (int shift = 12; shift >= 0; shift -= 4)
        {
            out.write(HEX_DIGITS.charAt((c >> shift) & 0xF));
        }
    }

    /** Writes each character of {@code text} as a single byte. */
    private static void writeAscii(OutputStream out, String text) throws IOException
    {
        for (int i = 0; i < text.length(); i++)
        {
            out.write(text.charAt(i));
        }
    }

    /**
     * Looks up the named charset, or the platform default when {@code charSet} is {@code null}.
     *
     * @throws UnsupportedEncodingException if the name is illegal or not supported
     */
    private static Charset resolveCharset(String charSet) throws UnsupportedEncodingException
    {
        if (charSet == null)
        {
            return Charset.defaultCharset();
        }
        try
        {
            return Charset.forName(charSet);
        }
        catch (IllegalArgumentException e)
        {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
            UnsupportedEncodingException unsupported = new UnsupportedEncodingException(charSet);
            unsupported.initCause(e);
            throw unsupported;
        }
    }

    /**
     * Moves {@code source} onto {@code destination}, replacing an existing destination.
     *
     * @throws IOException if the destination could not be replaced; {@code source} is kept then
     */
    private static void replaceFile(File source, File destination) throws IOException
    {
        if (source.renameTo(destination))
        {
            return;
        }
        // renameTo may refuse to overwrite an existing file (platform dependent): remove and retry.
        if (destination.exists() && !destination.delete())
        {
            throw new IOException("can not replace " + destination.getPath()
                    + "; converted output kept in " + source.getPath());
        }
        if (!source.renameTo(destination))
        {
            throw new IOException("can not rename " + source.getPath() + " to " + destination.getPath());
        }
    }

    /** Closes {@code resource} if it is not null, ignoring failures (used on cleanup paths only). */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done about a failed close during cleanup.
        }
    }

    /**
     * Opens a buffered input stream on the named file.
     *
     * @param strInFileName name of the file to read
     * @return buffered stream over the file
     * @throws IOException if the file can not be read or opened
     */
    private InputStream makeInputStream(String strInFileName) throws IOException
    {
        File file = new File(strInFileName);
        if (!file.canRead())
        {
            throw new IOException(file.getName() + " can not be read.");
        }
        return new BufferedInputStream(new FileInputStream(file));
    }

    /**
     * Opens a buffered output stream on the given file, truncating it if it exists.
     *
     * @param outFile file to write
     * @return buffered stream over the file
     * @throws IOException if the file can not be opened for writing
     */
    private OutputStream makeOutputStream(File outFile) throws IOException
    {
        return new BufferedOutputStream(new FileOutputStream(outFile));
    }

    /**
     * Chooses the file a conversion writes to. When input and output names are equal, a not yet
     * existing temporary file named {@code _N2A<i>.TMP} in the same directory is returned; it is
     * renamed onto the real output once the conversion has finished. Otherwise the output file
     * itself is returned.
     *
     * @param strInFileName  input file name
     * @param strOutFileName output file name
     * @return file that the conversion should write to
     */
    private File checkFileName(String strInFileName, String strOutFileName)
    {
        File outFile = new File(strOutFileName);
        if (!strInFileName.equals(strOutFileName))
        {
            return outFile;
        }

        // getParentFile() is null for a bare file name; File(File, String) then resolves the
        // temporary name against the working directory, just like the bare name itself.
        File directory = outFile.getParentFile();
        int i = 0;
        File candidate = new File(directory, "_N2A" + i + ".TMP");
        while (candidate.exists())
        {
            i++;
            candidate = new File(directory, "_N2A" + i + ".TMP");
        }
        return candidate;
    }
}

저작자표시 비영리 변경금지

'IT_Programming > Java' 카테고리의 다른 글

[Java] 메모리에 zip 파일 생성하기 / 파일 캐싱 (0)	2010.12.29
[펌] Java code _ UTF형태 파일에서 BOM 제거하기 (0)	2010.12.02
[펌] 진보된 쓰레드 풀링 기법 구현 (0)	2010.11.30
내 쓰레드는 어디에? - 서버 애플리케이션에서 쓰레드 유출 피하기 (0)	2010.11.30
Handling Uncaught Exceptions (0)	2010.11.30

현재글Bom Remover Source Code

삶의 모든 순간을 즐겨라!

Today :
Yesterday :

삶의 모든 순간을 즐겨라!