IT_Programming/Java

[펌] Java code _ UTF형태 파일에서 BOM 제거하기

JJun ™ 2010. 12. 2. 14:47

-------------------------------------------------------------------------------------------------

출처: http://june1209.tistory.com/entry/UTF-8-%EC%9D%98-BOM-%EC%9D%84-%ED%95%9C%EA%BA%BC%EB%B2%88%EC%97%90-%EB%B0%94%EA%BE%B8%EB%8A%94-java-%EC%BD%94%EB%93%9C

-------------------------------------------------------------------------------------------------

 

리눅스 계열에서는 BOM 때문에 HTML이나 JSP 코드가 깨져서 나오는 경우가 많다.
한꺼번에 BOM을 삭제해주는 코드를 찾아봤으나 실패하고 직접 만들었다.
참고가 되길.

public void fncBomDelete(File directory) {
  try {
      for ( int i = 0; i < directory.listFiles().length; i++ ) {
          File tempFile = directory.listFiles()[i];
          if ( tempFile.isFile() ) {        
              FileInputStream fs = new FileInputStream(tempFile);
              byte[] bom = new byte[3];          
              fs.read(bom, 0, 3);
              if ( byteToHex(bom).equals("EFBBBF") ) {
                byte b[] = new byte[(int)tempFile.length()];
                int leng = 0;
                if( (leng = fs.read(b)) > 0 ){
                  FileOutputStream os = new FileOutputStream(tempFile.getAbsolutePath());
                  os.write(b,0,leng);
                  os.close();
                }
              }
              fs.close();
          } else if ( tempFile.isDirectory() ) {
            fncBomDelete(tempFile);
          }
        }
  } catch ( Exception e ) {
    e.fillInStackTrace();
  }
}
private static synchronized String byteToHex(byte[] data) {
  StringBuffer buf = new StringBuffer();
  for ( int i = 0; i < data.length;  i++) {
    buf.append(byteToHex(data[i]).toUpperCase());
  }
  return buf.toString();
}

private static synchronized String byteToHex(byte data) {
  StringBuffer buf = new StringBuffer();
  buf.append(toHexChar((data >>> 4) & 0x0F));
  buf.append(toHexChar(data & 0x0F));
  return buf.toString();
}

private static synchronized char toHexChar(int i) {
  if ((i >= 0) && (i <= 9)) {
    return (char) ('0' + i);
  } else {
    return (char) ('a' + (i -10));
  }
}

 


 

-------------------------------------------------------------------------------------------------

출처: http://www.omnibuscode.com/zeroboard/zboard.php?id=seeyou_programing_java&page=1&sn1=on&divpage=1&sn=on&ss=off&sc=off&keyword=Cugain&select_arrange=headnum&desc=asc&no=178

-------------------------------------------------------------------------------------------------

 

        String filePath = "C:\ReadMe.xml";   
        File file = new File(filePath);   
  
        if (file != null) {   
               
            // BOM 이 존재한다면 BOM을 제거하여 새로운 파일을 작성한다.(UTF-8)   
            DataInputStream dataInputStream = new DataInputStream(new FileInputStream(filePath));       
            int firstChar = dataInputStream.readByte();   
            int secondChar = dataInputStream.readByte();   
            int thirdChar = dataInputStream.readByte();   
               
            if (firstChar == -17 && secondChar == -69 && thirdChar == -65) {   
                DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(filePath+"_nobom"));       
                int i;   
                while ((i = dataInputStream.read()) != -1) {   
                    dataOutputStream.writeByte(i);   
                }   
                dataOutputStream.close();   
                file = new File(filePath+"_nobom");   
            }   
            dataInputStream.close();   
        } 

 

 

  /**  
   * BOM이 제거된 파일을 임시로 생성하여 리턴한다  
   * @param file  
   * @return  
   * @throws FileNotFoundException  
   * @throws IOException  
   */  
  public static File removeBOM(File file) throws FileNotFoundException, IOException {   
       
    DataInputStream dataInputStream = null;   
    try {   
    dataInputStream = new DataInputStream(new FileInputStream(file));          
       int firstChar = dataInputStream.readByte();      
       int secondChar = dataInputStream.readByte();      
       int thirdChar = dataInputStream.readByte();      
             
       if (firstChar == -17 && secondChar == -69 && thirdChar == -65) {   
        String filePath = file.getAbsolutePath();   
           
        DataOutputStream dataOutputStream = null;   
           
        try {   
            dataOutputStream = new DataOutputStream(new FileOutputStream(filePath+"_nobom"));          
            int i;      
            while ((i = dataInputStream.read()) != -1) {      
                dataOutputStream.writeByte(i);      
            }   
        } finally {   
            if (dataOutputStream != null) {   
                dataOutputStream.close();   
            }   
        }   
                 
        file = new File(filePath+"_nobom");      
       }      
    } finally {   
       if (dataInputStream != null) {   
         dataInputStream.close();   
       }   
    }   
         
    return file;   
       
  } 

 

 

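참고사항 : BOM은 다음과 같은 형태가 존재한다.

               (모든 UTF 형태의 문서에서 BOM 검사를 수행하려면 다음을 참고한다. UTF-32 little-endian의 BOM(FF FE 00 00)은 UTF-16 little-endian의 BOM(FF FE)으로 시작하므로, 4바이트 패턴을 2바이트 패턴보다 먼저 검사해야 한다.)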

 

 

  00 00 FE FF         UTF-32, big-endian
  FF FE 00 00         UTF-32, little-endian
  FE FF                 UTF-16, big-endian
  FF FE                 UTF-16, little-endian
  EF BB BF            UTF-8