-
Bug
-
Resolution: Not an Issue
-
P4
-
None
-
1.4.2
-
x86
-
windows_xp
FULL PRODUCT VERSION :
java version "1.4.2_08"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.2_08-b03)
Java HotSpot(TM) Client VM (build 1.4.2_08-b03, mixed mode)
ADDITIONAL OS VERSION INFORMATION :
Microsoft Windows XP [Version 5.1.2600]
A DESCRIPTION OF THE PROBLEM :
I have a binary file "test_jap_enc_dec.dat" which contains Japanese characters in Shift_JIS format. I'm reading all the bytes and converting them to a Japanese String using the String constructor String(buf, 0, len, "shift_jis"), and then writing the result back to a file, test_jap_enc_dec.html, using UTF-8. Here is the code:
import java.io.*;
import java.util.*;
/**
 * Reads a Shift_JIS encoded file and writes its text, wrapped in a minimal
 * HTML page, to a UTF-8 encoded file.
 *
 * Input and output locations are fixed; see {@link #INPUT_PATH} and
 * {@link #OUTPUT_PATH}.
 */
public class StreamConverter {

    /** Location of the Shift_JIS encoded input file. */
    private static final String INPUT_PATH = "c:/test_jap_enc_dec.dat";

    /** Location of the UTF-8 encoded HTML file that is produced. */
    private static final String OUTPUT_PATH = "c:/test_jap_enc_dec.html";

    /** Size of the chunk used when reading the input file. */
    private static final int CHUNK_SIZE = 8192;

    /**
     * Writes the given text to {@link #OUTPUT_PATH} encoded as UTF-8.
     * Any I/O failure is reported on standard error; it is not rethrown.
     *
     * @param str the text to write
     */
    static void writeOutput(String str) {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(OUTPUT_PATH);
            Writer out = new OutputStreamWriter(fos, "UTF-8");
            out.write(str);
            // Closing the writer flushes the encoder; a failure here is a real
            // write failure and is reported by the catch block below.
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Releases the file handle when write() failed before out.close();
            // closing an already closed FileOutputStream is harmless.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // Nothing further can be done; the primary error, if any,
                    // has already been reported.
                }
            }
        }
    }

    /**
     * Reads the whole of {@link #INPUT_PATH} and decodes it as Shift_JIS.
     * The file may be of any size that fits in memory.
     *
     * @return the decoded text, or null if the file could not be read
     *         (the failure is reported on standard error)
     */
    static String readInput() {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(INPUT_PATH);
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] chunk = new byte[CHUNK_SIZE];
            int n;
            while ((n = fis.read(chunk)) != -1) {
                bytes.write(chunk, 0, n);
            }
            // Decode only once all bytes are collected so that a multi-byte
            // character is never split across two decode calls.
            return new String(bytes.toByteArray(), "shift_jis");
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The data has already been read (or the read error has
                    // already been reported); a close failure changes nothing.
                }
            }
        }
    }

    /**
     * Converts the input file to the HTML output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inputString = readInput();
        if (inputString == null) {
            // readInput() has already reported the failure; do not produce a
            // page whose body is the literal text "null".
            return;
        }
        String outStr = "<HTML><HEAD><META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html;charset=UTF-8\"></HEAD>"
                + "<BODY>" + inputString + "</BODY></HTML>";
        writeOutput(outStr);
    }
}
In the output file nothing has changed. I tried this with reading a UTF-8 encoded file and writing it back in UTF-8, and it works fine. I think Java is not converting from Shift_JIS to UTF-8 correctly.
We found a temporary solution. In JDK 1.3 we found that shift_jis is mapped to MS932. We are currently using JDK 1.4, so when we need to decode shift_jis to UTF-8 we use MS932 instead. But the problem still exists with charset.jar (I guess).
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
import java.io.*;
import java.util.*;
/**
 * Reads a Shift_JIS encoded file and writes its text, wrapped in a minimal
 * HTML page, to a UTF-8 encoded file.
 *
 * Input and output locations are fixed; see {@link #INPUT_PATH} and
 * {@link #OUTPUT_PATH}.
 */
public class StreamConverter {

    /** Location of the Shift_JIS encoded input file. */
    private static final String INPUT_PATH = "c:/test_jap_enc_dec.dat";

    /** Location of the UTF-8 encoded HTML file that is produced. */
    private static final String OUTPUT_PATH = "c:/test_jap_enc_dec.html";

    /** Size of the chunk used when reading the input file. */
    private static final int CHUNK_SIZE = 8192;

    /**
     * Writes the given text to {@link #OUTPUT_PATH} encoded as UTF-8.
     * Any I/O failure is reported on standard error; it is not rethrown.
     *
     * @param str the text to write
     */
    static void writeOutput(String str) {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(OUTPUT_PATH);
            Writer out = new OutputStreamWriter(fos, "UTF-8");
            out.write(str);
            // Closing the writer flushes the encoder; a failure here is a real
            // write failure and is reported by the catch block below.
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Releases the file handle when write() failed before out.close();
            // closing an already closed FileOutputStream is harmless.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // Nothing further can be done; the primary error, if any,
                    // has already been reported.
                }
            }
        }
    }

    /**
     * Reads the whole of {@link #INPUT_PATH} and decodes it as Shift_JIS.
     * The file may be of any size that fits in memory.
     *
     * @return the decoded text, or null if the file could not be read
     *         (the failure is reported on standard error)
     */
    static String readInput() {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(INPUT_PATH);
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] chunk = new byte[CHUNK_SIZE];
            int n;
            while ((n = fis.read(chunk)) != -1) {
                bytes.write(chunk, 0, n);
            }
            // Decode only once all bytes are collected so that a multi-byte
            // character is never split across two decode calls.
            return new String(bytes.toByteArray(), "shift_jis");
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The data has already been read (or the read error has
                    // already been reported); a close failure changes nothing.
                }
            }
        }
    }

    /**
     * Converts the input file to the HTML output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inputString = readInput();
        if (inputString == null) {
            // readInput() has already reported the failure; do not produce a
            // page whose body is the literal text "null".
            return;
        }
        String outStr = "<HTML><HEAD><META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html;charset=UTF-8\"></HEAD>"
                + "<BODY>" + inputString + "</BODY></HTML>";
        writeOutput(outStr);
    }
}
---------- END SOURCE ----------
###@###.### 2005-06-13 11:53:08 GMT
java version "1.4.2_08"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.2_08-b03)
Java HotSpot(TM) Client VM (build 1.4.2_08-b03, mixed mode)
ADDITIONAL OS VERSION INFORMATION :
Microsoft Windows XP [Version 5.1.2600]
A DESCRIPTION OF THE PROBLEM :
I have a binary file "test_jap_enc_dec.dat" which contains Japanese characters in Shift_JIS format. I'm reading all the bytes and converting them to a Japanese String using the String constructor String(buf, 0, len, "shift_jis"), and then writing the result back to a file, test_jap_enc_dec.html, using UTF-8. Here is the code:
import java.io.*;
import java.util.*;
/**
 * Reads a Shift_JIS encoded file and writes its text, wrapped in a minimal
 * HTML page, to a UTF-8 encoded file.
 *
 * Input and output locations are fixed; see {@link #INPUT_PATH} and
 * {@link #OUTPUT_PATH}.
 */
public class StreamConverter {

    /** Location of the Shift_JIS encoded input file. */
    private static final String INPUT_PATH = "c:/test_jap_enc_dec.dat";

    /** Location of the UTF-8 encoded HTML file that is produced. */
    private static final String OUTPUT_PATH = "c:/test_jap_enc_dec.html";

    /** Size of the chunk used when reading the input file. */
    private static final int CHUNK_SIZE = 8192;

    /**
     * Writes the given text to {@link #OUTPUT_PATH} encoded as UTF-8.
     * Any I/O failure is reported on standard error; it is not rethrown.
     *
     * @param str the text to write
     */
    static void writeOutput(String str) {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(OUTPUT_PATH);
            Writer out = new OutputStreamWriter(fos, "UTF-8");
            out.write(str);
            // Closing the writer flushes the encoder; a failure here is a real
            // write failure and is reported by the catch block below.
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Releases the file handle when write() failed before out.close();
            // closing an already closed FileOutputStream is harmless.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // Nothing further can be done; the primary error, if any,
                    // has already been reported.
                }
            }
        }
    }

    /**
     * Reads the whole of {@link #INPUT_PATH} and decodes it as Shift_JIS.
     * The file may be of any size that fits in memory.
     *
     * @return the decoded text, or null if the file could not be read
     *         (the failure is reported on standard error)
     */
    static String readInput() {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(INPUT_PATH);
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] chunk = new byte[CHUNK_SIZE];
            int n;
            while ((n = fis.read(chunk)) != -1) {
                bytes.write(chunk, 0, n);
            }
            // Decode only once all bytes are collected so that a multi-byte
            // character is never split across two decode calls.
            return new String(bytes.toByteArray(), "shift_jis");
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The data has already been read (or the read error has
                    // already been reported); a close failure changes nothing.
                }
            }
        }
    }

    /**
     * Converts the input file to the HTML output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inputString = readInput();
        if (inputString == null) {
            // readInput() has already reported the failure; do not produce a
            // page whose body is the literal text "null".
            return;
        }
        String outStr = "<HTML><HEAD><META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html;charset=UTF-8\"></HEAD>"
                + "<BODY>" + inputString + "</BODY></HTML>";
        writeOutput(outStr);
    }
}
In the output file nothing has changed. I tried this with reading a UTF-8 encoded file and writing it back in UTF-8, and it works fine. I think Java is not converting from Shift_JIS to UTF-8 correctly.
We found a temporary solution. In JDK 1.3 we found that shift_jis is mapped to MS932. We are currently using JDK 1.4, so when we need to decode shift_jis to UTF-8 we use MS932 instead. But the problem still exists with charset.jar (I guess).
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
import java.io.*;
import java.util.*;
/**
 * Reads a Shift_JIS encoded file and writes its text, wrapped in a minimal
 * HTML page, to a UTF-8 encoded file.
 *
 * Input and output locations are fixed; see {@link #INPUT_PATH} and
 * {@link #OUTPUT_PATH}.
 */
public class StreamConverter {

    /** Location of the Shift_JIS encoded input file. */
    private static final String INPUT_PATH = "c:/test_jap_enc_dec.dat";

    /** Location of the UTF-8 encoded HTML file that is produced. */
    private static final String OUTPUT_PATH = "c:/test_jap_enc_dec.html";

    /** Size of the chunk used when reading the input file. */
    private static final int CHUNK_SIZE = 8192;

    /**
     * Writes the given text to {@link #OUTPUT_PATH} encoded as UTF-8.
     * Any I/O failure is reported on standard error; it is not rethrown.
     *
     * @param str the text to write
     */
    static void writeOutput(String str) {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(OUTPUT_PATH);
            Writer out = new OutputStreamWriter(fos, "UTF-8");
            out.write(str);
            // Closing the writer flushes the encoder; a failure here is a real
            // write failure and is reported by the catch block below.
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Releases the file handle when write() failed before out.close();
            // closing an already closed FileOutputStream is harmless.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // Nothing further can be done; the primary error, if any,
                    // has already been reported.
                }
            }
        }
    }

    /**
     * Reads the whole of {@link #INPUT_PATH} and decodes it as Shift_JIS.
     * The file may be of any size that fits in memory.
     *
     * @return the decoded text, or null if the file could not be read
     *         (the failure is reported on standard error)
     */
    static String readInput() {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(INPUT_PATH);
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] chunk = new byte[CHUNK_SIZE];
            int n;
            while ((n = fis.read(chunk)) != -1) {
                bytes.write(chunk, 0, n);
            }
            // Decode only once all bytes are collected so that a multi-byte
            // character is never split across two decode calls.
            return new String(bytes.toByteArray(), "shift_jis");
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The data has already been read (or the read error has
                    // already been reported); a close failure changes nothing.
                }
            }
        }
    }

    /**
     * Converts the input file to the HTML output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inputString = readInput();
        if (inputString == null) {
            // readInput() has already reported the failure; do not produce a
            // page whose body is the literal text "null".
            return;
        }
        String outStr = "<HTML><HEAD><META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html;charset=UTF-8\"></HEAD>"
                + "<BODY>" + inputString + "</BODY></HTML>";
        writeOutput(outStr);
    }
}
---------- END SOURCE ----------
###@###.### 2005-06-13 11:53:08 GMT