-
Bug
-
Resolution: Not an Issue
-
P4
-
None
-
17, 23, 24
-
generic
-
generic
ADDITIONAL SYSTEM INFORMATION :
jdk 17, 21, 23 on windows 10
A DESCRIPTION OF THE PROBLEM :
Deflater outputs excess bytes under specific conditions.
I can reproduce with a FileChannel.transferTo(), but not with simple loop with bytebuffers.
With code below. You can feed a small file, as little as 100-150 bytes and it still shows the issue. Stepping through the Deflater code, upon finishing, the result of the call
result = deflateBytesBuffer(...) near line 765 (jdk 17) returns a value, whose bits gets shifted to find the bytes read and written (line 782/783). The "written" value is definitely wrong. The side effect are that the deflater produces more bytes, mostly repeating a portion of what it has. This is why I used the NO_COMPRESSION, so that with a simple short text file, I could detect the repeated pattern.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Write a small txt files of a few line, although any file would do, it is preferable to have a small one to examine later. Run the program with this file in argument, or no argument if the file is named "x".
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
The 2 .gz files should have the same size and be valid .gzip files.
ACTUAL -
One of the file is corrupted because it contains too many bytes (repeated portion of the deflater output). The other file is fine. The outputs should be identical since the same input was provided. Only the byte feeding technique was different.
debugging the Deflater code, shows a distinct output from the native method deflateBytesBuffer(..)
---------- BEGIN SOURCE ----------
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.channels.SelectableChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.file.StandardOpenOption;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
public class TestDeflaterBug2 {
static final boolean GZ = true;
public static void main(String[] args) throws IOException {
int complevel = Deflater.NO_COMPRESSION;
//int complevel = Deflater.DEFAULT_COMPRESSION;
test(args, complevel, false);
test(args, complevel, true);
}
static void test(String[] args, int complevel, boolean useTrfTo) throws IOException {
String inputFilename = args.length>0 ? args[0] : "x";
File f1 = new File(inputFilename);
if (!f1.isFile())
throw new IllegalArgumentException("input file does not exist: " + f1);
String outputFilename = inputFilename+"-L"+(complevel<0?"d":complevel)+(useTrfTo?"-trfto":"-bbloop");
File f2 = new File(outputFilename+ (GZ?".gz":".deflated"));
File dir = f2.getParentFile();
if (dir != null && !dir.exists() && !dir.mkdirs() && !dir.exists()) {
throw new IllegalArgumentException("output file parent directory does not exist or cannot be created");
}
System.out.println(f2);
try (FileChannel inCh = FileChannel.open(f1.toPath(), StandardOpenOption.READ)) {
try (FileChannel outCh = FileChannel.open(f2.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
outCh.truncate(0);
try (GzipWritableChannel deflChannel = new GzipWritableChannel(outCh, complevel)) {
long t = System.currentTimeMillis();
/////////////////////////////
if(useTrfTo) {
long pos = 0;
long remaining = inCh.size();
while(remaining>0) {
long len = inCh.transferTo(pos, remaining, deflChannel);
remaining -= len;
pos += len;
}
} else {
//ByteBuffer inBB = ByteBuffer.allocateDirect(8192);
ByteBuffer inBB = ByteBuffer.allocate(8192);
long lastDot = 0;
long pos = 0;
long end = inCh.size();
while(pos<end) {
inBB.clear();
int qty = inCh.read(inBB);
pos += qty;
inBB.flip();
while(inBB.hasRemaining())
deflChannel.write(inBB);
long currentDot = pos >>> 20;
for (long d = lastDot + 1; d <= currentDot; d++) {
System.out.print('.');
if (d % 200 == 199)
System.out.println();
}
lastDot = currentDot;
}
}
deflChannel.finish();
/////////////////////////////
long d = Math.max(1,System.currentTimeMillis() - t);
System.out.println("\tTook " + d + " ms, at ~ " + (inCh.size() / d) + " kB/s into "+f2.length()+" bytes");
System.out.println("\tHex view it with cli command:");
System.out.println("\t\tod -A x -t x1z -v "+f2);
}
}
}
}
static class GzipWritableChannel implements WritableByteChannel {
private final Deflater deflater;
int compressionLevel;
private final ByteBuffer workBuffer;
private final CRC32 crc = new CRC32();
private boolean closed;
private final WritableByteChannel out;
public GzipWritableChannel(WritableByteChannel out, int compressionLevel) throws IOException {
this.out = out;
if (out instanceof SelectableChannel && !((SelectableChannel)out).isBlocking())
throw new IllegalArgumentException("SelectableChannel in non-blocking mode not supported");
this.deflater = new Deflater(compressionLevel, true);
this.workBuffer = ByteBuffer.allocateDirect(0xffff + 2);
if(GZ)
writeHeader();
}
private void writeHeader() throws IOException {
workBuffer.clear().order(ByteOrder.LITTLE_ENDIAN);
workBuffer.putShort((short)GZIPInputStream.GZIP_MAGIC); //ID1 ID2
workBuffer.put((byte)Deflater.DEFLATED); //CM
workBuffer.put((byte)0);
workBuffer.putInt(0);//MTIME
workBuffer.put((byte)switch (compressionLevel) { //XFL
case Deflater.BEST_COMPRESSION -> 2;
case Deflater.BEST_SPEED -> 4;
default -> 0;
});
workBuffer.put((byte)-1); //OS
workBuffer.flip();
while(workBuffer.hasRemaining())
out.write(workBuffer);
}
@Override
public boolean isOpen() {
return !closed;
}
@Override
public int write(ByteBuffer src) throws IOException {
if (deflater.finished())
throw new IOException("compression already finished");
int len = src.remaining();
if (len > 0) {
src.mark();
deflater.setInput(src);
while (!deflater.needsInput())
drainDeflaterOnce();
src.reset();
crc.update(src);
}
return len;
}
private void drainDeflaterOnce() throws IOException {
workBuffer.clear();
deflater.deflate(workBuffer);
workBuffer.flip();
while (workBuffer.hasRemaining()) //this is only efficient on blocking channel, else may spin fast...
out.write(workBuffer);
}
@Override
public void close() throws IOException {
if (closed)
return;
try {
finish();
} finally {
deflater.end();
out.close();
closed = true;
}
}
public void finish() throws IOException {
if (deflater.finished())
return;
deflater.finish();
while (!deflater.finished())
drainDeflaterOnce();
if(GZ)
writeFooter();
}
private void writeFooter() throws IOException {
workBuffer.clear().order(ByteOrder.LITTLE_ENDIAN);
workBuffer.putInt((int)crc.getValue());
workBuffer.putInt(deflater.getTotalIn());
workBuffer.flip();
while(workBuffer.hasRemaining())
out.write(workBuffer);
}
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
None. We cannot predict that a user of the deflater would be passing direct bytebuffer or not, and with the filechannel unexplained interference.
FREQUENCY : always
jdk 17, 21, 23 on windows 10
A DESCRIPTION OF THE PROBLEM :
Deflater outputs excess bytes under specific conditions.
I can reproduce with a FileChannel.transferTo(), but not with simple loop with bytebuffers.
With code below. You can feed a small file, as little as 100-150 bytes and it still shows the issue. Stepping through the Deflater code, upon finishing, the result of the call
result = deflateBytesBuffer(...) near line 765 (jdk 17) returns a value, whose bits gets shifted to find the bytes read and written (line 782/783). The "written" value is definitely wrong. The side effect are that the deflater produces more bytes, mostly repeating a portion of what it has. This is why I used the NO_COMPRESSION, so that with a simple short text file, I could detect the repeated pattern.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Write a small txt files of a few line, although any file would do, it is preferable to have a small one to examine later. Run the program with this file in argument, or no argument if the file is named "x".
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
The 2 .gz files should have the same size and be valid .gzip files.
ACTUAL -
One of the file is corrupted because it contains too many bytes (repeated portion of the deflater output). The other file is fine. The outputs should be identical since the same input was provided. Only the byte feeding technique was different.
debugging the Deflater code, shows a distinct output from the native method deflateBytesBuffer(..)
---------- BEGIN SOURCE ----------
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.channels.SelectableChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.file.StandardOpenOption;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
public class TestDeflaterBug2 {
static final boolean GZ = true;
public static void main(String[] args) throws IOException {
int complevel = Deflater.NO_COMPRESSION;
//int complevel = Deflater.DEFAULT_COMPRESSION;
test(args, complevel, false);
test(args, complevel, true);
}
static void test(String[] args, int complevel, boolean useTrfTo) throws IOException {
String inputFilename = args.length>0 ? args[0] : "x";
File f1 = new File(inputFilename);
if (!f1.isFile())
throw new IllegalArgumentException("input file does not exist: " + f1);
String outputFilename = inputFilename+"-L"+(complevel<0?"d":complevel)+(useTrfTo?"-trfto":"-bbloop");
File f2 = new File(outputFilename+ (GZ?".gz":".deflated"));
File dir = f2.getParentFile();
if (dir != null && !dir.exists() && !dir.mkdirs() && !dir.exists()) {
throw new IllegalArgumentException("output file parent directory does not exist or cannot be created");
}
System.out.println(f2);
try (FileChannel inCh = FileChannel.open(f1.toPath(), StandardOpenOption.READ)) {
try (FileChannel outCh = FileChannel.open(f2.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
outCh.truncate(0);
try (GzipWritableChannel deflChannel = new GzipWritableChannel(outCh, complevel)) {
long t = System.currentTimeMillis();
/////////////////////////////
if(useTrfTo) {
long pos = 0;
long remaining = inCh.size();
while(remaining>0) {
long len = inCh.transferTo(pos, remaining, deflChannel);
remaining -= len;
pos += len;
}
} else {
//ByteBuffer inBB = ByteBuffer.allocateDirect(8192);
ByteBuffer inBB = ByteBuffer.allocate(8192);
long lastDot = 0;
long pos = 0;
long end = inCh.size();
while(pos<end) {
inBB.clear();
int qty = inCh.read(inBB);
pos += qty;
inBB.flip();
while(inBB.hasRemaining())
deflChannel.write(inBB);
long currentDot = pos >>> 20;
for (long d = lastDot + 1; d <= currentDot; d++) {
System.out.print('.');
if (d % 200 == 199)
System.out.println();
}
lastDot = currentDot;
}
}
deflChannel.finish();
/////////////////////////////
long d = Math.max(1,System.currentTimeMillis() - t);
System.out.println("\tTook " + d + " ms, at ~ " + (inCh.size() / d) + " kB/s into "+f2.length()+" bytes");
System.out.println("\tHex view it with cli command:");
System.out.println("\t\tod -A x -t x1z -v "+f2);
}
}
}
}
static class GzipWritableChannel implements WritableByteChannel {
private final Deflater deflater;
int compressionLevel;
private final ByteBuffer workBuffer;
private final CRC32 crc = new CRC32();
private boolean closed;
private final WritableByteChannel out;
public GzipWritableChannel(WritableByteChannel out, int compressionLevel) throws IOException {
this.out = out;
if (out instanceof SelectableChannel && !((SelectableChannel)out).isBlocking())
throw new IllegalArgumentException("SelectableChannel in non-blocking mode not supported");
this.deflater = new Deflater(compressionLevel, true);
this.workBuffer = ByteBuffer.allocateDirect(0xffff + 2);
if(GZ)
writeHeader();
}
private void writeHeader() throws IOException {
workBuffer.clear().order(ByteOrder.LITTLE_ENDIAN);
workBuffer.putShort((short)GZIPInputStream.GZIP_MAGIC); //ID1 ID2
workBuffer.put((byte)Deflater.DEFLATED); //CM
workBuffer.put((byte)0);
workBuffer.putInt(0);//MTIME
workBuffer.put((byte)switch (compressionLevel) { //XFL
case Deflater.BEST_COMPRESSION -> 2;
case Deflater.BEST_SPEED -> 4;
default -> 0;
});
workBuffer.put((byte)-1); //OS
workBuffer.flip();
while(workBuffer.hasRemaining())
out.write(workBuffer);
}
@Override
public boolean isOpen() {
return !closed;
}
@Override
public int write(ByteBuffer src) throws IOException {
if (deflater.finished())
throw new IOException("compression already finished");
int len = src.remaining();
if (len > 0) {
src.mark();
deflater.setInput(src);
while (!deflater.needsInput())
drainDeflaterOnce();
src.reset();
crc.update(src);
}
return len;
}
private void drainDeflaterOnce() throws IOException {
workBuffer.clear();
deflater.deflate(workBuffer);
workBuffer.flip();
while (workBuffer.hasRemaining()) //this is only efficient on blocking channel, else may spin fast...
out.write(workBuffer);
}
@Override
public void close() throws IOException {
if (closed)
return;
try {
finish();
} finally {
deflater.end();
out.close();
closed = true;
}
}
public void finish() throws IOException {
if (deflater.finished())
return;
deflater.finish();
while (!deflater.finished())
drainDeflaterOnce();
if(GZ)
writeFooter();
}
private void writeFooter() throws IOException {
workBuffer.clear().order(ByteOrder.LITTLE_ENDIAN);
workBuffer.putInt((int)crc.getValue());
workBuffer.putInt(deflater.getTotalIn());
workBuffer.flip();
while(workBuffer.hasRemaining())
out.write(workBuffer);
}
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
None. We cannot predict that a user of the deflater would be passing direct bytebuffer or not, and with the filechannel unexplained interference.
FREQUENCY : always