1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.gzip;
20
21 import java.io.BufferedInputStream;
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.EOFException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.util.zip.CRC32;
29 import java.util.zip.DataFormatException;
30 import java.util.zip.Deflater;
31 import java.util.zip.Inflater;
32
33 import org.apache.commons.compress.compressors.CompressorInputStream;
34 import org.apache.commons.compress.utils.ByteUtils;
35 import org.apache.commons.compress.utils.InputStreamStatistics;
36 import org.apache.commons.io.input.BoundedInputStream;
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public class GzipCompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
68
69
70
71 private static final int FHCRC = 0x02;
72 private static final int FEXTRA = 0x04;
73 private static final int FNAME = 0x08;
74 private static final int FCOMMENT = 0x10;
75 private static final int FRESERVED = 0xE0;
76
77
78
79
80
81
82
83
84
85
86 public static boolean matches(final byte[] signature, final int length) {
87 return length >= 2 && signature[0] == 31 && signature[1] == -117;
88 }
89
90 private static byte[] readToNull(final DataInput inData) throws IOException {
91 try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
92 int b;
93 while ((b = inData.readUnsignedByte()) != 0) {
94 bos.write(b);
95 }
96 return bos.toByteArray();
97 }
98 }
99
100 private final BoundedInputStream countingStream;
101
102
103
104 private final InputStream in;
105
106
107 private final boolean decompressConcatenated;
108
109
110 private final byte[] buf = new byte[8192];
111
112
113 private int bufUsed;
114
115
116 private Inflater inf = new Inflater(true);
117
118
119 private final CRC32 crc = new CRC32();
120
121
122 private boolean endReached;
123
124
125 private final byte[] oneByte = new byte[1];
126
127 private final GzipParameters parameters = new GzipParameters();
128
129
130
131
132
133
134
135
136
137
138 public GzipCompressorInputStream(final InputStream inputStream) throws IOException {
139 this(inputStream, false);
140 }
141
142
143
144
145
146
147
148
149
150
151
152
153
154 public GzipCompressorInputStream(final InputStream inputStream, final boolean decompressConcatenated) throws IOException {
155 countingStream = BoundedInputStream.builder().setInputStream(inputStream).get();
156
157
158 if (countingStream.markSupported()) {
159 in = countingStream;
160 } else {
161 in = new BufferedInputStream(countingStream);
162 }
163
164 this.decompressConcatenated = decompressConcatenated;
165 init(true);
166 }
167
168
169
170
171
172
173 @Override
174 public void close() throws IOException {
175 if (inf != null) {
176 inf.end();
177 inf = null;
178 }
179
180 if (this.in != System.in) {
181 this.in.close();
182 }
183 }
184
185
186
187
188 @Override
189 public long getCompressedCount() {
190 return countingStream.getCount();
191 }
192
193
194
195
196
197
198
199 public GzipParameters getMetaData() {
200 return parameters;
201 }
202
203 private boolean init(final boolean isFirstMember) throws IOException {
204 if (!isFirstMember && !decompressConcatenated) {
205 throw new IllegalStateException("Unexpected: isFirstMember and decompressConcatenated are both false!");
206 }
207
208
209 final int magic0 = in.read();
210
211
212
213 if (magic0 == -1 && !isFirstMember) {
214 return false;
215 }
216
217 if (magic0 != 31 || in.read() != 139) {
218 throw new IOException(isFirstMember ? "Input is not in the .gz format" : "Garbage after a valid .gz stream");
219 }
220
221
222 final DataInput inData = new DataInputStream(in);
223 final int method = inData.readUnsignedByte();
224 if (method != Deflater.DEFLATED) {
225 throw new IOException("Unsupported compression method " + method + " in the .gz header");
226 }
227
228 final int flg = inData.readUnsignedByte();
229 if ((flg & FRESERVED) != 0) {
230 throw new IOException("Reserved flags are set in the .gz header");
231 }
232
233 parameters.setModificationTime(ByteUtils.fromLittleEndian(inData, 4) * 1000);
234 switch (inData.readUnsignedByte()) {
235 case 2:
236 parameters.setCompressionLevel(Deflater.BEST_COMPRESSION);
237 break;
238 case 4:
239 parameters.setCompressionLevel(Deflater.BEST_SPEED);
240 break;
241 default:
242
243 break;
244 }
245 parameters.setOperatingSystem(inData.readUnsignedByte());
246
247
248 if ((flg & FEXTRA) != 0) {
249 int xlen = inData.readUnsignedByte();
250 xlen |= inData.readUnsignedByte() << 8;
251
252
253
254
255 while (xlen-- > 0) {
256 inData.readUnsignedByte();
257 }
258 }
259
260
261 if ((flg & FNAME) != 0) {
262 parameters.setFileName(new String(readToNull(inData), GzipUtils.GZIP_ENCODING));
263 }
264
265
266 if ((flg & FCOMMENT) != 0) {
267 parameters.setComment(new String(readToNull(inData), GzipUtils.GZIP_ENCODING));
268 }
269
270
271
272
273
274
275 if ((flg & FHCRC) != 0) {
276 inData.readShort();
277 }
278
279
280 inf.reset();
281 crc.reset();
282
283 return true;
284 }
285
286 @Override
287 public int read() throws IOException {
288 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
289 }
290
291
292
293
294
295
296 @Override
297 public int read(final byte[] b, int off, int len) throws IOException {
298 if (len == 0) {
299 return 0;
300 }
301 if (endReached) {
302 return -1;
303 }
304
305 int size = 0;
306
307 while (len > 0) {
308 if (inf.needsInput()) {
309
310
311 in.mark(buf.length);
312
313 bufUsed = in.read(buf);
314 if (bufUsed == -1) {
315 throw new EOFException();
316 }
317
318 inf.setInput(buf, 0, bufUsed);
319 }
320
321 final int ret;
322 try {
323 ret = inf.inflate(b, off, len);
324 } catch (final DataFormatException e) {
325 throw new IOException("Gzip-compressed data is corrupt");
326 }
327
328 crc.update(b, off, ret);
329 off += ret;
330 len -= ret;
331 size += ret;
332 count(ret);
333
334 if (inf.finished()) {
335
336
337 in.reset();
338
339 final int skipAmount = bufUsed - inf.getRemaining();
340 if (org.apache.commons.io.IOUtils.skip(in, skipAmount) != skipAmount) {
341 throw new IOException();
342 }
343
344 bufUsed = 0;
345
346 final DataInput inData = new DataInputStream(in);
347
348
349 final long crcStored = ByteUtils.fromLittleEndian(inData, 4);
350
351 if (crcStored != crc.getValue()) {
352 throw new IOException("Gzip-compressed data is corrupt " + "(CRC32 error)");
353 }
354
355
356 final long isize = ByteUtils.fromLittleEndian(inData, 4);
357
358 if (isize != (inf.getBytesWritten() & 0xffffffffL)) {
359 throw new IOException("Gzip-compressed data is corrupt" + "(uncompressed size mismatch)");
360 }
361
362
363 if (!decompressConcatenated || !init(false)) {
364 inf.end();
365 inf = null;
366 endReached = true;
367 return size == 0 ? -1 : size;
368 }
369 }
370 }
371
372 return size;
373 }
374 }