View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.gzip;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  import java.net.URI;
24  import java.net.URISyntaxException;
25  import java.nio.ByteBuffer;
26  import java.nio.ByteOrder;
27  import java.nio.charset.StandardCharsets;
28  import java.util.zip.CRC32;
29  import java.util.zip.Deflater;
30  import java.util.zip.GZIPInputStream;
31  import java.util.zip.GZIPOutputStream;
32  
33  import org.apache.commons.compress.compressors.CompressorOutputStream;
34  
35  /**
36   * Compressed output stream using the gzip format. This implementation improves over the standard {@link GZIPOutputStream} class by allowing the configuration
37   * of the compression level and the header metadata (file name, comment, modification time, operating system and extra flags).
38   *
39   * @see <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a>
40   */
41  public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStream> {
42  
43      /** Header flag indicating a file name follows the header */
44      private static final int FNAME = 1 << 3;
45  
46      /** Header flag indicating a comment follows the header */
47      private static final int FCOMMENT = 1 << 4;
48  
49      /** Deflater used to compress the data */
50      private final Deflater deflater;
51  
52      /** The buffer receiving the compressed data from the deflater */
53      private final byte[] deflateBuffer;
54  
55      /** Indicates if the stream has been closed */
56      private boolean closed;
57  
58      /** The checksum of the uncompressed data */
59      private final CRC32 crc = new CRC32();
60  
61      /**
62       * Creates a gzip compressed output stream with the default parameters.
63       *
64       * @param out the stream to compress to
65       * @throws IOException if writing fails
66       */
67      public GzipCompressorOutputStream(final OutputStream out) throws IOException {
68          this(out, new GzipParameters());
69      }
70  
71      /**
72       * Creates a gzip compressed output stream with the specified parameters.
73       *
74       * @param out        the stream to compress to
75       * @param parameters the parameters to use
76       * @throws IOException if writing fails
77       *
78       * @since 1.7
79       */
80      public GzipCompressorOutputStream(final OutputStream out, final GzipParameters parameters) throws IOException {
81          super(out);
82          this.deflater = new Deflater(parameters.getCompressionLevel(), true);
83          this.deflater.setStrategy(parameters.getDeflateStrategy());
84          this.deflateBuffer = new byte[parameters.getBufferSize()];
85          writeHeader(parameters);
86      }
87  
88      @Override
89      public void close() throws IOException {
90          if (!closed) {
91              try {
92                  finish();
93              } finally {
94                  deflater.end();
95                  out.close();
96                  closed = true;
97              }
98          }
99      }
100 
101     private void deflate() throws IOException {
102         final int length = deflater.deflate(deflateBuffer, 0, deflateBuffer.length);
103         if (length > 0) {
104             out.write(deflateBuffer, 0, length);
105         }
106     }
107 
108     /**
109      * Finishes writing compressed data to the underlying stream without closing it.
110      *
111      * @since 1.7
112      * @throws IOException on error
113      */
114     public void finish() throws IOException {
115         if (!deflater.finished()) {
116             deflater.finish();
117 
118             while (!deflater.finished()) {
119                 deflate();
120             }
121 
122             writeTrailer();
123         }
124     }
125 
126     /**
127      * Gets the bytes encoded in the {@value GzipUtils#GZIP_ENCODING} Charset.
128      * <p>
129      * If the string cannot be encoded directly with {@value GzipUtils#GZIP_ENCODING}, then use URI-style percent encoding.
130      * </p>
131      *
132      * @param string The string to encode.
133      * @return
134      * @throws IOException
135      */
136     private byte[] getBytes(final String string) throws IOException {
137         if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) {
138             return string.getBytes(GzipUtils.GZIP_ENCODING);
139         }
140         try {
141             return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
142         } catch (final URISyntaxException e) {
143             throw new IOException(string, e);
144         }
145     }
146 
147     /**
148      * {@inheritDoc}
149      *
150      * @since 1.1
151      */
152     @Override
153     public void write(final byte[] buffer) throws IOException {
154         write(buffer, 0, buffer.length);
155     }
156 
157     /**
158      * {@inheritDoc}
159      *
160      * @since 1.1
161      */
162     @Override
163     public void write(final byte[] buffer, final int offset, final int length) throws IOException {
164         if (deflater.finished()) {
165             throw new IOException("Cannot write more data, the end of the compressed data stream has been reached");
166         }
167         if (length > 0) {
168             deflater.setInput(buffer, offset, length);
169 
170             while (!deflater.needsInput()) {
171                 deflate();
172             }
173 
174             crc.update(buffer, offset, length);
175         }
176     }
177 
178     @Override
179     public void write(final int b) throws IOException {
180         write(new byte[] { (byte) (b & 0xff) }, 0, 1);
181     }
182 
183     private void writeHeader(final GzipParameters parameters) throws IOException {
184         final String fileName = parameters.getFileName();
185         final String comment = parameters.getComment();
186 
187         final ByteBuffer buffer = ByteBuffer.allocate(10);
188         buffer.order(ByteOrder.LITTLE_ENDIAN);
189         buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
190         buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
191         buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
192         buffer.putInt((int) (parameters.getModificationTime() / 1000));
193 
194         // extra flags
195         final int compressionLevel = parameters.getCompressionLevel();
196         if (compressionLevel == Deflater.BEST_COMPRESSION) {
197             buffer.put((byte) 2);
198         } else if (compressionLevel == Deflater.BEST_SPEED) {
199             buffer.put((byte) 4);
200         } else {
201             buffer.put((byte) 0);
202         }
203 
204         buffer.put((byte) parameters.getOperatingSystem());
205 
206         out.write(buffer.array());
207 
208         if (fileName != null) {
209             out.write(getBytes(fileName));
210             out.write(0);
211         }
212 
213         if (comment != null) {
214             out.write(getBytes(comment));
215             out.write(0);
216         }
217     }
218 
219     private void writeTrailer() throws IOException {
220         final ByteBuffer buffer = ByteBuffer.allocate(8);
221         buffer.order(ByteOrder.LITTLE_ENDIAN);
222         buffer.putInt((int) crc.getValue());
223         buffer.putInt(deflater.getTotalIn());
224 
225         out.write(buffer.array());
226     }
227 
228 }