/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
019package org.apache.commons.compress.compressors.gzip;
020
021import java.io.IOException;
022import java.io.OutputStream;
023import java.net.URI;
024import java.net.URISyntaxException;
025import java.nio.ByteBuffer;
026import java.nio.ByteOrder;
027import java.nio.charset.StandardCharsets;
028import java.util.zip.CRC32;
029import java.util.zip.Deflater;
030import java.util.zip.GZIPInputStream;
031import java.util.zip.GZIPOutputStream;
032
033import org.apache.commons.compress.compressors.CompressorOutputStream;
034
035/**
036 * Compressed output stream using the gzip format. This implementation improves over the standard {@link GZIPOutputStream} class by allowing the configuration
037 * of the compression level and the header metadata (file name, comment, modification time, operating system and extra flags).
038 *
039 * @see <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a>
040 */
041public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStream> {
042
043    /** Header flag indicating a file name follows the header */
044    private static final int FNAME = 1 << 3;
045
046    /** Header flag indicating a comment follows the header */
047    private static final int FCOMMENT = 1 << 4;
048
049    /** Deflater used to compress the data */
050    private final Deflater deflater;
051
052    /** The buffer receiving the compressed data from the deflater */
053    private final byte[] deflateBuffer;
054
055    /** Indicates if the stream has been closed */
056    private boolean closed;
057
058    /** The checksum of the uncompressed data */
059    private final CRC32 crc = new CRC32();
060
061    /**
062     * Creates a gzip compressed output stream with the default parameters.
063     *
064     * @param out the stream to compress to
065     * @throws IOException if writing fails
066     */
067    public GzipCompressorOutputStream(final OutputStream out) throws IOException {
068        this(out, new GzipParameters());
069    }
070
071    /**
072     * Creates a gzip compressed output stream with the specified parameters.
073     *
074     * @param out        the stream to compress to
075     * @param parameters the parameters to use
076     * @throws IOException if writing fails
077     *
078     * @since 1.7
079     */
080    public GzipCompressorOutputStream(final OutputStream out, final GzipParameters parameters) throws IOException {
081        super(out);
082        this.deflater = new Deflater(parameters.getCompressionLevel(), true);
083        this.deflater.setStrategy(parameters.getDeflateStrategy());
084        this.deflateBuffer = new byte[parameters.getBufferSize()];
085        writeHeader(parameters);
086    }
087
088    @Override
089    public void close() throws IOException {
090        if (!closed) {
091            try {
092                finish();
093            } finally {
094                deflater.end();
095                out.close();
096                closed = true;
097            }
098        }
099    }
100
101    private void deflate() throws IOException {
102        final int length = deflater.deflate(deflateBuffer, 0, deflateBuffer.length);
103        if (length > 0) {
104            out.write(deflateBuffer, 0, length);
105        }
106    }
107
108    /**
109     * Finishes writing compressed data to the underlying stream without closing it.
110     *
111     * @since 1.7
112     * @throws IOException on error
113     */
114    public void finish() throws IOException {
115        if (!deflater.finished()) {
116            deflater.finish();
117
118            while (!deflater.finished()) {
119                deflate();
120            }
121
122            writeTrailer();
123        }
124    }
125
126    /**
127     * Gets the bytes encoded in the {@value GzipUtils#GZIP_ENCODING} Charset.
128     * <p>
129     * If the string cannot be encoded directly with {@value GzipUtils#GZIP_ENCODING}, then use URI-style percent encoding.
130     * </p>
131     *
132     * @param string The string to encode.
133     * @return
134     * @throws IOException
135     */
136    private byte[] getBytes(final String string) throws IOException {
137        if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) {
138            return string.getBytes(GzipUtils.GZIP_ENCODING);
139        }
140        try {
141            return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
142        } catch (final URISyntaxException e) {
143            throw new IOException(string, e);
144        }
145    }
146
147    /**
148     * {@inheritDoc}
149     *
150     * @since 1.1
151     */
152    @Override
153    public void write(final byte[] buffer) throws IOException {
154        write(buffer, 0, buffer.length);
155    }
156
157    /**
158     * {@inheritDoc}
159     *
160     * @since 1.1
161     */
162    @Override
163    public void write(final byte[] buffer, final int offset, final int length) throws IOException {
164        if (deflater.finished()) {
165            throw new IOException("Cannot write more data, the end of the compressed data stream has been reached");
166        }
167        if (length > 0) {
168            deflater.setInput(buffer, offset, length);
169
170            while (!deflater.needsInput()) {
171                deflate();
172            }
173
174            crc.update(buffer, offset, length);
175        }
176    }
177
178    @Override
179    public void write(final int b) throws IOException {
180        write(new byte[] { (byte) (b & 0xff) }, 0, 1);
181    }
182
183    private void writeHeader(final GzipParameters parameters) throws IOException {
184        final String fileName = parameters.getFileName();
185        final String comment = parameters.getComment();
186
187        final ByteBuffer buffer = ByteBuffer.allocate(10);
188        buffer.order(ByteOrder.LITTLE_ENDIAN);
189        buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
190        buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
191        buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
192        buffer.putInt((int) (parameters.getModificationTime() / 1000));
193
194        // extra flags
195        final int compressionLevel = parameters.getCompressionLevel();
196        if (compressionLevel == Deflater.BEST_COMPRESSION) {
197            buffer.put((byte) 2);
198        } else if (compressionLevel == Deflater.BEST_SPEED) {
199            buffer.put((byte) 4);
200        } else {
201            buffer.put((byte) 0);
202        }
203
204        buffer.put((byte) parameters.getOperatingSystem());
205
206        out.write(buffer.array());
207
208        if (fileName != null) {
209            out.write(getBytes(fileName));
210            out.write(0);
211        }
212
213        if (comment != null) {
214            out.write(getBytes(comment));
215            out.write(0);
216        }
217    }
218
219    private void writeTrailer() throws IOException {
220        final ByteBuffer buffer = ByteBuffer.allocate(8);
221        buffer.order(ByteOrder.LITTLE_ENDIAN);
222        buffer.putInt((int) crc.getValue());
223        buffer.putInt(deflater.getTotalIn());
224
225        out.write(buffer.array());
226    }
227
228}