001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.z;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.lzw.LZWInputStream;
026
027/**
028 * Input stream that decompresses .Z files.
029 *
030 * @NotThreadSafe
031 * @since 1.7
032 */
033public class ZCompressorInputStream extends LZWInputStream {
034    private static final int MAGIC_1 = 0x1f;
035    private static final int MAGIC_2 = 0x9d;
036    private static final int BLOCK_MODE_MASK = 0x80;
037    private static final int MAX_CODE_SIZE_MASK = 0x1f;
038
039    /**
040     * Checks if the signature matches what is expected for a UNIX compress file.
041     *
042     * @param signature the bytes to check
043     * @param length    the number of bytes to check
044     * @return true, if this stream is a UNIX compress compressed stream, false otherwise
045     *
046     * @since 1.9
047     */
048    public static boolean matches(final byte[] signature, final int length) {
049        return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
050    }
051
052    private final boolean blockMode;
053    private final int maxCodeSize;
054
055    private long totalCodesRead;
056
057    public ZCompressorInputStream(final InputStream inputStream) throws IOException {
058        this(inputStream, -1);
059    }
060
061    public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) throws IOException {
062        super(inputStream, ByteOrder.LITTLE_ENDIAN);
063        final int firstByte = (int) in.readBits(8);
064        final int secondByte = (int) in.readBits(8);
065        final int thirdByte = (int) in.readBits(8);
066        if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
067            throw new IOException("Input is not in .Z format");
068        }
069        blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
070        maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
071        if (blockMode) {
072            setClearCode(DEFAULT_CODE_SIZE);
073        }
074        initializeTables(maxCodeSize, memoryLimitInKb);
075        clearEntries();
076    }
077
078    /**
079     * {@inheritDoc}
080     * <p>
081     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
082     * warning.</strong>
083     * </p>
084     */
085    @Override
086    protected int addEntry(final int previousCode, final byte character) throws IOException {
087        final int maxTableSize = 1 << getCodeSize();
088        final int r = addEntry(previousCode, character, maxTableSize);
089        if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
090            reAlignReading();
091            incrementCodeSize();
092        }
093        return r;
094    }
095
096    private void clearEntries() {
097        setTableSize((1 << 8) + (blockMode ? 1 : 0));
098    }
099
100    /**
101     * {@inheritDoc}
102     * <p>
103     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
104     * warning.</strong>
105     * </p>
106     */
107    @Override
108    protected int decompressNextSymbol() throws IOException {
109        //
110        // table entry table entry
111        // _____________ _____
112        // table entry / \ / \
113        // ____________/ \ \
114        // / / \ / \ \
115        // +---+---+---+---+---+---+---+---+---+---+
116        // | . | . | . | . | . | . | . | . | . | . |
117        // +---+---+---+---+---+---+---+---+---+---+
118        // |<--------->|<------------->|<----->|<->|
119        // symbol symbol symbol symbol
120        //
121        final int code = readNextCode();
122        if (code < 0) {
123            return -1;
124        }
125        if (blockMode && code == getClearCode()) {
126            clearEntries();
127            reAlignReading();
128            resetCodeSize();
129            resetPreviousCode();
130            return 0;
131        }
132        boolean addedUnfinishedEntry = false;
133        if (code == getTableSize()) {
134            addRepeatOfPreviousCode();
135            addedUnfinishedEntry = true;
136        } else if (code > getTableSize()) {
137            throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
138        }
139        return expandCodeToOutputStack(code, addedUnfinishedEntry);
140    }
141
142    /**
143     * {@inheritDoc}
144     * <p>
145     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
146     * warning.</strong>
147     * </p>
148     */
149    @Override
150    protected int readNextCode() throws IOException {
151        final int code = super.readNextCode();
152        if (code >= 0) {
153            ++totalCodesRead;
154        }
155        return code;
156    }
157
158    private void reAlignReading() throws IOException {
159        // "compress" works in multiples of 8 symbols, each codeBits bits long.
160        // When codeBits changes, the remaining unused symbols in the current
161        // group of 8 are still written out, in the old codeSize,
162        // as garbage values (usually zeroes) that need to be skipped.
163        long codeReadsToThrowAway = 8 - totalCodesRead % 8;
164        if (codeReadsToThrowAway == 8) {
165            codeReadsToThrowAway = 0;
166        }
167        for (long i = 0; i < codeReadsToThrowAway; i++) {
168            readNextCode();
169        }
170        in.clearBitCache();
171    }
172
173}