001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.z; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.compressors.lzw.LZWInputStream; 026 027/** 028 * Input stream that decompresses .Z files. 029 * 030 * @NotThreadSafe 031 * @since 1.7 032 */ 033public class ZCompressorInputStream extends LZWInputStream { 034 private static final int MAGIC_1 = 0x1f; 035 private static final int MAGIC_2 = 0x9d; 036 private static final int BLOCK_MODE_MASK = 0x80; 037 private static final int MAX_CODE_SIZE_MASK = 0x1f; 038 039 /** 040 * Checks if the signature matches what is expected for a UNIX compress file. 041 * 042 * @param signature the bytes to check 043 * @param length the number of bytes to check 044 * @return true, if this stream is a UNIX compress compressed stream, false otherwise 045 * 046 * @since 1.9 047 */ 048 public static boolean matches(final byte[] signature, final int length) { 049 return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2; 050 } 051 052 private final boolean blockMode; 053 private final int maxCodeSize; 054 055 private long totalCodesRead; 056 057 public ZCompressorInputStream(final InputStream inputStream) throws IOException { 058 this(inputStream, -1); 059 } 060 061 public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) throws IOException { 062 super(inputStream, ByteOrder.LITTLE_ENDIAN); 063 final int firstByte = (int) in.readBits(8); 064 final int secondByte = (int) in.readBits(8); 065 final int thirdByte = (int) in.readBits(8); 066 if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) { 067 throw new IOException("Input is not in .Z format"); 068 } 069 blockMode = (thirdByte & BLOCK_MODE_MASK) != 0; 070 maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK; 071 if (blockMode) { 072 setClearCode(DEFAULT_CODE_SIZE); 073 } 074 initializeTables(maxCodeSize, memoryLimitInKb); 075 clearEntries(); 076 } 077 078 /** 079 * {@inheritDoc} 080 * <p> 081 * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without 082 * warning.</strong> 083 * </p> 084 */ 085 @Override 086 protected int addEntry(final int previousCode, final byte character) throws IOException { 087 final int maxTableSize = 1 << getCodeSize(); 088 final int r = addEntry(previousCode, character, maxTableSize); 089 if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) { 090 reAlignReading(); 091 incrementCodeSize(); 092 } 093 return r; 094 } 095 096 private void clearEntries() { 097 setTableSize((1 << 8) + (blockMode ? 1 : 0)); 098 } 099 100 /** 101 * {@inheritDoc} 102 * <p> 103 * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without 104 * warning.</strong> 105 * </p> 106 */ 107 @Override 108 protected int decompressNextSymbol() throws IOException { 109 // 110 // table entry table entry 111 // _____________ _____ 112 // table entry / \ / \ 113 // ____________/ \ \ 114 // / / \ / \ \ 115 // +---+---+---+---+---+---+---+---+---+---+ 116 // | . | . | . | . | . | . | . | . | . | . | 117 // +---+---+---+---+---+---+---+---+---+---+ 118 // |<--------->|<------------->|<----->|<->| 119 // symbol symbol symbol symbol 120 // 121 final int code = readNextCode(); 122 if (code < 0) { 123 return -1; 124 } 125 if (blockMode && code == getClearCode()) { 126 clearEntries(); 127 reAlignReading(); 128 resetCodeSize(); 129 resetPreviousCode(); 130 return 0; 131 } 132 boolean addedUnfinishedEntry = false; 133 if (code == getTableSize()) { 134 addRepeatOfPreviousCode(); 135 addedUnfinishedEntry = true; 136 } else if (code > getTableSize()) { 137 throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code)); 138 } 139 return expandCodeToOutputStack(code, addedUnfinishedEntry); 140 } 141 142 /** 143 * {@inheritDoc} 144 * <p> 145 * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without 146 * warning.</strong> 147 * </p> 148 */ 149 @Override 150 protected int readNextCode() throws IOException { 151 final int code = super.readNextCode(); 152 if (code >= 0) { 153 ++totalCodesRead; 154 } 155 return code; 156 } 157 158 private void reAlignReading() throws IOException { 159 // "compress" works in multiples of 8 symbols, each codeBits bits long. 160 // When codeBits changes, the remaining unused symbols in the current 161 // group of 8 are still written out, in the old codeSize, 162 // as garbage values (usually zeroes) that need to be skipped. 163 long codeReadsToThrowAway = 8 - totalCodesRead % 8; 164 if (codeReadsToThrowAway == 8) { 165 codeReadsToThrowAway = 0; 166 } 167 for (long i = 0; i < codeReadsToThrowAway; i++) { 168 readNextCode(); 169 } 170 in.clearBitCache(); 171 } 172 173}