001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.harmony.pack200; 018 019import java.io.EOFException; 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.Map; 025 026/** 027 * CodecEncoding is used to get the right Codec for a given meta-encoding. 028 */ 029public class CodecEncoding { 030 031 private static final int[] EMPTY_INT_ARRAY = {}; 032 033 /** 034 * The canonical encodings are defined to allow a single byte to represent one of the standard encodings. The following values are defined in the Pack200 035 * specification, and this array cannot be changed. 036 */ 037 private static final BHSDCodec[] canonicalCodec = { null, new BHSDCodec(1, 256), new BHSDCodec(1, 256, 1), new BHSDCodec(1, 256, 0, 1), 038 new BHSDCodec(1, 256, 1, 1), new BHSDCodec(2, 256), new BHSDCodec(2, 256, 1), new BHSDCodec(2, 256, 0, 1), new BHSDCodec(2, 256, 1, 1), 039 new BHSDCodec(3, 256), new BHSDCodec(3, 256, 1), new BHSDCodec(3, 256, 0, 1), new BHSDCodec(3, 256, 1, 1), new BHSDCodec(4, 256), 040 new BHSDCodec(4, 256, 1), new BHSDCodec(4, 256, 0, 1), new BHSDCodec(4, 256, 1, 1), new BHSDCodec(5, 4), new BHSDCodec(5, 4, 1), 041 new BHSDCodec(5, 4, 2), new BHSDCodec(5, 16), new BHSDCodec(5, 16, 1), new BHSDCodec(5, 16, 2), new BHSDCodec(5, 32), new BHSDCodec(5, 32, 1), 042 new BHSDCodec(5, 32, 2), new BHSDCodec(5, 64), new BHSDCodec(5, 64, 1), new BHSDCodec(5, 64, 2), new BHSDCodec(5, 128), new BHSDCodec(5, 128, 1), 043 new BHSDCodec(5, 128, 2), new BHSDCodec(5, 4, 0, 1), new BHSDCodec(5, 4, 1, 1), new BHSDCodec(5, 4, 2, 1), new BHSDCodec(5, 16, 0, 1), 044 new BHSDCodec(5, 16, 1, 1), new BHSDCodec(5, 16, 2, 1), new BHSDCodec(5, 32, 0, 1), new BHSDCodec(5, 32, 1, 1), new BHSDCodec(5, 32, 2, 1), 045 new BHSDCodec(5, 64, 0, 1), new BHSDCodec(5, 64, 1, 1), new BHSDCodec(5, 64, 2, 1), new BHSDCodec(5, 128, 0, 1), new BHSDCodec(5, 128, 1, 1), 046 new BHSDCodec(5, 128, 2, 1), new BHSDCodec(2, 192), new BHSDCodec(2, 224), new BHSDCodec(2, 240), new BHSDCodec(2, 248), new BHSDCodec(2, 252), 047 new BHSDCodec(2, 8, 0, 1), new BHSDCodec(2, 8, 1, 1), new BHSDCodec(2, 16, 0, 1), new BHSDCodec(2, 16, 1, 1), new BHSDCodec(2, 32, 0, 1), 048 new BHSDCodec(2, 32, 1, 1), new BHSDCodec(2, 64, 0, 1), new BHSDCodec(2, 64, 1, 1), new BHSDCodec(2, 128, 0, 1), new BHSDCodec(2, 128, 1, 1), 049 new BHSDCodec(2, 192, 0, 1), new BHSDCodec(2, 192, 1, 1), new BHSDCodec(2, 224, 0, 1), new BHSDCodec(2, 224, 1, 1), new BHSDCodec(2, 240, 0, 1), 050 new BHSDCodec(2, 240, 1, 1), new BHSDCodec(2, 248, 0, 1), new BHSDCodec(2, 248, 1, 1), new BHSDCodec(3, 192), new BHSDCodec(3, 224), 051 new BHSDCodec(3, 240), new BHSDCodec(3, 248), new BHSDCodec(3, 252), new BHSDCodec(3, 8, 0, 1), new BHSDCodec(3, 8, 1, 1), 052 new BHSDCodec(3, 16, 0, 1), new BHSDCodec(3, 16, 1, 1), new BHSDCodec(3, 32, 0, 1), new BHSDCodec(3, 32, 1, 1), new BHSDCodec(3, 64, 0, 1), 053 new BHSDCodec(3, 64, 1, 1), new BHSDCodec(3, 128, 0, 1), new BHSDCodec(3, 128, 1, 1), new BHSDCodec(3, 192, 0, 1), new BHSDCodec(3, 192, 1, 1), 054 new BHSDCodec(3, 224, 0, 1), new BHSDCodec(3, 224, 1, 1), new BHSDCodec(3, 240, 0, 1), new BHSDCodec(3, 240, 1, 1), new BHSDCodec(3, 248, 0, 1), 055 new BHSDCodec(3, 248, 1, 1), new BHSDCodec(4, 192), new BHSDCodec(4, 224), new BHSDCodec(4, 240), new BHSDCodec(4, 248), new BHSDCodec(4, 252), 056 new BHSDCodec(4, 8, 0, 1), new BHSDCodec(4, 8, 1, 1), new BHSDCodec(4, 16, 0, 1), new BHSDCodec(4, 16, 1, 1), new BHSDCodec(4, 32, 0, 1), 057 new BHSDCodec(4, 32, 1, 1), new BHSDCodec(4, 64, 0, 1), new BHSDCodec(4, 64, 1, 1), new BHSDCodec(4, 128, 0, 1), new BHSDCodec(4, 128, 1, 1), 058 new BHSDCodec(4, 192, 0, 1), new BHSDCodec(4, 192, 1, 1), new BHSDCodec(4, 224, 0, 1), new BHSDCodec(4, 224, 1, 1), new BHSDCodec(4, 240, 0, 1), 059 new BHSDCodec(4, 240, 1, 1), new BHSDCodec(4, 248, 0, 1), new BHSDCodec(4, 248, 1, 1) }; 060 061 private static Map<BHSDCodec, Integer> canonicalCodecsToSpecifiers; 062 063 static { 064 final HashMap<BHSDCodec, Integer> reverseMap = new HashMap<>(canonicalCodec.length); 065 for (int i = 0; i < canonicalCodec.length; i++) { 066 reverseMap.put(canonicalCodec[i], Integer.valueOf(i)); 067 } 068 canonicalCodecsToSpecifiers = reverseMap; 069 } 070 071 public static BHSDCodec getCanonicalCodec(final int i) { 072 return canonicalCodec[i]; 073 } 074 075 /** 076 * Gets the codec specified by the given value byte and optional byte header. If the value is >= 116, then bytes may be consumed from the secondary 077 * input stream, which is taken to be the contents of the band_headers byte array. Since the values from this are consumed and not repeated, the input 078 * stream should be reused for subsequent encodings. This does not therefore close the input stream. 079 * 080 * @param value the canonical encoding value 081 * @param in the input stream to read additional byte headers from 082 * @param defaultCodec TODO 083 * @return the corresponding codec, or {@code null} if the default should be used 084 * 085 * @throws IOException if there is a problem reading from the input stream (which in reality, is never, since the band_headers are likely stored in a 086 * byte array and accessed via a ByteArrayInputStream. However, an EOFException could occur if things go wrong) 087 * @throws Pack200Exception TODO 088 */ 089 public static Codec getCodec(final int value, final InputStream in, final Codec defaultCodec) throws IOException, Pack200Exception { 090 // Sanity check to make sure that no-one has changed 091 // the canonical codecs, which would really cause havoc 092 if (canonicalCodec.length != 116) { 093 throw new Error("Canonical encodings have been incorrectly modified"); 094 } 095 if (value < 0) { 096 throw new IllegalArgumentException("Encoding cannot be less than zero"); 097 } 098 if (value == 0) { 099 return defaultCodec; 100 } 101 if (value <= 115) { 102 return canonicalCodec[value]; 103 } 104 if (value == 116) { 105 int code = in.read(); 106 if (code == -1) { 107 throw new EOFException("End of buffer read whilst trying to decode codec"); 108 } 109 final int d = code & 0x01; 110 final int s = code >> 1 & 0x03; 111 final int b = (code >> 3 & 0x07) + 1; // this might result in an invalid 112 // number, but it's checked in the 113 // Codec constructor 114 code = in.read(); 115 if (code == -1) { 116 throw new EOFException("End of buffer read whilst trying to decode codec"); 117 } 118 final int h = code + 1; 119 // This handles the special cases for invalid combinations of data. 120 return new BHSDCodec(b, h, s, d); 121 } 122 if (value >= 117 && value <= 140) { // Run codec 123 final int offset = value - 117; 124 final int kx = offset & 3; 125 final boolean kbflag = (offset >> 2 & 1) == 1; 126 final boolean adef = (offset >> 3 & 1) == 1; 127 final boolean bdef = (offset >> 4 & 1) == 1; 128 // If both A and B use the default encoding, what's the point of 129 // having a run of default values followed by default values 130 if (adef && bdef) { 131 throw new Pack200Exception("ADef and BDef should never both be true"); 132 } 133 final int kb = kbflag ? in.read() : 3; 134 final int k = (kb + 1) * (int) Math.pow(16, kx); 135 Codec aCodec, bCodec; 136 if (adef) { 137 aCodec = defaultCodec; 138 } else { 139 aCodec = getCodec(in.read(), in, defaultCodec); 140 } 141 if (bdef) { 142 bCodec = defaultCodec; 143 } else { 144 bCodec = getCodec(in.read(), in, defaultCodec); 145 } 146 return new RunCodec(k, aCodec, bCodec); 147 } 148 if (value < 141 || value > 188) { 149 throw new Pack200Exception("Invalid codec encoding byte (" + value + ") found"); 150 } 151 final int offset = value - 141; 152 final boolean fdef = (offset & 1) == 1; 153 final boolean udef = (offset >> 1 & 1) == 1; 154 final int tdefl = offset >> 2; 155 final boolean tdef = tdefl != 0; 156 // From section 6.7.3 of spec 157 final int[] tdefToL = { 0, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 }; 158 final int l = tdefToL[tdefl]; 159 // NOTE: Do not re-factor this to bring out uCodec; the order in 160 // which 161 // they are read from the stream is important 162 if (tdef) { 163 final Codec fCodec = fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 164 final Codec uCodec = udef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 165 // Unfortunately, if tdef, then tCodec depends both on l and 166 // also on k, the 167 // number of items read from the fCodec. So we don't know in 168 // advance what 169 // the codec will be. 170 return new PopulationCodec(fCodec, l, uCodec); 171 } 172 final Codec fCodec = fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 173 final Codec tCodec = getCodec(in.read(), in, defaultCodec); 174 final Codec uCodec = udef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 175 return new PopulationCodec(fCodec, tCodec, uCodec); 176 } 177 178 public static int[] getSpecifier(final Codec codec, final Codec defaultForBand) { 179 if (canonicalCodecsToSpecifiers.containsKey(codec)) { 180 return new int[] { canonicalCodecsToSpecifiers.get(codec).intValue() }; 181 } 182 if (codec instanceof BHSDCodec) { 183 // Cache these? 184 final BHSDCodec bhsdCodec = (BHSDCodec) codec; 185 final int[] specifiers = new int[3]; 186 specifiers[0] = 116; 187 specifiers[1] = (bhsdCodec.isDelta() ? 1 : 0) + 2 * bhsdCodec.getS() + 8 * (bhsdCodec.getB() - 1); 188 specifiers[2] = bhsdCodec.getH() - 1; 189 return specifiers; 190 } 191 if (codec instanceof RunCodec) { 192 final RunCodec runCodec = (RunCodec) codec; 193 final int k = runCodec.getK(); 194 int kb; 195 int kx; 196 if (k <= 256) { 197 kb = 0; 198 kx = k - 1; 199 } else if (k <= 4096) { 200 kb = 1; 201 kx = k / 16 - 1; 202 } else if (k <= 65536) { 203 kb = 2; 204 kx = k / 256 - 1; 205 } else { 206 kb = 3; 207 kx = k / 4096 - 1; 208 } 209 final Codec aCodec = runCodec.getACodec(); 210 final Codec bCodec = runCodec.getBCodec(); 211 int abDef = 0; 212 if (aCodec.equals(defaultForBand)) { 213 abDef = 1; 214 } else if (bCodec.equals(defaultForBand)) { 215 abDef = 2; 216 } 217 final int first = 117 + kb + (kx == 3 ? 0 : 4) + 8 * abDef; 218 final int[] aSpecifier = abDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(aCodec, defaultForBand); 219 final int[] bSpecifier = abDef == 2 ? EMPTY_INT_ARRAY : getSpecifier(bCodec, defaultForBand); 220 final int[] specifier = new int[1 + (kx == 3 ? 0 : 1) + aSpecifier.length + bSpecifier.length]; 221 specifier[0] = first; 222 int index = 1; 223 if (kx != 3) { 224 specifier[1] = kx; 225 index++; 226 } 227 for (final int element : aSpecifier) { 228 specifier[index] = element; 229 index++; 230 } 231 for (final int element : bSpecifier) { 232 specifier[index] = element; 233 index++; 234 } 235 return specifier; 236 } 237 if (codec instanceof PopulationCodec) { 238 final PopulationCodec populationCodec = (PopulationCodec) codec; 239 final Codec tokenCodec = populationCodec.getTokenCodec(); 240 final Codec favouredCodec = populationCodec.getFavouredCodec(); 241 final Codec unfavouredCodec = populationCodec.getUnfavouredCodec(); 242 final int fDef = favouredCodec.equals(defaultForBand) ? 1 : 0; 243 final int uDef = unfavouredCodec.equals(defaultForBand) ? 1 : 0; 244 int tDefL = 0; 245 final int[] favoured = populationCodec.getFavoured(); 246 if (favoured != null) { 247 if (tokenCodec == Codec.BYTE1) { 248 tDefL = 1; 249 } else if (tokenCodec instanceof BHSDCodec) { 250 final BHSDCodec tokenBHSD = (BHSDCodec) tokenCodec; 251 if (tokenBHSD.getS() == 0) { 252 final int[] possibleLValues = { 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 }; 253 final int l = 256 - tokenBHSD.getH(); 254 int index = Arrays.binarySearch(possibleLValues, l); 255 if (index != -1) { 256 // TODO: check range is ok for ks 257 tDefL = index++; 258 } 259 } 260 } 261 } 262 final int first = 141 + fDef + 2 * uDef + 4 * tDefL; 263 final int[] favouredSpecifier = fDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(favouredCodec, defaultForBand); 264 final int[] tokenSpecifier = tDefL != 0 ? EMPTY_INT_ARRAY : getSpecifier(tokenCodec, defaultForBand); 265 final int[] unfavouredSpecifier = uDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(unfavouredCodec, defaultForBand); 266 final int[] specifier = new int[1 + favouredSpecifier.length + unfavouredSpecifier.length + tokenSpecifier.length]; 267 specifier[0] = first; 268 int index = 1; 269 for (final int element : favouredSpecifier) { 270 specifier[index] = element; 271 index++; 272 } 273 for (final int element : tokenSpecifier) { 274 specifier[index] = element; 275 index++; 276 } 277 for (final int element : unfavouredSpecifier) { 278 specifier[index] = element; 279 index++; 280 } 281 return specifier; 282 } 283 284 return null; 285 } 286 287 public static int getSpecifierForDefaultCodec(final BHSDCodec defaultCodec) { 288 return getSpecifier(defaultCodec, null)[0]; 289 } 290}