/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.harmony.pack200;

import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.input.BoundedInputStream;

/**
 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
 * <p>
 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
 * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
 * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
 * number. See {@link CodecEncoding#getCodec(int, InputStream, Codec)}.
 * </p>
 */
public abstract class Codec {

    /**
     * BCI5 = (5,4): Used for storing branching information in bytecode.
     */
    public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);

    /**
     * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
     */
    public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);

    /**
     * BYTE1 = (1,256): Used for storing plain bytes.
     */
    public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);

    /**
     * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters &lt; 127 are
     * stored in a single byte.
     */
    public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);

    /**
     * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
     */
    public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);

    /**
     * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
     * expected to be non-negative.
     */
    public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);

    /**
     * SIGNED5 = (5,64,1): Used for small signed values.
     */
    public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);

    /**
     * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
     */
    public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);

    /**
     * UNSIGNED5 = (5,64): Used for small unsigned values.
     */
    public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);

    /**
     * Band length bookkeeping; reset to zero at the start of {@link #decodeInts(int, InputStream)}. Nothing else in this class writes it.
     * <p>
     * NOTE(review): presumably updated by concrete codecs while decoding to record how much input the most recent band consumed; confirm against the
     * subclasses.
     * </p>
     */
    public int lastBandLength;

    /**
     * Checks that {@code n} does not exceed what is left in a bounded input stream.
     * <p>
     * The check only applies when {@code in} is a {@link BoundedInputStream} whose maximum count is greater than -1; for any other stream {@code n} is
     * returned unchecked.
     * </p>
     *
     * @param n  the number of values about to be decoded
     * @param in the input stream that will be read from
     * @return {@code n}, unchanged
     * @throws Pack200Exception if the stream is bounded and either its current count is less than -1 or {@code n} is greater than the difference between its
     *                          maximum count and its current count
     */
    int check(final int n, final InputStream in) throws Pack200Exception {
        if (in instanceof BoundedInputStream) {
            final BoundedInputStream bin = (BoundedInputStream) in;
            final long count = bin.getCount();
            final long maxLength = bin.getMaxCount();
            if (maxLength > -1) {
                final long remaining = maxLength - count;
                final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
                if (count < -1) {
                    throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
                }
                if (n > remaining) {
                    throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
                }
            }
        }
        return n;
    }

    /**
     * Decodes a sequence of bytes from the given input stream, returning the value as an int. Note that this method can only be applied for non-delta
     * encodings.
     *
     * @param in the input stream to read from
     * @return the value as an int
     * @throws IOException      if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if the encoding is a delta encoding
     */
    public abstract int decode(InputStream in) throws IOException, Pack200Exception;

    /**
     * Decodes a sequence of bytes from the given input stream, returning the value as an int. If this encoding is a delta encoding (d=1) then the previous
     * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
     * to be passed in by default using code similar to:
     *
     * <pre>
     * long last = 0;
     * while (condition) {
     *     last = codec.decode(in, last);
     *     // do something with last
     * }
     * </pre>
     *
     * @param in   the input stream to read from
     * @param last the previous value read, which must be supplied if the codec is a delta encoding
     * @return the value as an int
     * @throws IOException      if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
     */
    public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;

    /**
     * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
     * only work when the number of values to be read is known.
     *
     * @param n  the number of values to decode; must not be negative
     * @param in the input stream to read from
     * @return an array of {@code int} values corresponding to values decoded
     * @throws IOException      if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if {@code n} is negative, if {@code n} exceeds what a bounded stream can still supply, or if there is a problem decoding the
     *                          value or that the value is invalid
     */
    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
        lastBandLength = 0;
        // A negative count can only come from corrupt input; report it as a format error rather than letting
        // the array allocation below fail with an unchecked NegativeArraySizeException.
        if (n < 0) {
            throw new Pack200Exception(String.format("Invalid number of values to decode (n = %,d)", n));
        }
        final int[] result = new int[check(n, in)];
        int last = 0;
        for (int i = 0; i < n; i++) {
            result[i] = last = decode(in, last);
        }
        return result;
    }

    /**
     * Decodes a sequence of {@code n} values from {@code in}.
     *
     * @param n          the number of values to decode; must not be negative and must be less than {@link Integer#MAX_VALUE} because the result holds
     *                   {@code n + 1} elements
     * @param in         the input stream to read from
     * @param firstValue the first value in the band if it has already been read
     * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
     * @throws IOException      if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if {@code n} is out of range, if {@code n} exceeds what a bounded stream can still supply, or if there is a problem decoding
     *                          the value or that the value is invalid
     */
    public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
        // n + 1 elements are allocated: reject negative counts (n == -1 would yield an empty array and fail on
        // result[0]) and Integer.MAX_VALUE (n + 1 would overflow to a negative size).
        if (n < 0 || n == Integer.MAX_VALUE) {
            throw new Pack200Exception(String.format("Invalid number of values to decode (n = %,d)", n));
        }
        final int[] result = new int[check(n, in) + 1];
        result[0] = firstValue;
        int last = firstValue;
        for (int i = 1; i <= n; i++) {
            result[i] = last = decode(in, last);
        }
        return result;
    }

    /**
     * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
     *
     * @param value the value to encode
     * @return the encoded bytes
     * @throws Pack200Exception if there is a problem encoding the value
     */
    public abstract byte[] encode(int value) throws Pack200Exception;

    /**
     * Encodes a single value into a sequence of bytes.
     *
     * @param value the value to encode
     * @param last  the previous value encoded (for delta encodings)
     * @return the encoded bytes
     * @throws Pack200Exception if there is a problem encoding the value
     */
    public abstract byte[] encode(int value, int last) throws Pack200Exception;

    /**
     * Encodes a sequence of integers into a byte array. Each value is encoded with {@link #encode(int, int)}, passing the preceding element of {@code ints}
     * (or 0 for the first element) as the previous value, and the per-value results are concatenated in order.
     *
     * @param ints the values to encode
     * @return byte[] encoded bytes
     * @throws Pack200Exception if there is a problem encoding any of the values
     */
    public byte[] encode(final int[] ints) throws Pack200Exception {
        int total = 0;
        final byte[][] bytes = new byte[ints.length][];
        for (int i = 0; i < ints.length; i++) {
            bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
            total += bytes[i].length;
        }
        final byte[] encoded = new byte[total];
        int index = 0;
        for (final byte[] element : bytes) {
            System.arraycopy(element, 0, encoded, index, element.length);
            index += element.length;
        }
        return encoded;
    }
}