1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.compress.harmony.pack200;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21
22 import org.apache.commons.io.input.BoundedInputStream;
23
24 /**
25 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
26 * <p>
27 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
28 * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
29 * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
30 * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
31 * </p>
32 */
33 public abstract class Codec {
34
35 /**
36 * BCI5 = (5,4): Used for storing branching information in bytecode.
37 */
38 public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
39
40 /**
41 * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
42 */
43 public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
44
45 /**
46 * BYTE1 = (1,256): Used for storing plain bytes.
47 */
48 public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
49
50 /**
51 * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters < 127 are
52 * stored in a single byte.
53 */
54 public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
55
56 /**
57 * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
58 */
59 public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
60
61 /**
62 * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
63 * expected to be non-negative.
64 */
65 public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
66
67 /**
68 * SIGNED5 = (5,64,1): Used for small signed values.
69 */
70 public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
71
72 /**
73 * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
74 */
75 public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
76
77 /**
78 * UNSIGNED5 = (5,64): Used for small unsigned values.
79 */
80 public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
81
82 public int lastBandLength;
83
84 int check(final int n, final InputStream in) throws Pack200Exception {
85 if (in instanceof BoundedInputStream) {
86 final BoundedInputStream bin = (BoundedInputStream) in;
87 final long count = bin.getCount();
88 final long maxLength = bin.getMaxCount();
89 if (maxLength > -1) {
90 final long remaining = maxLength - count;
91 final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
92 if (count < -1) {
93 throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
94 }
95 if (n > remaining) {
96 throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
97 }
98 }
99 }
100 return n;
101 }
102
103 /**
104 * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
105 * encodings.
106 *
107 * @param in the input stream to read from
108 * @return the value as a long
109 * @throws IOException if there is a problem reading from the underlying input stream
110 * @throws Pack200Exception if the encoding is a delta encoding
111 */
112 public abstract int decode(InputStream in) throws IOException, Pack200Exception;
113
114 /**
115 * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
116 * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
117 * to be passed in by default using code similar to:
118 *
119 * <pre>
120 * long last = 0;
121 * while (condition) {
122 * last = codec.decode(in, last);
123 * // do something with last
124 * }
125 * </pre>
126 *
127 * @param in the input stream to read from
128 * @param last the previous value read, which must be supplied if the codec is a delta encoding
129 * @return the value as a long
130 * @throws IOException if there is a problem reading from the underlying input stream
131 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
132 */
133 public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
134
135 /**
136 * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
137 * only work when the number of values to be read is known.
138 *
139 * @param n the number of values to decode
140 * @param in the input stream to read from
141 * @return an array of {@code int} values corresponding to values decoded
142 * @throws IOException if there is a problem reading from the underlying input stream
143 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
144 */
145 public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
146 lastBandLength = 0;
147 final int[] result = new int[check(n, in)];
148 int last = 0;
149 for (int i = 0; i < n; i++) {
150 result[i] = last = decode(in, last);
151 }
152 return result;
153 }
154
155 /**
156 * Decodes a sequence of {@code n} values from {@code in}.
157 *
158 * @param n the number of values to decode
159 * @param in the input stream to read from
160 * @param firstValue the first value in the band if it has already been read
161 * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
162 * @throws IOException if there is a problem reading from the underlying input stream
163 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
164 */
165 public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
166 final int[] result = new int[check(n, in) + 1];
167 result[0] = firstValue;
168 int last = firstValue;
169 for (int i = 1; i < n + 1; i++) {
170 result[i] = last = decode(in, last);
171 }
172 return result;
173 }
174
175 /**
176 * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
177 *
178 * @param value the value to encode
179 * @return the encoded bytes
180 * @throws Pack200Exception TODO
181 */
182 public abstract byte[] encode(int value) throws Pack200Exception;
183
184 /**
185 * Encodes a single value into a sequence of bytes.
186 *
187 * @param value the value to encode
188 * @param last the previous value encoded (for delta encodings)
189 * @return the encoded bytes
190 * @throws Pack200Exception TODO
191 */
192 public abstract byte[] encode(int value, int last) throws Pack200Exception;
193
194 /**
195 * Encodes a sequence of integers into a byte array
196 *
197 * @param ints the values to encode
198 * @return byte[] encoded bytes
199 * @throws Pack200Exception if there is a problem encoding any of the values
200 */
201 public byte[] encode(final int[] ints) throws Pack200Exception {
202 int total = 0;
203 final byte[][] bytes = new byte[ints.length][];
204 for (int i = 0; i < ints.length; i++) {
205 bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
206 total += bytes[i].length;
207 }
208 final byte[] encoded = new byte[total];
209 int index = 0;
210 for (final byte[] element : bytes) {
211 System.arraycopy(element, 0, encoded, index, element.length);
212 index += element.length;
213 }
214 return encoded;
215 }
216 }