1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.lz4;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23 import java.util.Arrays;
24 import java.util.Deque;
25 import java.util.Iterator;
26 import java.util.LinkedList;
27
28 import org.apache.commons.compress.compressors.CompressorOutputStream;
29 import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
30 import org.apache.commons.compress.compressors.lz77support.Parameters;
31 import org.apache.commons.compress.utils.ByteUtils;
32
33
34
35
36
37
38
39
40 public class BlockLZ4CompressorOutputStream extends CompressorOutputStream<OutputStream> {
41
42 static final class Pair {
43
44 private static int lengths(final int litLength, final int brLength) {
45 final int l = Math.min(litLength, 15);
46 final int br = brLength < 4 ? 0 : brLength < 19 ? brLength - 4 : 15;
47 return l << BlockLZ4CompressorInputStream.SIZE_BITS | br;
48 }
49
50 private static void writeLength(int length, final OutputStream out) throws IOException {
51 while (length >= 255) {
52 out.write(255);
53 length -= 255;
54 }
55 out.write(length);
56 }
57
58 private final Deque<byte[]> literals = new LinkedList<>();
59
60 private int literalLength;
61
62 private int brOffset, brLength;
63
64 private boolean written;
65
66 byte[] addLiteral(final LZ77Compressor.LiteralBlock block) {
67 final byte[] copy = Arrays.copyOfRange(block.getData(), block.getOffset(), block.getOffset() + block.getLength());
68 literals.add(copy);
69 literalLength += copy.length;
70 return copy;
71 }
72
73 private int backReferenceLength() {
74 return brLength;
75 }
76
77 boolean canBeWritten(final int lengthOfBlocksAfterThisPair) {
78 return hasBackReference() && lengthOfBlocksAfterThisPair >= MIN_OFFSET_OF_LAST_BACK_REFERENCE + MIN_BACK_REFERENCE_LENGTH;
79 }
80
81 boolean hasBackReference() {
82 return brOffset > 0;
83 }
84
85 private boolean hasBeenWritten() {
86 return written;
87 }
88
89 int length() {
90 return literalLength() + brLength;
91 }
92
93 private int literalLength() {
94
95 if (literalLength != 0) {
96 return literalLength;
97 }
98 int length = 0;
99 for (final byte[] b : literals) {
100 length += b.length;
101 }
102 return literalLength = length;
103 }
104
105 private void prependLiteral(final byte[] data) {
106 literals.addFirst(data);
107 literalLength += data.length;
108 }
109
110 private void prependTo(final Pair other) {
111 final Iterator<byte[]> listBackwards = literals.descendingIterator();
112 while (listBackwards.hasNext()) {
113 other.prependLiteral(listBackwards.next());
114 }
115 }
116
117 void setBackReference(final LZ77Compressor.BackReference block) {
118 if (hasBackReference()) {
119 throw new IllegalStateException();
120 }
121 brOffset = block.getOffset();
122 brLength = block.getLength();
123 }
124
125 private Pair splitWithNewBackReferenceLengthOf(final int newBackReferenceLength) {
126 final Pair p = new Pair();
127 p.literals.addAll(literals);
128 p.brOffset = brOffset;
129 p.brLength = newBackReferenceLength;
130 return p;
131 }
132
133 void writeTo(final OutputStream out) throws IOException {
134 final int litLength = literalLength();
135 out.write(lengths(litLength, brLength));
136 if (litLength >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
137 writeLength(litLength - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
138 }
139 for (final byte[] b : literals) {
140 out.write(b);
141 }
142 if (hasBackReference()) {
143 ByteUtils.toLittleEndian(out, brOffset, 2);
144 if (brLength - MIN_BACK_REFERENCE_LENGTH >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
145 writeLength(brLength - MIN_BACK_REFERENCE_LENGTH - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
146 }
147 }
148 written = true;
149 }
150 }
151
152 private static final int MIN_BACK_REFERENCE_LENGTH = 4;
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173 private static final int MIN_OFFSET_OF_LAST_BACK_REFERENCE = 12;
174
175
176
177
178
179
180 public static Parameters.Builder createParameterBuilder() {
181 final int maxLen = BlockLZ4CompressorInputStream.WINDOW_SIZE - 1;
182 return Parameters.builder(BlockLZ4CompressorInputStream.WINDOW_SIZE).withMinBackReferenceLength(MIN_BACK_REFERENCE_LENGTH)
183 .withMaxBackReferenceLength(maxLen).withMaxOffset(maxLen).withMaxLiteralLength(maxLen);
184 }
185
186 private final LZ77Compressor compressor;
187
188
189 private final byte[] oneByte = new byte[1];
190 private boolean finished;
191
192 private final Deque<Pair> pairs = new LinkedList<>();
193
194
195
196 private final Deque<byte[]> expandedBlocks = new LinkedList<>();
197
198
199
200
201
202
203 public BlockLZ4CompressorOutputStream(final OutputStream out) {
204 this(out, createParameterBuilder().build());
205 }
206
207
208
209
210
211
212
213 public BlockLZ4CompressorOutputStream(final OutputStream out, final Parameters params) {
214 super(out);
215 compressor = new LZ77Compressor(params, block -> {
216 switch (block.getType()) {
217 case LITERAL:
218 addLiteralBlock((LZ77Compressor.LiteralBlock) block);
219 break;
220 case BACK_REFERENCE:
221 addBackReference((LZ77Compressor.BackReference) block);
222 break;
223 case EOD:
224 writeFinalLiteralBlock();
225 break;
226 }
227 });
228 }
229
230 private void addBackReference(final LZ77Compressor.BackReference block) throws IOException {
231 final Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
232 last.setBackReference(block);
233 recordBackReference(block);
234 clearUnusedBlocksAndPairs();
235 }
236
237 private void addLiteralBlock(final LZ77Compressor.LiteralBlock block) throws IOException {
238 final Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
239 recordLiteral(last.addLiteral(block));
240 clearUnusedBlocksAndPairs();
241 }
242
243 private void clearUnusedBlocks() {
244 int blockLengths = 0;
245 int blocksToKeep = 0;
246 for (final byte[] b : expandedBlocks) {
247 blocksToKeep++;
248 blockLengths += b.length;
249 if (blockLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
250 break;
251 }
252 }
253 final int size = expandedBlocks.size();
254 for (int i = blocksToKeep; i < size; i++) {
255 expandedBlocks.removeLast();
256 }
257 }
258
259 private void clearUnusedBlocksAndPairs() {
260 clearUnusedBlocks();
261 clearUnusedPairs();
262 }
263
264 private void clearUnusedPairs() {
265 int pairLengths = 0;
266 int pairsToKeep = 0;
267 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
268 final Pair p = it.next();
269 pairsToKeep++;
270 pairLengths += p.length();
271 if (pairLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
272 break;
273 }
274 }
275 final int size = pairs.size();
276 for (int i = pairsToKeep; i < size; i++) {
277 final Pair p = pairs.peekFirst();
278 if (!p.hasBeenWritten()) {
279 break;
280 }
281 pairs.removeFirst();
282 }
283 }
284
285 @Override
286 public void close() throws IOException {
287 try {
288 finish();
289 } finally {
290 out.close();
291 }
292 }
293
294 private byte[] expand(final int offset, final int length) {
295 final byte[] expanded = new byte[length];
296 if (offset == 1) {
297 final byte[] block = expandedBlocks.peekFirst();
298 final byte b = block[block.length - 1];
299 if (b != 0) {
300 Arrays.fill(expanded, b);
301 }
302 } else {
303 expandFromList(expanded, offset, length);
304 }
305 return expanded;
306 }
307
308 private void expandFromList(final byte[] expanded, final int offset, final int length) {
309 int offsetRemaining = offset;
310 int lengthRemaining = length;
311 int writeOffset = 0;
312 while (lengthRemaining > 0) {
313
314 byte[] block = null;
315 final int copyLen;
316 final int copyOffset;
317 if (offsetRemaining > 0) {
318 int blockOffset = 0;
319 for (final byte[] b : expandedBlocks) {
320 if (b.length + blockOffset >= offsetRemaining) {
321 block = b;
322 break;
323 }
324 blockOffset += b.length;
325 }
326 if (block == null) {
327
328 throw new IllegalStateException("Failed to find a block containing offset " + offset);
329 }
330 copyOffset = blockOffset + block.length - offsetRemaining;
331 copyLen = Math.min(lengthRemaining, block.length - copyOffset);
332 } else {
333
334 block = expanded;
335 copyOffset = -offsetRemaining;
336 copyLen = Math.min(lengthRemaining, writeOffset + offsetRemaining);
337 }
338 System.arraycopy(block, copyOffset, expanded, writeOffset, copyLen);
339 offsetRemaining -= copyLen;
340 lengthRemaining -= copyLen;
341 writeOffset += copyLen;
342 }
343 }
344
345
346
347
348
349
350 public void finish() throws IOException {
351 if (!finished) {
352 compressor.finish();
353 finished = true;
354 }
355 }
356
357
358
359
360
361
362
363
364
365
366 public void prefill(final byte[] data, final int off, final int len) {
367 if (len > 0) {
368 final byte[] b = Arrays.copyOfRange(data, off, off + len);
369 compressor.prefill(b);
370 recordLiteral(b);
371 }
372 }
373
374 private void recordBackReference(final LZ77Compressor.BackReference block) {
375 expandedBlocks.addFirst(expand(block.getOffset(), block.getLength()));
376 }
377
378 private void recordLiteral(final byte[] b) {
379 expandedBlocks.addFirst(b);
380 }
381
382 private void rewriteLastPairs() {
383 final LinkedList<Pair> lastPairs = new LinkedList<>();
384 final LinkedList<Integer> pairLength = new LinkedList<>();
385 int offset = 0;
386 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
387 final Pair p = it.next();
388 if (p.hasBeenWritten()) {
389 break;
390 }
391 final int len = p.length();
392 pairLength.addFirst(len);
393 lastPairs.addFirst(p);
394 offset += len;
395 if (offset >= MIN_OFFSET_OF_LAST_BACK_REFERENCE) {
396 break;
397 }
398 }
399 lastPairs.forEach(pairs::remove);
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424 final int lastPairsSize = lastPairs.size();
425 int toExpand = 0;
426 for (int i = 1; i < lastPairsSize; i++) {
427 toExpand += pairLength.get(i);
428 }
429 final Pair replacement = new Pair();
430 if (toExpand > 0) {
431 replacement.prependLiteral(expand(toExpand, toExpand));
432 }
433 final Pair splitCandidate = lastPairs.get(0);
434 final int stillNeeded = MIN_OFFSET_OF_LAST_BACK_REFERENCE - toExpand;
435 final int brLen = splitCandidate.hasBackReference() ? splitCandidate.backReferenceLength() : 0;
436 if (splitCandidate.hasBackReference() && brLen >= MIN_BACK_REFERENCE_LENGTH + stillNeeded) {
437 replacement.prependLiteral(expand(toExpand + stillNeeded, stillNeeded));
438 pairs.add(splitCandidate.splitWithNewBackReferenceLengthOf(brLen - stillNeeded));
439 } else {
440 if (splitCandidate.hasBackReference()) {
441 replacement.prependLiteral(expand(toExpand + brLen, brLen));
442 }
443 splitCandidate.prependTo(replacement);
444 }
445 pairs.add(replacement);
446 }
447
448 @Override
449 public void write(final byte[] data, final int off, final int len) throws IOException {
450 compressor.compress(data, off, len);
451 }
452
453 @Override
454 public void write(final int b) throws IOException {
455 oneByte[0] = (byte) (b & 0xff);
456 write(oneByte);
457 }
458
459 private Pair writeBlocksAndReturnUnfinishedPair(final int length) throws IOException {
460 writeWritablePairs(length);
461 Pair last = pairs.peekLast();
462 if (last == null || last.hasBackReference()) {
463 last = new Pair();
464 pairs.addLast(last);
465 }
466 return last;
467 }
468
469 private void writeFinalLiteralBlock() throws IOException {
470 rewriteLastPairs();
471 for (final Pair p : pairs) {
472 if (!p.hasBeenWritten()) {
473 p.writeTo(out);
474 }
475 }
476 pairs.clear();
477 }
478
479 private void writeWritablePairs(final int lengthOfBlocksAfterLastPair) throws IOException {
480 int unwrittenLength = lengthOfBlocksAfterLastPair;
481 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
482 final Pair p = it.next();
483 if (p.hasBeenWritten()) {
484 break;
485 }
486 unwrittenLength += p.length();
487 }
488 for (final Pair p : pairs) {
489 if (p.hasBeenWritten()) {
490 continue;
491 }
492 unwrittenLength -= p.length();
493 if (!p.canBeWritten(unwrittenLength)) {
494 break;
495 }
496 p.writeTo(out);
497 }
498 }
499 }