1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.lz77support;
20
21 import java.io.IOException;
22 import java.util.Objects;
23
24 import org.apache.commons.lang3.ArrayFill;
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79 public class LZ77Compressor {
80
81
82
83
84 public static final class BackReference extends Block {
85 private final int offset, length;
86
87 public BackReference(final int offset, final int length) {
88 this.offset = offset;
89 this.length = length;
90 }
91
92
93
94
95
96
97 public int getLength() {
98 return length;
99 }
100
101
102
103
104
105
106 public int getOffset() {
107 return offset;
108 }
109
110 @Override
111 public BlockType getType() {
112 return BlockType.BACK_REFERENCE;
113 }
114
115 @Override
116 public String toString() {
117 return "BackReference with offset " + offset + " and length " + length;
118 }
119 }
120
121
122
123
124
125
126
127
128
129 public abstract static class Block {
130
131 public enum BlockType {
132 LITERAL, BACK_REFERENCE, EOD
133 }
134
135 public abstract BlockType getType();
136 }
137
138
139
140
141
142
143
144
145
146 public interface Callback {
147
148
149
150
151
152
153 void accept(Block b) throws IOException;
154 }
155
156
157 public static final class EOD extends Block {
158 @Override
159 public BlockType getType() {
160 return BlockType.EOD;
161 }
162 }
163
164
165
166
167
168
169
170
171
172 public static final class LiteralBlock extends Block {
173 private final byte[] data;
174 private final int offset, length;
175
176 public LiteralBlock(final byte[] data, final int offset, final int length) {
177 this.data = data;
178 this.offset = offset;
179 this.length = length;
180 }
181
182
183
184
185
186
187
188
189
190
191 public byte[] getData() {
192 return data;
193 }
194
195
196
197
198
199
200 public int getLength() {
201 return length;
202 }
203
204
205
206
207
208
209 public int getOffset() {
210 return offset;
211 }
212
213 @Override
214 public BlockType getType() {
215 return BlockType.LITERAL;
216 }
217
218 @Override
219 public String toString() {
220 return "LiteralBlock starting at " + offset + " with length " + length;
221 }
222 }
223
224 private static final Block THE_EOD = new EOD();
225
226 static final int NUMBER_OF_BYTES_IN_HASH = 3;
227 private static final int NO_MATCH = -1;
228
229
230 private static final int HASH_SIZE = 1 << 15;
231 private static final int HASH_MASK = HASH_SIZE - 1;
232
233 private static final int H_SHIFT = 5;
234 private final Parameters params;
235 private final Callback callback;
236
237
238 private final byte[] window;
239
240
241
242
243 private final int[] head;
244
245
246
247
248 private final int[] prev;
249
250 private final int wMask;
251 private boolean initialized;
252
253 private int currentPosition;
254
255
256 private int lookahead;
257
258 private int insertHash;
259
260
261
262 private int blockStart;
263
264
265 private int matchStart = NO_MATCH;
266
267
268
269
270 private int missedInserts;
271
272
273
274
275
276
277
278
279 public LZ77Compressor(final Parameters params, final Callback callback) {
280 Objects.requireNonNull(params, "params");
281 Objects.requireNonNull(callback, "callback");
282
283 this.params = params;
284 this.callback = callback;
285
286 final int wSize = params.getWindowSize();
287 window = new byte[wSize * 2];
288 wMask = wSize - 1;
289 head = ArrayFill.fill(new int[HASH_SIZE], NO_MATCH);
290 prev = new int[wSize];
291 }
292
293 private void catchUpMissedInserts() {
294 while (missedInserts > 0) {
295 insertString(currentPosition - missedInserts--);
296 }
297 }
298
299 private void compress() throws IOException {
300 final int minMatch = params.getMinBackReferenceLength();
301 final boolean lazy = params.getLazyMatching();
302 final int lazyThreshold = params.getLazyMatchingThreshold();
303
304 while (lookahead >= minMatch) {
305 catchUpMissedInserts();
306 int matchLength = 0;
307 final int hashHead = insertString(currentPosition);
308 if (hashHead != NO_MATCH && hashHead - currentPosition <= params.getMaxOffset()) {
309
310 matchLength = longestMatch(hashHead);
311
312 if (lazy && matchLength <= lazyThreshold && lookahead > minMatch) {
313
314 matchLength = longestMatchForNextPosition(matchLength);
315 }
316 }
317 if (matchLength >= minMatch) {
318 if (blockStart != currentPosition) {
319
320 flushLiteralBlock();
321 blockStart = NO_MATCH;
322 }
323 flushBackReference(matchLength);
324 insertStringsInMatch(matchLength);
325 lookahead -= matchLength;
326 currentPosition += matchLength;
327 blockStart = currentPosition;
328 } else {
329
330 lookahead--;
331 currentPosition++;
332 if (currentPosition - blockStart >= params.getMaxLiteralLength()) {
333 flushLiteralBlock();
334 blockStart = currentPosition;
335 }
336 }
337 }
338 }
339
340
341
342
343
344
345
346 public void compress(final byte[] data) throws IOException {
347 compress(data, 0, data.length);
348 }
349
350
351
352
353
354
355
356
357
358 public void compress(final byte[] data, int off, int len) throws IOException {
359 final int wSize = params.getWindowSize();
360 while (len > wSize) {
361 doCompress(data, off, wSize);
362 off += wSize;
363 len -= wSize;
364 }
365 if (len > 0) {
366 doCompress(data, off, len);
367 }
368 }
369
370
371 private void doCompress(final byte[] data, final int off, final int len) throws IOException {
372 final int spaceLeft = window.length - currentPosition - lookahead;
373 if (len > spaceLeft) {
374 slide();
375 }
376 System.arraycopy(data, off, window, currentPosition + lookahead, len);
377 lookahead += len;
378 if (!initialized && lookahead >= params.getMinBackReferenceLength()) {
379 initialize();
380 }
381 if (initialized) {
382 compress();
383 }
384 }
385
386
387
388
389
390
391
392
393
394
395 public void finish() throws IOException {
396 if (blockStart != currentPosition || lookahead > 0) {
397 currentPosition += lookahead;
398 flushLiteralBlock();
399 }
400 callback.accept(THE_EOD);
401 }
402
403 private void flushBackReference(final int matchLength) throws IOException {
404 callback.accept(new BackReference(currentPosition - matchStart, matchLength));
405 }
406
407 private void flushLiteralBlock() throws IOException {
408 callback.accept(new LiteralBlock(window, blockStart, currentPosition - blockStart));
409 }
410
411 private void initialize() {
412 for (int i = 0; i < NUMBER_OF_BYTES_IN_HASH - 1; i++) {
413 insertHash = nextHash(insertHash, window[i]);
414 }
415 initialized = true;
416 }
417
418
419
420
421
422
423
424
425 private int insertString(final int pos) {
426 insertHash = nextHash(insertHash, window[pos - 1 + NUMBER_OF_BYTES_IN_HASH]);
427 final int hashHead = head[insertHash];
428 prev[pos & wMask] = hashHead;
429 head[insertHash] = pos;
430 return hashHead;
431 }
432
433 private void insertStringsInMatch(final int matchLength) {
434
435
436 final int stop = Math.min(matchLength - 1, lookahead - NUMBER_OF_BYTES_IN_HASH);
437
438 for (int i = 1; i <= stop; i++) {
439 insertString(currentPosition + i);
440 }
441 missedInserts = matchLength - stop - 1;
442 }
443
444
445
446
447
448
449
450
451 private int longestMatch(int matchHead) {
452 final int minLength = params.getMinBackReferenceLength();
453 int longestMatchLength = minLength - 1;
454 final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead);
455 final int minIndex = Math.max(0, currentPosition - params.getMaxOffset());
456 final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength());
457 final int maxCandidates = params.getMaxCandidates();
458 for (int candidates = 0; candidates < maxCandidates && matchHead >= minIndex; candidates++) {
459 int currentLength = 0;
460 for (int i = 0; i < maxPossibleLength; i++) {
461 if (window[matchHead + i] != window[currentPosition + i]) {
462 break;
463 }
464 currentLength++;
465 }
466 if (currentLength > longestMatchLength) {
467 longestMatchLength = currentLength;
468 matchStart = matchHead;
469 if (currentLength >= niceBackReferenceLength) {
470
471 break;
472 }
473 }
474 matchHead = prev[matchHead & wMask];
475 }
476 return longestMatchLength;
477 }
478
479 private int longestMatchForNextPosition(final int prevMatchLength) {
480
481 final int prevMatchStart = matchStart;
482 final int prevInsertHash = insertHash;
483
484 lookahead--;
485 currentPosition++;
486 final int hashHead = insertString(currentPosition);
487 final int prevHashHead = prev[currentPosition & wMask];
488 int matchLength = longestMatch(hashHead);
489
490 if (matchLength <= prevMatchLength) {
491
492 matchLength = prevMatchLength;
493 matchStart = prevMatchStart;
494
495
496 head[insertHash] = prevHashHead;
497 insertHash = prevInsertHash;
498 currentPosition--;
499 lookahead++;
500 }
501 return matchLength;
502 }
503
504
505
506
507
508
509
510
511
512 private int nextHash(final int oldHash, final byte nextByte) {
513 final int nextVal = nextByte & 0xFF;
514 return (oldHash << H_SHIFT ^ nextVal) & HASH_MASK;
515 }
516
517
518
519
520
521
522
523
524
525
526
527
528 public void prefill(final byte[] data) {
529 if (currentPosition != 0 || lookahead != 0) {
530 throw new IllegalStateException("The compressor has already started to accept data, can't prefill anymore");
531 }
532
533
534 final int len = Math.min(params.getWindowSize(), data.length);
535 System.arraycopy(data, data.length - len, window, 0, len);
536
537 if (len >= NUMBER_OF_BYTES_IN_HASH) {
538 initialize();
539 final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1;
540 for (int i = 0; i < stop; i++) {
541 insertString(i);
542 }
543 missedInserts = NUMBER_OF_BYTES_IN_HASH - 1;
544 } else {
545 missedInserts = len;
546 }
547 blockStart = currentPosition = len;
548 }
549
550 private void slide() throws IOException {
551 final int wSize = params.getWindowSize();
552 if (blockStart != currentPosition && blockStart < wSize) {
553 flushLiteralBlock();
554 blockStart = currentPosition;
555 }
556 System.arraycopy(window, wSize, window, 0, wSize);
557 currentPosition -= wSize;
558 matchStart -= wSize;
559 blockStart -= wSize;
560 for (int i = 0; i < HASH_SIZE; i++) {
561 final int h = head[i];
562 head[i] = h >= wSize ? h - wSize : NO_MATCH;
563 }
564 for (int i = 0; i < wSize; i++) {
565 final int p = prev[i];
566 prev[i] = p >= wSize ? p - wSize : NO_MATCH;
567 }
568 }
569 }