1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.compress.harmony.pack200;
18
19 import java.io.IOException;
20 import java.io.OutputStream;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.stream.IntStream;
27
28
29
30
31 public abstract class BandSet {
32
33
34
35
36 public class BandAnalysisResults {
37
38
39 private int numCodecsTried;
40
41
42 private int saved;
43
44
45
46 private int[] extraMetadata;
47
48
49 private byte[] encodedBand;
50
51
52
53 private Codec betterCodec;
54
55 }
56
57
58
59
60
61 public class BandData {
62
63 private final int[] band;
64 private int smallest = Integer.MAX_VALUE;
65 private int largest = Integer.MIN_VALUE;
66 private int smallestDelta;
67 private int largestDelta;
68
69 private int deltaIsAscending;
70 private int smallDeltaCount;
71
72 private double averageAbsoluteDelta;
73 private double averageAbsoluteValue;
74
75 private Map<Integer, Integer> distinctValues;
76
77
78
79
80
81
82 public BandData(final int[] band) {
83 this.band = band;
84 final Integer one = Integer.valueOf(1);
85 for (int i = 0; i < band.length; i++) {
86 if (band[i] < smallest) {
87 smallest = band[i];
88 }
89 if (band[i] > largest) {
90 largest = band[i];
91 }
92 if (i != 0) {
93 final int delta = band[i] - band[i - 1];
94 if (delta < smallestDelta) {
95 smallestDelta = delta;
96 }
97 if (delta > largestDelta) {
98 largestDelta = delta;
99 }
100 if (delta >= 0) {
101 deltaIsAscending++;
102 }
103 averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1);
104 if (Math.abs(delta) < 256) {
105 smallDeltaCount++;
106 }
107 } else {
108 smallestDelta = band[0];
109 largestDelta = band[0];
110 }
111 averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length;
112 if (effort > 3) {
113 if (distinctValues == null) {
114 distinctValues = new HashMap<>();
115 }
116 final Integer value = Integer.valueOf(band[i]);
117 Integer count = distinctValues.get(value);
118 if (count == null) {
119 count = one;
120 } else {
121 count = Integer.valueOf(count.intValue() + 1);
122 }
123 distinctValues.put(value, count);
124 }
125 }
126 }
127
128
129
130
131
132
133 public boolean anyNegatives() {
134 return smallest < 0;
135 }
136
137
138
139
140
141
142 public boolean mainlyPositiveDeltas() {
143
144 return (float) deltaIsAscending / (float) band.length > 0.95F;
145 }
146
147
148
149
150
151
152 public boolean mainlySmallDeltas() {
153
154 return (float) smallDeltaCount / (float) band.length > 0.7F;
155 }
156
157
158
159
160
161
162 public int numDistinctValues() {
163 if (distinctValues == null) {
164 return band.length;
165 }
166 return distinctValues.size();
167 }
168
169
170
171
172
173
174 public boolean wellCorrelated() {
175
176 return averageAbsoluteDelta * 3.1 < averageAbsoluteValue;
177 }
178
179 }
180
181 private static final byte[] EMPTY_BYTE_ARRAY = {};
182
183
184
185 private static final int[] effortThresholds = { 0, 0, 1000, 500, 100, 100, 100, 100, 100, 0 };
186
187 protected final SegmentHeader segmentHeader;
188 final int effort;
189
190 private long[] canonicalLargest;
191
192 private long[] canonicalSmallest;
193
194
195
196
197
198
199
200 public BandSet(final int effort, final SegmentHeader header) {
201 this.effort = effort;
202 this.segmentHeader = header;
203 }
204
205 private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec) throws Pack200Exception {
206
207 final BandAnalysisResults results = new BandAnalysisResults();
208
209 if (canonicalLargest == null) {
210 canonicalLargest = new long[116];
211 canonicalSmallest = new long[116];
212 for (int i = 1; i < canonicalLargest.length; i++) {
213 canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest();
214 canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest();
215 }
216 }
217 final BandData bandData = new BandData(band);
218
219
220 final byte[] encoded = defaultCodec.encode(band);
221 results.encodedBand = encoded;
222
223
224 if (encoded.length <= band.length + 23 - 2 * effort) {
225 return results;
226 }
227
228
229 if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) {
230 results.encodedBand = Codec.BYTE1.encode(band);
231 results.betterCodec = Codec.BYTE1;
232 return results;
233 }
234
235
236 if (effort > 3 && !name.equals("POPULATION")) {
237 final int numDistinctValues = bandData.numDistinctValues();
238 final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length;
239
240
241 if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02 || effort > 6 && distinctValuesAsProportion < 0.04) {
242 encodeWithPopulationCodec(band, defaultCodec, bandData, results);
243 if (timeToStop(results)) {
244 return results;
245 }
246 }
247 }
248
249 final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>();
250
251
252 if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) {
253 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2);
254 }
255
256 if (bandData.wellCorrelated()) {
257 if (bandData.mainlyPositiveDeltas()) {
258 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
259 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
260 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
261 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
262 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
263 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
264 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
265 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
266 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
267 } else {
268 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
269 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
270 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
271 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
272 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
273 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
274 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
275 }
276 } else if (bandData.anyNegatives()) {
277 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
278 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
279 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
280 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
281 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
282 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
283 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
284 } else {
285 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
286 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
287 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
288 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
289 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
290 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
291 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
292 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
293 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
294 }
295 for (final BHSDCodec[] family : codecFamiliesToTry) {
296 tryCodecs(band, defaultCodec, bandData, results, encoded, family);
297 if (timeToStop(results)) {
298 break;
299 }
300 }
301
302 return results;
303 }
304
305
306
307
308
309
310
311 protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) {
312 final int[] array = new int[list.size()];
313 for (int i = 0; i < array.length; i++) {
314 array[i] = list.get(i).getIndex();
315 if (array[i] < 0) {
316 throw new IllegalArgumentException("Index should be > 0");
317 }
318 }
319 return array;
320 }
321
322
323
324
325
326
327
328 protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) {
329 final int[] array = new int[list.size()];
330 for (int j = 0; j < array.length; j++) {
331 final ConstantPoolEntry cpEntry = list.get(j);
332 array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1;
333 if (cpEntry != null && cpEntry.getIndex() < 0) {
334 throw new IllegalArgumentException("Index should be > 0");
335 }
336 }
337 return array;
338 }
339
340
341
342
343
344
345
346
347
348
349 public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec) throws Pack200Exception {
350 byte[] encodedBand = null;
351
352
353
354
355 if (effort > 1 && ints.length >= effortThresholds[effort]) {
356 final BandAnalysisResults results = analyseBand(name, ints, defaultCodec);
357 final Codec betterCodec = results.betterCodec;
358 encodedBand = results.encodedBand;
359 if (betterCodec != null) {
360 if (betterCodec instanceof BHSDCodec) {
361 final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec);
362 int specifier = specifierBand[0];
363 if (specifierBand.length > 1) {
364 for (int i = 1; i < specifierBand.length; i++) {
365 segmentHeader.appendBandCodingSpecifier(specifierBand[i]);
366 }
367 }
368 if (defaultCodec.isSigned()) {
369 specifier = -1 - specifier;
370 } else {
371 specifier += defaultCodec.getL();
372 }
373 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier });
374 final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
375 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
376 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
377 return band;
378 }
379 if (betterCodec instanceof PopulationCodec) {
380 IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier);
381 return encodedBand;
382 }
383 if (betterCodec instanceof RunCodec) {
384
385 }
386 }
387 }
388
389
390 if (ints.length > 0) {
391 if (encodedBand == null) {
392 encodedBand = defaultCodec.encode(ints);
393 }
394 final int first = ints[0];
395 if (defaultCodec.getB() != 1) {
396 if (defaultCodec.isSigned() && first >= -256 && first <= -1) {
397 final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec);
398 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier });
399 final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
400 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
401 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
402 return band;
403 }
404 if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) {
405 final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL();
406 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier });
407 final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
408 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
409 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
410 return band;
411 }
412 }
413 return encodedBand;
414 }
415 return EMPTY_BYTE_ARRAY;
416 }
417
418
419
420
421
422
423
424
425
426
427
428
429 protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags)
430 throws Pack200Exception {
431 if (!haveHiFlags) {
432 final int[] loBits = new int[flags.length];
433 Arrays.setAll(loBits, i -> (int) flags[i]);
434 return encodeBandInt(name, loBits, loCodec);
435 }
436 final int[] hiBits = new int[flags.length];
437 final int[] loBits = new int[flags.length];
438 for (int i = 0; i < flags.length; i++) {
439 final long l = flags[i];
440 hiBits[i] = (int) (l >> 32);
441 loBits[i] = (int) l;
442 }
443 final byte[] hi = encodeBandInt(name, hiBits, hiCodec);
444 final byte[] lo = encodeBandInt(name, loBits, loCodec);
445 final byte[] total = new byte[hi.length + lo.length];
446 System.arraycopy(hi, 0, total, 0, hi.length);
447 System.arraycopy(lo, 0, total, hi.length + 1, lo.length);
448 return total;
449 }
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478 protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags)
479 throws Pack200Exception {
480 return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags);
481 }
482
483
484
485
486
487
488
489
490
491 public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception {
492 return codec.encode(value);
493 }
494
495
496
497
498
499
500
501
502
503 public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception {
504 return codec.encode(band);
505 }
506
507 private void encodeWithPopulationCodec(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results)
508 throws Pack200Exception {
509 results.numCodecsTried += 3;
510 final Map<Integer, Integer> distinctValues = bandData.distinctValues;
511
512 final List<Integer> favored = new ArrayList<>();
513 distinctValues.forEach((k, v) -> {
514 if (v.intValue() > 2 || distinctValues.size() < 256) {
515 favored.add(k);
516 }
517 });
518
519
520 if (distinctValues.size() > 255) {
521 favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0)));
522 }
523
524 final Map<Integer, Integer> favoredToIndex = new HashMap<>();
525 for (int i = 0; i < favored.size(); i++) {
526 favoredToIndex.put(favored.get(i), Integer.valueOf(i));
527 }
528
529 final IntList unfavoured = new IntList();
530 final int[] tokens = new int[band.length];
531 for (int i = 0; i < band.length; i++) {
532 final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i]));
533 if (favouredIndex == null) {
534 tokens[i] = 0;
535 unfavoured.add(band[i]);
536 } else {
537 tokens[i] = favouredIndex.intValue() + 1;
538 }
539 }
540 favored.add(favored.get(favored.size() - 1));
541 final int[] favouredBand = integerListToArray(favored);
542 final int[] unfavouredBand = unfavoured.toArray();
543
544
545 final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec);
546 final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec);
547
548 int tdefL = 0;
549 int l = 0;
550 Codec tokenCodec = null;
551 byte[] tokensEncoded;
552 final int k = favored.size() - 1;
553 if (k < 256) {
554 tdefL = 1;
555 tokensEncoded = Codec.BYTE1.encode(tokens);
556 } else {
557 final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec);
558 tokenCodec = tokenResults.betterCodec;
559 tokensEncoded = tokenResults.encodedBand;
560 if (tokenCodec == null) {
561 tokenCodec = defaultCodec;
562 }
563 l = ((BHSDCodec) tokenCodec).getL();
564 final int h = ((BHSDCodec) tokenCodec).getH();
565 final int s = ((BHSDCodec) tokenCodec).getS();
566 final int b = ((BHSDCodec) tokenCodec).getB();
567 final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0;
568 if (s == 0 && d == 0) {
569 boolean canUseTDefL = true;
570 if (b > 1) {
571 final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h);
572 if (oneLowerB.largest() >= k) {
573 canUseTDefL = false;
574 }
575 }
576 if (canUseTDefL) {
577 switch (l) {
578 case 4:
579 tdefL = 1;
580 break;
581 case 8:
582 tdefL = 2;
583 break;
584 case 16:
585 tdefL = 3;
586 break;
587 case 32:
588 tdefL = 4;
589 break;
590 case 64:
591 tdefL = 5;
592 break;
593 case 128:
594 tdefL = 6;
595 break;
596 case 192:
597 tdefL = 7;
598 break;
599 case 224:
600 tdefL = 8;
601 break;
602 case 240:
603 tdefL = 9;
604 break;
605 case 248:
606 tdefL = 10;
607 break;
608 case 252:
609 tdefL = 11;
610 break;
611 }
612 }
613 }
614 }
615
616 final byte[] favouredEncoded = favouredResults.encodedBand;
617 final byte[] unfavouredEncoded = unfavouredResults.encodedBand;
618 final Codec favouredCodec = favouredResults.betterCodec;
619 final Codec unfavouredCodec = unfavouredResults.betterCodec;
620
621 int specifier = 141 + (favouredCodec == null ? 1 : 0) + 4 * tdefL + (unfavouredCodec == null ? 2 : 0);
622 final IntList extraBandMetadata = new IntList(3);
623 if (favouredCodec != null) {
624 IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add);
625 }
626 if (tdefL == 0) {
627 IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add);
628 }
629 if (unfavouredCodec != null) {
630 IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add);
631 }
632 final int[] extraMetadata = extraBandMetadata.toArray();
633 final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata);
634 if (defaultCodec.isSigned()) {
635 specifier = -1 - specifier;
636 } else {
637 specifier += defaultCodec.getL();
638 }
639 final byte[] firstValueEncoded = defaultCodec.encode(new int[] { specifier });
640 final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length;
641
642 if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) {
643 results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length);
644 final byte[] encodedBand = new byte[totalBandLength];
645 System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length);
646 System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length);
647 System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length, tokensEncoded.length);
648 System.arraycopy(unfavouredEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length,
649 unfavouredEncoded.length);
650 results.encodedBand = encodedBand;
651 results.extraMetadata = extraMetadata;
652 if (l != 0) {
653 results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec);
654 } else {
655 results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec);
656 }
657 }
658 }
659
660
661
662
663 private long[] flatten(final long[][] flags) {
664 int totalSize = 0;
665 for (final long[] flag : flags) {
666 totalSize += flag.length;
667 }
668 final long[] flatArray = new long[totalSize];
669 int index = 0;
670 for (final long[] flag : flags) {
671 for (final long element : flag) {
672 flatArray[index] = element;
673 index++;
674 }
675 }
676 return flatArray;
677 }
678
679
680
681
682
683
684
685 protected int[] integerListToArray(final List<Integer> integerList) {
686 return integerList.stream().mapToInt(Integer::intValue).toArray();
687 }
688
689
690
691
692
693
694
695 protected long[] longListToArray(final List<Long> longList) {
696 return longList.stream().mapToLong(Long::longValue).toArray();
697 }
698
699
700
701
702
703
704
705
706 public abstract void pack(OutputStream out) throws IOException, Pack200Exception;
707
708 private boolean timeToStop(final BandAnalysisResults results) {
709
710
711 if (effort > 6) {
712 return results.numCodecsTried >= effort * 2;
713 }
714 return results.numCodecsTried >= effort;
715
716
717 }
718
719 private void tryCodecs(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results, final byte[] encoded,
720 final BHSDCodec[] potentialCodecs) throws Pack200Exception {
721 for (final BHSDCodec potential : potentialCodecs) {
722 if (potential.equals(defaultCodec)) {
723 return;
724
725 }
726 if (potential.isDelta()) {
727 if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta && potential.largest() >= bandData.largest
728 && potential.smallest() <= bandData.smallest) {
729
730 final byte[] encoded2 = potential.encode(band);
731 results.numCodecsTried++;
732 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
733 final int saved = encoded.length - encoded2.length - specifierEncoded.length;
734 if (saved > results.saved) {
735 results.betterCodec = potential;
736 results.encodedBand = encoded2;
737 results.saved = saved;
738 }
739 }
740 } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) {
741 final byte[] encoded2 = potential.encode(band);
742 results.numCodecsTried++;
743 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
744 final int saved = encoded.length - encoded2.length - specifierEncoded.length;
745 if (saved > results.saved) {
746 results.betterCodec = potential;
747 results.encodedBand = encoded2;
748 results.saved = saved;
749 }
750 }
751 if (timeToStop(results)) {
752 return;
753 }
754 }
755 }
756
757 }