001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.harmony.pack200; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026import java.util.stream.IntStream; 027 028/** 029 * Abstract superclass for a set of bands 030 */ 031public abstract class BandSet { 032 033 /** 034 * Results obtained by trying different Codecs to encode a band 035 */ 036 public class BandAnalysisResults { 037 038 // The number of Codecs tried so far 039 private int numCodecsTried; 040 041 // The number of bytes saved by using betterCodec instead of the default codec 042 private int saved; 043 044 // Extra metadata to pass to the segment header (to be appended to the 045 // band_headers band) 046 private int[] extraMetadata; 047 048 // The results of encoding the band with betterCodec 049 private byte[] encodedBand; 050 051 // The best Codec found so far, or should be null if the default is the 052 // best so far 053 private Codec betterCodec; 054 055 } 056 057 /** 058 * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate whether an alternative Codec could make 059 * the encoded band smaller. 060 */ 061 public class BandData { 062 063 private final int[] band; 064 private int smallest = Integer.MAX_VALUE; 065 private int largest = Integer.MIN_VALUE; 066 private int smallestDelta; 067 private int largestDelta; 068 069 private int deltaIsAscending; 070 private int smallDeltaCount; 071 072 private double averageAbsoluteDelta; 073 private double averageAbsoluteValue; 074 075 private Map<Integer, Integer> distinctValues; 076 077 /** 078 * Constructs a new instance of BandData. The band is then analysed. 079 * 080 * @param band the band of integers 081 */ 082 public BandData(final int[] band) { 083 this.band = band; 084 final Integer one = Integer.valueOf(1); 085 for (int i = 0; i < band.length; i++) { 086 if (band[i] < smallest) { 087 smallest = band[i]; 088 } 089 if (band[i] > largest) { 090 largest = band[i]; 091 } 092 if (i != 0) { 093 final int delta = band[i] - band[i - 1]; 094 if (delta < smallestDelta) { 095 smallestDelta = delta; 096 } 097 if (delta > largestDelta) { 098 largestDelta = delta; 099 } 100 if (delta >= 0) { 101 deltaIsAscending++; 102 } 103 averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1); 104 if (Math.abs(delta) < 256) { 105 smallDeltaCount++; 106 } 107 } else { 108 smallestDelta = band[0]; 109 largestDelta = band[0]; 110 } 111 averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length; 112 if (effort > 3) { // do calculations needed to consider population codec 113 if (distinctValues == null) { 114 distinctValues = new HashMap<>(); 115 } 116 final Integer value = Integer.valueOf(band[i]); 117 Integer count = distinctValues.get(value); 118 if (count == null) { 119 count = one; 120 } else { 121 count = Integer.valueOf(count.intValue() + 1); 122 } 123 distinctValues.put(value, count); 124 } 125 } 126 } 127 128 /** 129 * Returns true if any band elements are negative. 130 * 131 * @return true if any band elements are negative. 132 */ 133 public boolean anyNegatives() { 134 return smallest < 0; 135 } 136 137 /** 138 * Returns true if the band deltas are mainly positive (heuristic). 139 * 140 * @return true if the band deltas are mainly positive (heuristic). 141 */ 142 public boolean mainlyPositiveDeltas() { 143 // Note: the value below has been tuned - please test carefully if changing it 144 return (float) deltaIsAscending / (float) band.length > 0.95F; 145 } 146 147 /** 148 * Returns true if the deltas between adjacent band elements are mainly small (heuristic). 149 * 150 * @return true if the deltas between adjacent band elements are mainly small (heuristic). 151 */ 152 public boolean mainlySmallDeltas() { 153 // Note: the value below has been tuned - please test carefully if changing it 154 return (float) smallDeltaCount / (float) band.length > 0.7F; 155 } 156 157 /** 158 * Returns the total number of distinct values found in the band. 159 * 160 * @return the total number of distinct values found in the band. 161 */ 162 public int numDistinctValues() { 163 if (distinctValues == null) { 164 return band.length; 165 } 166 return distinctValues.size(); 167 } 168 169 /** 170 * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 171 * 172 * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 173 */ 174 public boolean wellCorrelated() { 175 // Note: the value below has been tuned - please test carefully if changing it 176 return averageAbsoluteDelta * 3.1 < averageAbsoluteValue; 177 } 178 179 } 180 181 private static final byte[] EMPTY_BYTE_ARRAY = {}; 182 183 // Minimum size of band for each effort level where we consider alternative codecs 184 // Note: these values have been tuned - please test carefully if changing them 185 private static final int[] effortThresholds = { 0, 0, 1000, 500, 100, 100, 100, 100, 100, 0 }; 186 187 protected final SegmentHeader segmentHeader; 188 final int effort; 189 190 private long[] canonicalLargest; 191 192 private long[] canonicalSmallest; 193 194 /** 195 * Constructs a new BandSet. 196 * 197 * @param effort the packing effort to be used (must be 1-9) 198 * @param header the segment header 199 */ 200 public BandSet(final int effort, final SegmentHeader header) { 201 this.effort = effort; 202 this.segmentHeader = header; 203 } 204 205 private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec) throws Pack200Exception { 206 207 final BandAnalysisResults results = new BandAnalysisResults(); 208 209 if (canonicalLargest == null) { 210 canonicalLargest = new long[116]; 211 canonicalSmallest = new long[116]; 212 for (int i = 1; i < canonicalLargest.length; i++) { 213 canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest(); 214 canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest(); 215 } 216 } 217 final BandData bandData = new BandData(band); 218 219 // Check that there is a reasonable saving to be made 220 final byte[] encoded = defaultCodec.encode(band); 221 results.encodedBand = encoded; 222 223 // Note: these values have been tuned - please test carefully if changing them 224 if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak 225 return results; 226 } 227 228 // Check if we can use BYTE1 as that's a 1:1 mapping if we can 229 if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) { 230 results.encodedBand = Codec.BYTE1.encode(band); 231 results.betterCodec = Codec.BYTE1; 232 return results; 233 } 234 235 // Consider a population codec (but can't be nested) 236 if (effort > 3 && !name.equals("POPULATION")) { 237 final int numDistinctValues = bandData.numDistinctValues(); 238 final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length; 239 240 // Note: these values have been tuned - please test carefully if changing them 241 if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02 || effort > 6 && distinctValuesAsProportion < 0.04) { // TODO: tweak 242 encodeWithPopulationCodec(band, defaultCodec, bandData, results); 243 if (timeToStop(results)) { 244 return results; 245 } 246 } 247 } 248 249 final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>(); 250 251 // See if the deltas are mainly small increments 252 if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) { 253 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2); 254 } 255 256 if (bandData.wellCorrelated()) { // Try delta encodings 257 if (bandData.mainlyPositiveDeltas()) { 258 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 259 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 260 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 261 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 262 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 263 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 264 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 265 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 266 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 267 } else { 268 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 269 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 270 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 271 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 272 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 273 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 274 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 275 } 276 } else if (bandData.anyNegatives()) { 277 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 278 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 279 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 280 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 281 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 282 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 283 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 284 } else { 285 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 286 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 287 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 288 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 289 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 290 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 291 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 292 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 293 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 294 } 295 for (final BHSDCodec[] family : codecFamiliesToTry) { 296 tryCodecs(band, defaultCodec, bandData, results, encoded, family); 297 if (timeToStop(results)) { 298 break; 299 } 300 } 301 302 return results; 303 } 304 305 /** 306 * Converts a list of ConstantPoolEntrys to an int[] array of their indices 307 * 308 * @param list conversion source. 309 * @return conversion result. 310 */ 311 protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) { 312 final int[] array = new int[list.size()]; 313 for (int i = 0; i < array.length; i++) { 314 array[i] = list.get(i).getIndex(); 315 if (array[i] < 0) { 316 throw new IllegalArgumentException("Index should be > 0"); 317 } 318 } 319 return array; 320 } 321 322 /** 323 * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls) 324 * 325 * @param list conversion source. 326 * @return conversion result. 327 */ 328 protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) { 329 final int[] array = new int[list.size()]; 330 for (int j = 0; j < array.length; j++) { 331 final ConstantPoolEntry cpEntry = list.get(j); 332 array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1; 333 if (cpEntry != null && cpEntry.getIndex() < 0) { 334 throw new IllegalArgumentException("Index should be > 0"); 335 } 336 } 337 return array; 338 } 339 340 /** 341 * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater than 1. 342 * 343 * @param name name of the band (used for debugging) 344 * @param ints the band 345 * @param defaultCodec the default Codec 346 * @return the encoded band 347 * @throws Pack200Exception TODO 348 */ 349 public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec) throws Pack200Exception { 350 byte[] encodedBand = null; 351 // Useful for debugging 352// if (ints.length > 0) { 353// System.out.println("encoding " + name + " " + ints.length); 354// } 355 if (effort > 1 && ints.length >= effortThresholds[effort]) { 356 final BandAnalysisResults results = analyseBand(name, ints, defaultCodec); 357 final Codec betterCodec = results.betterCodec; 358 encodedBand = results.encodedBand; 359 if (betterCodec != null) { 360 if (betterCodec instanceof BHSDCodec) { 361 final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec); 362 int specifier = specifierBand[0]; 363 if (specifierBand.length > 1) { 364 for (int i = 1; i < specifierBand.length; i++) { 365 segmentHeader.appendBandCodingSpecifier(specifierBand[i]); 366 } 367 } 368 if (defaultCodec.isSigned()) { 369 specifier = -1 - specifier; 370 } else { 371 specifier += defaultCodec.getL(); 372 } 373 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 374 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 375 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 376 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 377 return band; 378 } 379 if (betterCodec instanceof PopulationCodec) { 380 IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier); 381 return encodedBand; 382 } 383 if (betterCodec instanceof RunCodec) { 384 // TODO? 385 } 386 } 387 } 388 389 // If we get here then we've decided to use the default codec. 390 if (ints.length > 0) { 391 if (encodedBand == null) { 392 encodedBand = defaultCodec.encode(ints); 393 } 394 final int first = ints[0]; 395 if (defaultCodec.getB() != 1) { 396 if (defaultCodec.isSigned() && first >= -256 && first <= -1) { 397 final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec); 398 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 399 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 400 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 401 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 402 return band; 403 } 404 if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) { 405 final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL(); 406 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 407 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 408 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 409 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 410 return band; 411 } 412 } 413 return encodedBand; 414 } 415 return EMPTY_BYTE_ARRAY; 416 } 417 418 /** 419 * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands 420 * 421 * @param name name of the band (for debugging purposes) 422 * @param flags the band 423 * @param loCodec Codec for the low 32-bits band 424 * @param hiCodec Codec for the high 32-bits band 425 * @param haveHiFlags ignores the high band if true as all values would be zero 426 * @return the encoded band 427 * @throws Pack200Exception TODO 428 */ 429 protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags) 430 throws Pack200Exception { 431 if (!haveHiFlags) { 432 final int[] loBits = new int[flags.length]; 433 Arrays.setAll(loBits, i -> (int) flags[i]); 434 return encodeBandInt(name, loBits, loCodec); 435 } 436 final int[] hiBits = new int[flags.length]; 437 final int[] loBits = new int[flags.length]; 438 for (int i = 0; i < flags.length; i++) { 439 final long l = flags[i]; 440 hiBits[i] = (int) (l >> 32); 441 loBits[i] = (int) l; 442 } 443 final byte[] hi = encodeBandInt(name, hiBits, hiCodec); 444 final byte[] lo = encodeBandInt(name, loBits, loCodec); 445 final byte[] total = new byte[hi.length + lo.length]; 446 System.arraycopy(hi, 0, total, 0, hi.length); 447 System.arraycopy(lo, 0, total, hi.length + 1, lo.length); 448 return total; 449 } 450 451// This could be useful if further enhancements are done but is not currently used 452// 453// private void encodeWithRunCodec(String name, int[] band, int index, 454// BHSDCodec defaultCodec, BandData bandData, 455// BandAnalysisResults results) throws Pack200Exception { 456// int[] firstBand = new int[index]; 457// int[] secondBand = new int[band.length - index]; 458// System.arraycopy(band, 0, firstBand, 0, index); 459// System.arraycopy(band, index, secondBand, 0, secondBand.length); 460// BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec); 461// BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec); 462// int specifier = 117; 463// byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 464// int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual 465// if (totalLength < results.encodedBand.length) { 466// System.out.println("using run codec"); 467// results.saved += results.encodedBand.length - totalLength; 468// byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length]; 469// System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length); 470// System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length); 471// System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, 472// secondResults.encodedBand.length); 473// results.encodedBand = encodedBand; 474// results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec); 475// } 476// } 477 478 protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags) 479 throws Pack200Exception { 480 return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags); 481 } 482 483 /** 484 * Encode a single value with the given Codec 485 * 486 * @param value the value to encode 487 * @param codec Codec to use 488 * @return the encoded value 489 * @throws Pack200Exception TODO 490 */ 491 public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception { 492 return codec.encode(value); 493 } 494 495 /** 496 * Encode a band without considering other Codecs 497 * 498 * @param band the band 499 * @param codec the Codec to use 500 * @return the encoded band 501 * @throws Pack200Exception TODO 502 */ 503 public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception { 504 return codec.encode(band); 505 } 506 507 private void encodeWithPopulationCodec(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results) 508 throws Pack200Exception { 509 results.numCodecsTried += 3; // quite a bit more effort to try this codec 510 final Map<Integer, Integer> distinctValues = bandData.distinctValues; 511 512 final List<Integer> favored = new ArrayList<>(); 513 distinctValues.forEach((k, v) -> { 514 if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak 515 favored.add(k); 516 } 517 }); 518 519 // Sort the favored list with the most commonly occurring first 520 if (distinctValues.size() > 255) { 521 favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0))); 522 } 523 524 final Map<Integer, Integer> favoredToIndex = new HashMap<>(); 525 for (int i = 0; i < favored.size(); i++) { 526 favoredToIndex.put(favored.get(i), Integer.valueOf(i)); 527 } 528 529 final IntList unfavoured = new IntList(); 530 final int[] tokens = new int[band.length]; 531 for (int i = 0; i < band.length; i++) { 532 final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i])); 533 if (favouredIndex == null) { 534 tokens[i] = 0; 535 unfavoured.add(band[i]); 536 } else { 537 tokens[i] = favouredIndex.intValue() + 1; 538 } 539 } 540 favored.add(favored.get(favored.size() - 1)); // repeat last value 541 final int[] favouredBand = integerListToArray(favored); 542 final int[] unfavouredBand = unfavoured.toArray(); 543 544 // Analyse the three bands to get the best codec 545 final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec); 546 final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec); 547 548 int tdefL = 0; 549 int l = 0; 550 Codec tokenCodec = null; 551 byte[] tokensEncoded; 552 final int k = favored.size() - 1; 553 if (k < 256) { 554 tdefL = 1; 555 tokensEncoded = Codec.BYTE1.encode(tokens); 556 } else { 557 final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec); 558 tokenCodec = tokenResults.betterCodec; 559 tokensEncoded = tokenResults.encodedBand; 560 if (tokenCodec == null) { 561 tokenCodec = defaultCodec; 562 } 563 l = ((BHSDCodec) tokenCodec).getL(); 564 final int h = ((BHSDCodec) tokenCodec).getH(); 565 final int s = ((BHSDCodec) tokenCodec).getS(); 566 final int b = ((BHSDCodec) tokenCodec).getB(); 567 final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0; 568 if (s == 0 && d == 0) { 569 boolean canUseTDefL = true; 570 if (b > 1) { 571 final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h); 572 if (oneLowerB.largest() >= k) { 573 canUseTDefL = false; 574 } 575 } 576 if (canUseTDefL) { 577 switch (l) { 578 case 4: 579 tdefL = 1; 580 break; 581 case 8: 582 tdefL = 2; 583 break; 584 case 16: 585 tdefL = 3; 586 break; 587 case 32: 588 tdefL = 4; 589 break; 590 case 64: 591 tdefL = 5; 592 break; 593 case 128: 594 tdefL = 6; 595 break; 596 case 192: 597 tdefL = 7; 598 break; 599 case 224: 600 tdefL = 8; 601 break; 602 case 240: 603 tdefL = 9; 604 break; 605 case 248: 606 tdefL = 10; 607 break; 608 case 252: 609 tdefL = 11; 610 break; 611 } 612 } 613 } 614 } 615 616 final byte[] favouredEncoded = favouredResults.encodedBand; 617 final byte[] unfavouredEncoded = unfavouredResults.encodedBand; 618 final Codec favouredCodec = favouredResults.betterCodec; 619 final Codec unfavouredCodec = unfavouredResults.betterCodec; 620 621 int specifier = 141 + (favouredCodec == null ? 1 : 0) + 4 * tdefL + (unfavouredCodec == null ? 2 : 0); 622 final IntList extraBandMetadata = new IntList(3); 623 if (favouredCodec != null) { 624 IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add); 625 } 626 if (tdefL == 0) { 627 IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add); 628 } 629 if (unfavouredCodec != null) { 630 IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add); 631 } 632 final int[] extraMetadata = extraBandMetadata.toArray(); 633 final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata); 634 if (defaultCodec.isSigned()) { 635 specifier = -1 - specifier; 636 } else { 637 specifier += defaultCodec.getL(); 638 } 639 final byte[] firstValueEncoded = defaultCodec.encode(new int[] { specifier }); 640 final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length; 641 642 if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) { 643 results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length); 644 final byte[] encodedBand = new byte[totalBandLength]; 645 System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length); 646 System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length); 647 System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length, tokensEncoded.length); 648 System.arraycopy(unfavouredEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length, 649 unfavouredEncoded.length); 650 results.encodedBand = encodedBand; 651 results.extraMetadata = extraMetadata; 652 if (l != 0) { 653 results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec); 654 } else { 655 results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec); 656 } 657 } 658 } 659 660 /* 661 * Flatten a 2-dimension array into a 1-dimension array 662 */ 663 private long[] flatten(final long[][] flags) { 664 int totalSize = 0; 665 for (final long[] flag : flags) { 666 totalSize += flag.length; 667 } 668 final long[] flatArray = new long[totalSize]; 669 int index = 0; 670 for (final long[] flag : flags) { 671 for (final long element : flag) { 672 flatArray[index] = element; 673 index++; 674 } 675 } 676 return flatArray; 677 } 678 679 /** 680 * Converts a list of Integers to an int[] array. 681 * 682 * @param integerList conversion source. 683 * @return conversion result. 684 */ 685 protected int[] integerListToArray(final List<Integer> integerList) { 686 return integerList.stream().mapToInt(Integer::intValue).toArray(); 687 } 688 689 /** 690 * Converts a list of Longs to an long[] array. 691 * 692 * @param longList conversion source. 693 * @return conversion result. 694 */ 695 protected long[] longListToArray(final List<Long> longList) { 696 return longList.stream().mapToLong(Long::longValue).toArray(); 697 } 698 699 /** 700 * Write the packed set of bands to the given output stream 701 * 702 * @param out TODO 703 * @throws IOException If an I/O error occurs. 704 * @throws Pack200Exception TODO 705 */ 706 public abstract void pack(OutputStream out) throws IOException, Pack200Exception; 707 708 private boolean timeToStop(final BandAnalysisResults results) { 709 // if tried more than effort number of codecs for this band then return 710 // Note: these values have been tuned - please test carefully if changing them 711 if (effort > 6) { 712 return results.numCodecsTried >= effort * 2; 713 } 714 return results.numCodecsTried >= effort; 715 // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 % 716 // || (float) results.saved/(float) results.encodedBand.length > (float) effort * 2/100; 717 } 718 719 private void tryCodecs(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results, final byte[] encoded, 720 final BHSDCodec[] potentialCodecs) throws Pack200Exception { 721 for (final BHSDCodec potential : potentialCodecs) { 722 if (potential.equals(defaultCodec)) { 723 return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there 724 // won't be any savings 725 } 726 if (potential.isDelta()) { 727 if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta && potential.largest() >= bandData.largest 728 && potential.smallest() <= bandData.smallest) { 729 // TODO: can represent some negative deltas with overflow 730 final byte[] encoded2 = potential.encode(band); 731 results.numCodecsTried++; 732 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 733 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 734 if (saved > results.saved) { 735 results.betterCodec = potential; 736 results.encodedBand = encoded2; 737 results.saved = saved; 738 } 739 } 740 } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) { 741 final byte[] encoded2 = potential.encode(band); 742 results.numCodecsTried++; 743 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 744 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 745 if (saved > results.saved) { 746 results.betterCodec = potential; 747 results.encodedBand = encoded2; 748 results.saved = saved; 749 } 750 } 751 if (timeToStop(results)) { 752 return; 753 } 754 } 755 } 756 757}