BlockSort.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.bzip2;
import java.util.Arrays;
import java.util.BitSet;
/**
* Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link BZip2CompressorOutputStream}.
*
* <p>
* This class is based on a Java port of Julian Seward's blocksort.c in his libbzip2
* </p>
*
* <p>
* The Burrows-Wheeler transform is a reversible transform of the original data that is supposed to group similar bytes close to each other. The idea is to sort
* all permutations of the input and only keep the last byte of each permutation. E.g. for "Commons Compress" you'd get:
* </p>
*
* <pre>
* CompressCommons
* Commons Compress
* CompressCommons
* essCommons Compr
* mmons CompressCo
* mons CompressCom
* mpressCommons Co
* ns CompressCommo
* ommons CompressC
* ompressCommons C
* ons CompressComm
* pressCommons Com
* ressCommons Comp
* s CompressCommon
* sCommons Compres
* ssCommons Compre
* </pre>
*
* <p>
* Which results in a new text "ss romooCCmmpnse", in adition the index of the first line that contained the original text is kept - in this case it is 1. The
* idea is that in a long English text all permutations that start with "he" are likely suffixes of a "the" and thus they end in "t" leading to a larger block
* of "t"s that can better be compressed by the subsequent Move-to-Front, run-length und Huffman encoding steps.
* </p>
*
* <p>
* For more information see for example:
* </p>
* <ul>
* <li><a href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows, M. and Wheeler, D.: A Block-sorting Lossless Data Compression
* Algorithm</a></li>
* <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and Myers, G.: Suffix arrays: A new method for on-line string searches</a></li>
* <li><a href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley, J.L. and Sedgewick, R.: Fast Algorithms for Sorting and
* Searching Strings</a></li>
* </ul>
*
* @NotThreadSafe
*/
final class BlockSort {
/*
* Some of the constructs used in the C code cannot be ported literally to Java - for example macros, unsigned types. Some code has been hand-tuned to
* improve performance. In order to avoid memory pressure some structures are reused for several blocks and some memory is even shared between sorting and
* the MTF stage even though either algorithm uses it for its own purpose.
*
* Comments preserved from the actual C code are prefixed with "LBZ2:".
*/
/*
* 2012-05-20 Stefan Bodewig:
*
* This class seems to mix several revisions of libbzip2's code. The mainSort function and those used by it look closer to the 0.9.5 version but show some
* variations introduced later. At the same time the logic of Compress 1.4 to randomize the block on bad input has been dropped after libbzip2 0.9.0 and
* replaced by a fallback sorting algorithm.
*
* I've added the fallbackSort function of 1.0.6 and tried to integrate it with the existing code without touching too much. I've also removed the now
* unused randomization code.
*/
private static final int FTAB_LENGTH = 65537; // 262148 byte
/*
* LBZ2: If you are ever unlucky/improbable enough to get a stack overflow whilst sorting, increase the following constant and try again. In practice I have
* never seen the stack go above 27 elems, so the following limit seems very generous.
*/
private static final int QSORT_STACK_SIZE = 1000;
private static final int FALLBACK_QSORT_STACK_SIZE = 100;
private static final int STACK_SIZE = Math.max(QSORT_STACK_SIZE, FALLBACK_QSORT_STACK_SIZE);
private static final int FALLBACK_QSORT_SMALL_THRESH = 10;
/*
* LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. Possibly because the number of elems to sort is usually small, typically <=
* 20.
*/
private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
private static final int SMALL_THRESH = 20;
private static final int DEPTH_THRESH = 10;
private static final int WORK_FACTOR = 30;
private static final int SETMASK = 1 << 21;
private static final int CLEARMASK = ~SETMASK;
private static int med3(final int a, final int b, final int c) {
return a < b ? b < c ? b : a < c ? c : a : b > c ? b : a > c ? c : a;
}
private static void vswap(final int[] fmap, int p1, int p2, int n) {
n += p1;
while (p1 < n) {
final int t = fmap[p1];
fmap[p1++] = fmap[p2];
fmap[p2++] = t;
}
}
/*
* Used when sorting. If too many long comparisons happen, we stop sorting, and use fallbackSort instead.
*/
private int workDone;
private int workLimit;
private boolean firstAttempt;
private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte
private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte
/*---------------------------------------------*/
/*---------------------------------------------*/
/*--- LBZ2: Fallback O(N log(N)^2) sorting ---*/
/*--- algorithm, for repetitive blocks ---*/
/*---------------------------------------------*/
/*
* This is the fallback sorting algorithm libbzip2 1.0.6 uses for repetitive or very short inputs.
*
* The idea is inspired by Manber-Myers string suffix sorting algorithm. First a bucket sort places each permutation of the block into a bucket based on its
* first byte. Permutations are represented by pointers to their first character kept in (partially) sorted order inside the array ftab.
*
* The next step visits all buckets in order and performs a quicksort on all permutations of the bucket based on the index of the bucket the second byte of
* the permutation belongs to, thereby forming new buckets. When arrived here the permutations are sorted up to the second character and we have buckets of
* permutations that are identical up to two characters.
*
* Repeat the step of quicksorting each bucket, now based on the bucket holding the sequence of the third and forth character leading to four byte buckets.
* Repeat this doubling of bucket sizes until all buckets only contain single permutations or the bucket size exceeds the block size.
*
* I.e.
*
* "abraba" form three buckets for the chars "a", "b", and "r" in the first step with
*
* fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 }
*
* when looking at the bucket of "a"s the second characters are in the buckets that start with fmap-index 0 (rolled over), 3 and 3 respectively, forming two
* new buckets "aa" and "ab", so we get
*
* fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 }
*
* since the last bucket only contained a single item it didn't have to be sorted at all.
*
* There now is just one bucket with more than one permutation that remains to be sorted. For the permutation that starts with index 3 the third and forth
* char are in bucket 'aa' at index 0 and for the one starting at block index 0 they are in bucket 'ra' with sort index 5. The fully sorted order then
* becomes.
*
* fmap = { 5, 3, 0, 4, 1, 2 }
*/
private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
private final int[] mainSort_runningOrder = new int[256]; // 1024 byte
private final int[] mainSort_copy = new int[256]; // 1024 byte
private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
private final int[] ftab = new int[FTAB_LENGTH]; // 262148 byte
/**
* Array instance identical to Data's sfmap, both are used only temporarily and indepently, so we do not need to allocate additional memory.
*/
private final char[] quadrant;
private int[] eclass;
/*---------------------------------------------*/
BlockSort(final BZip2CompressorOutputStream.Data data) {
this.quadrant = data.sfmap;
}
void blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
this.workLimit = WORK_FACTOR * last;
this.workDone = 0;
this.firstAttempt = true;
if (last + 1 < 10000) {
fallbackSort(data, last);
} else {
mainSort(data, last);
if (this.firstAttempt && this.workDone > this.workLimit) {
fallbackSort(data, last);
}
}
final int[] fmap = data.fmap;
data.origPtr = -1;
for (int i = 0; i <= last; i++) {
if (fmap[i] == 0) {
data.origPtr = i;
break;
}
}
// assert (data.origPtr != -1) : data.origPtr;
}
/*
* The C code uses an array of ints (each int holding 32 flags) to represents the bucket-start flags (bhtab). It also contains optimizations to skip over 32
* consecutively set or consecutively unset bits on word boundaries at once. For now I've chosen to use the simpler but potentially slower code using BitSet
* - also in the hope that using the BitSet#nextXXX methods may be fast enough.
*/
/**
* @param fmap points to the index of the starting point of a permutation inside the block of data in the current partially sorted order
* @param eclass points from the index of a character inside the block to the first index in fmap that contains the bucket of its suffix that is sorted in
* this step.
* @param loSt lower boundary of the fmap-interval to be sorted
* @param hiSt upper boundary of the fmap-interval to be sorted
*/
private void fallbackQSort3(final int[] fmap, final int[] eclass, final int loSt, final int hiSt) {
int lo, unLo, ltLo, hi, unHi, gtHi, n;
long r = 0;
int sp = 0;
fpush(sp++, loSt, hiSt);
while (sp > 0) {
final int[] s = fpop(--sp);
lo = s[0];
hi = s[1];
if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
fallbackSimpleSort(fmap, eclass, lo, hi);
continue;
}
/*
* LBZ2: Random partitioning. Median of 3 sometimes fails to avoid bad cases. Median of 9 seems to help but looks rather expensive. This too seems
* to work but is cheaper. Guidance for the magic constants 7621 and 32768 is taken from Sedgewick's algorithms book, chapter 35.
*/
r = (r * 7621 + 1) % 32768;
final long r3 = r % 3;
final long med;
if (r3 == 0) {
med = eclass[fmap[lo]];
} else if (r3 == 1) {
med = eclass[fmap[lo + hi >>> 1]];
} else {
med = eclass[fmap[hi]];
}
unLo = ltLo = lo;
unHi = gtHi = hi;
// looks like the ternary partition attributed to Wegner
// in the cited Sedgewick paper
while (true) {
while (true) {
if (unLo > unHi) {
break;
}
n = eclass[fmap[unLo]] - (int) med;
if (n == 0) {
fswap(fmap, unLo, ltLo);
ltLo++;
unLo++;
continue;
}
if (n > 0) {
break;
}
unLo++;
}
while (true) {
if (unLo > unHi) {
break;
}
n = eclass[fmap[unHi]] - (int) med;
if (n == 0) {
fswap(fmap, unHi, gtHi);
gtHi--;
unHi--;
continue;
}
if (n < 0) {
break;
}
unHi--;
}
if (unLo > unHi) {
break;
}
fswap(fmap, unLo, unHi);
unLo++;
unHi--;
}
if (gtHi < ltLo) {
continue;
}
n = Math.min(ltLo - lo, unLo - ltLo);
fvswap(fmap, lo, unLo - n, n);
int m = Math.min(hi - gtHi, gtHi - unHi);
fvswap(fmap, unHi + 1, hi - m + 1, m);
n = lo + unLo - ltLo - 1;
m = hi - (gtHi - unHi) + 1;
if (n - lo > hi - m) {
fpush(sp++, lo, n);
fpush(sp++, m, hi);
} else {
fpush(sp++, m, hi);
fpush(sp++, lo, n);
}
}
}
/*---------------------------------------------*/
/**
* @param fmap points to the index of the starting point of a permutation inside the block of data in the current partially sorted order
* @param eclass points from the index of a character inside the block to the first index in fmap that contains the bucket of its suffix that is sorted in
* this step.
* @param lo lower boundary of the fmap-interval to be sorted
* @param hi upper boundary of the fmap-interval to be sorted
*/
private void fallbackSimpleSort(final int[] fmap, final int[] eclass, final int lo, final int hi) {
if (lo == hi) {
return;
}
int j;
if (hi - lo > 3) {
for (int i = hi - 4; i >= lo; i--) {
final int tmp = fmap[i];
final int ec_tmp = eclass[tmp];
for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) {
fmap[j - 4] = fmap[j];
}
fmap[j - 4] = tmp;
}
}
for (int i = hi - 1; i >= lo; i--) {
final int tmp = fmap[i];
final int ec_tmp = eclass[tmp];
for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) {
fmap[j - 1] = fmap[j];
}
fmap[j - 1] = tmp;
}
}
/**
* Adapt fallbackSort to the expected interface of the rest of the code, in particular deal with the fact that block starts at offset 1 (in libbzip2 1.0.6
* it starts at 0).
*/
void fallbackSort(final BZip2CompressorOutputStream.Data data, final int last) {
data.block[0] = data.block[last + 1];
fallbackSort(data.fmap, data.block, last + 1);
for (int i = 0; i < last + 1; i++) {
--data.fmap[i];
}
for (int i = 0; i < last + 1; i++) {
if (data.fmap[i] == -1) {
data.fmap[i] = last;
break;
}
}
}
/*--
LBZ2: The following is an implementation of
an elegant 3-way quicksort for strings,
described in a paper "Fast Algorithms for
Sorting and Searching Strings", by Robert
Sedgewick and Jon L. Bentley.
--*/
/**
* @param fmap points to the index of the starting point of a permutation inside the block of data in the current partially sorted order
* @param block the original data
* @param nblock size of the block
*/
void fallbackSort(final int[] fmap, final byte[] block, final int nblock) {
final int[] ftab = new int[257];
int H, i, j, k, l, r, cc, cc1;
int nNotDone;
final int nBhtab;
final int[] eclass = getEclass();
for (i = 0; i < nblock; i++) {
eclass[i] = 0;
}
/*--
LBZ2: Initial 1-char radix sort to generate
initial fmap and initial BH bits.
--*/
for (i = 0; i < nblock; i++) {
ftab[block[i] & 0xff]++;
}
for (i = 1; i < 257; i++) {
ftab[i] += ftab[i - 1];
}
for (i = 0; i < nblock; i++) {
j = block[i] & 0xff;
k = ftab[j] - 1;
ftab[j] = k;
fmap[k] = i;
}
nBhtab = 64 + nblock;
final BitSet bhtab = new BitSet(nBhtab);
for (i = 0; i < 256; i++) {
bhtab.set(ftab[i]);
}
/*--
LBZ2: Inductively refine the buckets. Kind-of an
"exponential radix sort" (!), inspired by the
Manber-Myers suffix array construction algorithm.
--*/
/*-- LBZ2: set sentinel bits for block-end detection --*/
for (i = 0; i < 32; i++) {
bhtab.set(nblock + 2 * i);
bhtab.clear(nblock + 2 * i + 1);
}
/*-- LBZ2: the log(N) loop --*/
H = 1;
while (true) {
j = 0;
for (i = 0; i < nblock; i++) {
if (bhtab.get(i)) {
j = i;
}
k = fmap[i] - H;
if (k < 0) {
k += nblock;
}
eclass[k] = j;
}
nNotDone = 0;
r = -1;
while (true) {
/*-- LBZ2: find the next non-singleton bucket --*/
k = r + 1;
k = bhtab.nextClearBit(k);
l = k - 1;
if (l >= nblock) {
break;
}
k = bhtab.nextSetBit(k + 1);
r = k - 1;
if (r >= nblock) {
break;
}
/*-- LBZ2: now [l, r] bracket current bucket --*/
if (r > l) {
nNotDone += r - l + 1;
fallbackQSort3(fmap, eclass, l, r);
/*-- LBZ2: scan bucket and generate header bits-- */
cc = -1;
for (i = l; i <= r; i++) {
cc1 = eclass[fmap[i]];
if (cc != cc1) {
bhtab.set(i);
cc = cc1;
}
}
}
}
H *= 2;
if (H > nblock || nNotDone == 0) {
break;
}
}
}
private int[] fpop(final int sp) {
return new int[] { stack_ll[sp], stack_hh[sp] };
}
private void fpush(final int sp, final int lz, final int hz) {
stack_ll[sp] = lz;
stack_hh[sp] = hz;
}
/**
* swaps two values in fmap
*/
private void fswap(final int[] fmap, final int zz1, final int zz2) {
final int zztmp = fmap[zz1];
fmap[zz1] = fmap[zz2];
fmap[zz2] = zztmp;
}
/**
* swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap.
*/
private void fvswap(final int[] fmap, int yyp1, int yyp2, int yyn) {
while (yyn > 0) {
fswap(fmap, yyp1, yyp2);
yyp1++;
yyp2++;
yyn--;
}
}
private int[] getEclass() {
if (eclass == null) {
eclass = new int[quadrant.length / 2];
}
return eclass;
}
/**
* Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
*/
private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, final int loSt, final int hiSt, final int dSt, final int last) {
final int[] stack_ll = this.stack_ll;
final int[] stack_hh = this.stack_hh;
final int[] stack_dd = this.stack_dd;
final int[] fmap = dataShadow.fmap;
final byte[] block = dataShadow.block;
stack_ll[0] = loSt;
stack_hh[0] = hiSt;
stack_dd[0] = dSt;
for (int sp = 1; --sp >= 0;) {
final int lo = stack_ll[sp];
final int hi = stack_hh[sp];
final int d = stack_dd[sp];
if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
if (mainSimpleSort(dataShadow, lo, hi, d, last)) {
return;
}
} else {
final int d1 = d + 1;
final int med = med3(block[fmap[lo] + d1] & 0xff, block[fmap[hi] + d1] & 0xff, block[fmap[lo + hi >>> 1] + d1] & 0xff);
int unLo = lo;
int unHi = hi;
int ltLo = lo;
int gtHi = hi;
while (true) {
while (unLo <= unHi) {
final int n = (block[fmap[unLo] + d1] & 0xff) - med;
if (n == 0) {
final int temp = fmap[unLo];
fmap[unLo++] = fmap[ltLo];
fmap[ltLo++] = temp;
} else if (n < 0) {
unLo++;
} else {
break;
}
}
while (unLo <= unHi) {
final int n = (block[fmap[unHi] + d1] & 0xff) - med;
if (n == 0) {
final int temp = fmap[unHi];
fmap[unHi--] = fmap[gtHi];
fmap[gtHi--] = temp;
} else if (n > 0) {
unHi--;
} else {
break;
}
}
if (unLo > unHi) {
break;
}
final int temp = fmap[unLo];
fmap[unLo++] = fmap[unHi];
fmap[unHi--] = temp;
}
if (gtHi < ltLo) {
stack_ll[sp] = lo;
stack_hh[sp] = hi;
stack_dd[sp] = d1;
} else {
int n = Math.min(ltLo - lo, unLo - ltLo);
vswap(fmap, lo, unLo - n, n);
int m = Math.min(hi - gtHi, gtHi - unHi);
vswap(fmap, unLo, hi - m + 1, m);
n = lo + unLo - ltLo - 1;
m = hi - (gtHi - unHi) + 1;
stack_ll[sp] = lo;
stack_hh[sp] = n;
stack_dd[sp] = d;
sp++;
stack_ll[sp] = n + 1;
stack_hh[sp] = m - 1;
stack_dd[sp] = d1;
sp++;
stack_ll[sp] = m;
stack_hh[sp] = hi;
stack_dd[sp] = d;
}
sp++;
}
}
}
/**
* This is the most hammered method of this class.
*
* <p>
* This is the version using unrolled loops. Normally I never use such ones in Java code. The unrolling has shown a noticeable performance improvement on
* JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the JIT compiler of the vm.
* </p>
*/
private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow, final int lo, final int hi, final int d, final int lastShadow) {
final int bigN = hi - lo + 1;
if (bigN < 2) {
return this.firstAttempt && this.workDone > this.workLimit;
}
int hp = 0;
while (INCS[hp] < bigN) {
hp++;
}
final int[] fmap = dataShadow.fmap;
final char[] quadrant = this.quadrant;
final byte[] block = dataShadow.block;
final int lastPlus1 = lastShadow + 1;
final boolean firstAttemptShadow = this.firstAttempt;
final int workLimitShadow = this.workLimit;
int workDoneShadow = this.workDone;
// Following block contains unrolled code which could be shortened by
// coding it in additional loops.
HP: while (--hp >= 0) {
final int h = INCS[hp];
final int mj = lo + h - 1;
for (int i = lo + h; i <= hi;) {
// copy
for (int k = 3; i <= hi && --k >= 0; i++) {
final int v = fmap[i];
final int vd = v + d;
int j = i;
// for (int a;
// (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
// block, quadrant, lastShadow);
// j -= h) {
// fmap[j] = a;
// }
//
// unrolled version:
// start inline mainGTU
boolean onceRunned = false;
int a = 0;
HAMMER: while (true) {
if (onceRunned) {
fmap[j] = a;
if ((j -= h) <= mj) { // NOSONAR
break HAMMER;
}
} else {
onceRunned = true;
}
a = fmap[j - h];
int i1 = a + d;
int i2 = vd;
// following could be done in a loop, but
// unrolled it for performance:
if (block[i1 + 1] == block[i2 + 1]) {
if (block[i1 + 2] == block[i2 + 2]) {
if (block[i1 + 3] == block[i2 + 3]) {
if (block[i1 + 4] == block[i2 + 4]) {
if (block[i1 + 5] == block[i2 + 5]) {
if (block[i1 += 6] == block[i2 += 6]) { // NOSONAR
int x = lastShadow;
X: while (x > 0) {
x -= 4;
if (block[i1 + 1] == block[i2 + 1]) {
if (quadrant[i1] == quadrant[i2]) {
if (block[i1 + 2] == block[i2 + 2]) {
if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
if (block[i1 + 3] == block[i2 + 3]) {
if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
if (block[i1 + 4] == block[i2 + 4]) {
if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
if ((i1 += 4) >= lastPlus1) { // NOSONAR
i1 -= lastPlus1;
}
if ((i2 += 4) >= lastPlus1) { // NOSONAR
i2 -= lastPlus1;
}
workDoneShadow++;
continue X;
}
if (quadrant[i1 + 3] > quadrant[i2 + 3]) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if (quadrant[i1 + 2] > quadrant[i2 + 2]) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if (quadrant[i1 + 1] > quadrant[i2 + 1]) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if (quadrant[i1] > quadrant[i2]) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
break HAMMER;
} // while x > 0
if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
continue HAMMER;
}
break HAMMER;
}
if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
continue HAMMER;
}
break HAMMER;
} // HAMMER
// end inline mainGTU
fmap[j] = v;
}
if (firstAttemptShadow && i <= hi && workDoneShadow > workLimitShadow) {
break HP;
}
}
}
this.workDone = workDoneShadow;
return firstAttemptShadow && workDoneShadow > workLimitShadow;
}
void mainSort(final BZip2CompressorOutputStream.Data dataShadow, final int lastShadow) {
final int[] runningOrder = this.mainSort_runningOrder;
final int[] copy = this.mainSort_copy;
final boolean[] bigDone = this.mainSort_bigDone;
final int[] ftab = this.ftab;
final byte[] block = dataShadow.block;
final int[] fmap = dataShadow.fmap;
final char[] quadrant = this.quadrant;
final int workLimitShadow = this.workLimit;
final boolean firstAttemptShadow = this.firstAttempt;
// LBZ2: Set up the 2-byte frequency table
Arrays.fill(ftab, 0);
/*
* In the various block-sized structures, live data runs from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area for block.
*/
for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
block[lastShadow + i + 2] = block[i % (lastShadow + 1) + 1];
}
for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES + 1; --i >= 0;) {
quadrant[i] = 0;
}
block[0] = block[lastShadow + 1];
// LBZ2: Complete the initial radix sort:
int c1 = block[0] & 0xff;
for (int i = 0; i <= lastShadow; i++) {
final int c2 = block[i + 1] & 0xff;
ftab[(c1 << 8) + c2]++;
c1 = c2;
}
for (int i = 1; i <= 65536; i++) {
ftab[i] += ftab[i - 1];
}
c1 = block[1] & 0xff;
for (int i = 0; i < lastShadow; i++) {
final int c2 = block[i + 2] & 0xff;
fmap[--ftab[(c1 << 8) + c2]] = i;
c1 = c2;
}
fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
/*
* LBZ2: Now ftab contains the first loc of every small bucket. Calculate the running order, from smallest to largest big bucket.
*/
for (int i = 256; --i >= 0;) {
bigDone[i] = false;
runningOrder[i] = i;
}
// h = 364, 121, 40, 13, 4, 1
for (int h = 364; h != 1;) { // NOSONAR
h /= 3;
for (int i = h; i <= 255; i++) {
final int vv = runningOrder[i];
final int a = ftab[vv + 1 << 8] - ftab[vv << 8];
final int b = h - 1;
int j = i;
for (int ro = runningOrder[j - h]; ftab[ro + 1 << 8] - ftab[ro << 8] > a; ro = runningOrder[j - h]) {
runningOrder[j] = ro;
j -= h;
if (j <= b) {
break;
}
}
runningOrder[j] = vv;
}
}
/*
* LBZ2: The main sorting loop.
*/
for (int i = 0; i <= 255; i++) {
/*
* LBZ2: Process big buckets, starting with the least full.
*/
final int ss = runningOrder[i];
// Step 1:
/*
* LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small buckets [ss, j]. Hopefully previous pointer-scanning phases have already
* completed many of the small buckets [ss, j], so we don't have to sort them at all.
*/
for (int j = 0; j <= 255; j++) {
final int sb = (ss << 8) + j;
final int ftab_sb = ftab[sb];
if ((ftab_sb & SETMASK) != SETMASK) {
final int lo = ftab_sb & CLEARMASK;
final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
if (hi > lo) {
mainQSort3(dataShadow, lo, hi, 2, lastShadow);
if (firstAttemptShadow && this.workDone > workLimitShadow) {
return;
}
}
ftab[sb] = ftab_sb | SETMASK;
}
}
// Step 2:
// LBZ2: Now scan this big bucket so as to synthesise the
// sorted order for small buckets [t, ss] for all t != ss.
for (int j = 0; j <= 255; j++) {
copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
}
for (int j = ftab[ss << 8] & CLEARMASK, hj = ftab[ss + 1 << 8] & CLEARMASK; j < hj; j++) {
final int fmap_j = fmap[j];
c1 = block[fmap_j] & 0xff;
if (!bigDone[c1]) {
fmap[copy[c1]] = fmap_j == 0 ? lastShadow : fmap_j - 1;
copy[c1]++;
}
}
for (int j = 256; --j >= 0;) {
ftab[(j << 8) + ss] |= SETMASK;
}
// Step 3:
/*
* LBZ2: The ss big bucket is now done. Record this fact, and update the quadrant descriptors. Remember to update quadrants in the overshoot area
* too, if necessary. The "if (i < 255)" test merely skips this updating for the last bucket processed, since updating for the last bucket is
* pointless.
*/
bigDone[ss] = true;
if (i < 255) {
final int bbStart = ftab[ss << 8] & CLEARMASK;
final int bbSize = (ftab[ss + 1 << 8] & CLEARMASK) - bbStart;
int shifts = 0;
while (bbSize >> shifts > 65534) {
shifts++;
}
for (int j = 0; j < bbSize; j++) {
final int a2update = fmap[bbStart + j];
final char qVal = (char) (j >> shifts);
quadrant[a2update] = qVal;
if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
quadrant[a2update + lastShadow + 1] = qVal;
}
}
}
}
}
}