View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.bzip2;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  import java.util.Arrays;
24  
25  import org.apache.commons.compress.compressors.CompressorOutputStream;
26  import org.apache.commons.io.IOUtils;
27  
28  /**
29   * An output stream that compresses into the BZip2 format into another stream.
30   *
31   * <p>
32   * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force
33   * {@code BZip2CompressorOutputStream} to release the allocated memory.
34   * </p>
35   *
36   * <p>
37   * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower
38   * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input.
39   * </p>
40   *
41   * <p>
42   * You can compute the memory usage for compressing by the following formula:
43   * </p>
44   *
45   * <pre>
46   * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
47   * </pre>
48   *
49   * <p>
50   * To get the memory required for decompression by {@link BZip2CompressorInputStream} use
51   * </p>
52   *
53   * <pre>
54   * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
55   * </pre>
56   *
57   * <table style="width:100%" border="1">
58   * <caption>Memory usage by blocksize</caption>
59   * <tr>
60   * <th colspan="3">Memory usage by blocksize</th>
61   * </tr>
62   * <tr>
63   * <th style="text-align: right">Blocksize</th>
64   * <th style="text-align: right">Compression<br>
65   * memory usage</th>
66   * <th style="text-align: right">Decompression<br>
67   * memory usage</th>
68   * </tr>
69   * <tr>
70   * <td style="text-align: right">100k</td>
71   * <td style="text-align: right">1300k</td>
72   * <td style="text-align: right">565k</td>
73   * </tr>
74   * <tr>
75   * <td style="text-align: right">200k</td>
76   * <td style="text-align: right">2200k</td>
77   * <td style="text-align: right">1065k</td>
78   * </tr>
79   * <tr>
80   * <td style="text-align: right">300k</td>
81   * <td style="text-align: right">3100k</td>
82   * <td style="text-align: right">1565k</td>
83   * </tr>
84   * <tr>
85   * <td style="text-align: right">400k</td>
86   * <td style="text-align: right">4000k</td>
87   * <td style="text-align: right">2065k</td>
88   * </tr>
89   * <tr>
90   * <td style="text-align: right">500k</td>
91   * <td style="text-align: right">4900k</td>
92   * <td style="text-align: right">2565k</td>
93   * </tr>
94   * <tr>
95   * <td style="text-align: right">600k</td>
96   * <td style="text-align: right">5800k</td>
97   * <td style="text-align: right">3065k</td>
98   * </tr>
99   * <tr>
100  * <td style="text-align: right">700k</td>
101  * <td style="text-align: right">6700k</td>
102  * <td style="text-align: right">3565k</td>
103  * </tr>
104  * <tr>
105  * <td style="text-align: right">800k</td>
106  * <td style="text-align: right">7600k</td>
107  * <td style="text-align: right">4065k</td>
108  * </tr>
109  * <tr>
110  * <td style="text-align: right">900k</td>
111  * <td style="text-align: right">8500k</td>
112  * <td style="text-align: right">4565k</td>
113  * </tr>
114  * </table>
115  *
116  * <p>
117  * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block.
118  * </p>
119  *
120  * <p>
121  * Instances of this class are not threadsafe.
122  * </p>
123  *
124  * <p>
125  * TODO: Update to BZip2 1.0.1
126  * </p>
127  *
128  * @NotThreadSafe
129  */
130 public class BZip2CompressorOutputStream extends CompressorOutputStream<OutputStream> implements BZip2Constants {
131 
132     static final class Data {
133 
134         // with blockSize 900k
135         /* maps unsigned byte => "does it occur in block" */
136         final boolean[] inUse = new boolean[256]; // 256 byte
137         final byte[] unseqToSeq = new byte[256]; // 256 byte
138         final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
139         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
140         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
141 
142         final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
143         final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
144         // byte
145         final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
146         // byte
147         final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
148         final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
149         final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
150         // byte
151         final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
152         final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
153 
154         final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
155         final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
156         final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
157 
158         // ------------
159         // 333408 byte
160 
161         /*
162          * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0.
163          */
164         final byte[] block; // 900021 byte
165         /*
166          * maps index in Burrows-Wheeler transformed block => index of byte in original block
167          */
168         final int[] fmap; // 3600000 byte
169         final char[] sfmap; // 3600000 byte
170         // ------------
171         // 8433529 byte
172         // ============
173 
174         /**
175          * Index of original line in Burrows-Wheeler table.
176          *
177          * <p>
178          * This is the index in fmap that points to the last byte of the original data.
179          * </p>
180          */
181         int origPtr;
182 
183         Data(final int blockSize100k) {
184             final int n = blockSize100k * BASEBLOCKSIZE;
185             this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES];
186             this.fmap = new int[n];
187             this.sfmap = new char[2 * n];
188         }
189 
190     }
191 
192     /**
193      * The minimum supported blocksize {@code  == 1}.
194      */
195     public static final int MIN_BLOCKSIZE = 1;
196 
197     /**
198      * The maximum supported blocksize {@code  == 9}.
199      */
200     public static final int MAX_BLOCKSIZE = 9;
201     private static final int GREATER_ICOST = 15;
202 
203     private static final int LESSER_ICOST = 0;
204 
205     /**
206      * Chooses a blocksize based on the given length of the data to compress.
207      *
208      * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns
209      *         {@code MAX_BLOCKSIZE} always.
210      *
211      * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}.
212      */
213     public static int chooseBlockSize(final long inputLength) {
214         return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE;
215     }
216 
217     private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) {
218         int vec = 0;
219         for (int n = minLen; n <= maxLen; n++) {
220             for (int i = 0; i < alphaSize; i++) {
221                 if ((length[i] & 0xff) == n) {
222                     code[i] = vec;
223                     vec++;
224                 }
225             }
226             vec <<= 1;
227         }
228     }
229 
230     private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) {
231         /*
232          * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel.
233          */
234         final int[] heap = dat.heap;
235         final int[] weight = dat.weight;
236         final int[] parent = dat.parent;
237 
238         for (int i = alphaSize; --i >= 0;) {
239             weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
240         }
241 
242         for (boolean tooLong = true; tooLong;) {
243             tooLong = false;
244 
245             int nNodes = alphaSize;
246             int nHeap = 0;
247             heap[0] = 0;
248             weight[0] = 0;
249             parent[0] = -2;
250 
251             for (int i = 1; i <= alphaSize; i++) {
252                 parent[i] = -1;
253                 nHeap++;
254                 heap[nHeap] = i;
255 
256                 int zz = nHeap;
257                 final int tmp = heap[zz];
258                 while (weight[tmp] < weight[heap[zz >> 1]]) {
259                     heap[zz] = heap[zz >> 1];
260                     zz >>= 1;
261                 }
262                 heap[zz] = tmp;
263             }
264 
265             while (nHeap > 1) {
266                 final int n1 = heap[1];
267                 heap[1] = heap[nHeap];
268                 nHeap--;
269 
270                 int yy = 0;
271                 int zz = 1;
272                 int tmp = heap[1];
273 
274                 while (true) {
275                     yy = zz << 1;
276 
277                     if (yy > nHeap) {
278                         break;
279                     }
280 
281                     if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
282                         yy++;
283                     }
284 
285                     if (weight[tmp] < weight[heap[yy]]) {
286                         break;
287                     }
288 
289                     heap[zz] = heap[yy];
290                     zz = yy;
291                 }
292 
293                 heap[zz] = tmp;
294 
295                 final int n2 = heap[1];
296                 heap[1] = heap[nHeap];
297                 nHeap--;
298 
299                 yy = 0;
300                 zz = 1;
301                 tmp = heap[1];
302 
303                 while (true) {
304                     yy = zz << 1;
305 
306                     if (yy > nHeap) {
307                         break;
308                     }
309 
310                     if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) {
311                         yy++;
312                     }
313 
314                     if (weight[tmp] < weight[heap[yy]]) {
315                         break;
316                     }
317 
318                     heap[zz] = heap[yy];
319                     zz = yy;
320                 }
321 
322                 heap[zz] = tmp;
323                 nNodes++;
324                 parent[n1] = parent[n2] = nNodes;
325 
326                 final int weight_n1 = weight[n1];
327                 final int weight_n2 = weight[n2];
328                 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff);
329 
330                 parent[nNodes] = -1;
331                 nHeap++;
332                 heap[nHeap] = nNodes;
333 
334                 tmp = 0;
335                 zz = nHeap;
336                 tmp = heap[zz];
337                 final int weight_tmp = weight[tmp];
338                 while (weight_tmp < weight[heap[zz >> 1]]) {
339                     heap[zz] = heap[zz >> 1];
340                     zz >>= 1;
341                 }
342                 heap[zz] = tmp;
343 
344             }
345 
346             for (int i = 1; i <= alphaSize; i++) {
347                 int j = 0;
348                 int k = i;
349 
350                 for (int parent_k; (parent_k = parent[k]) >= 0;) {
351                     k = parent_k;
352                     j++;
353                 }
354 
355                 len[i - 1] = (byte) j;
356                 if (j > maxLen) {
357                     tooLong = true;
358                 }
359             }
360 
361             if (tooLong) {
362                 for (int i = 1; i < alphaSize; i++) {
363                     int j = weight[i] >> 8;
364                     j = 1 + (j >> 1);
365                     weight[i] = j << 8;
366                 }
367             }
368         }
369     }
370 
371     /**
372      * Index of the last char in the block, so the block size == last + 1.
373      */
374     private int last;
375     /**
376      * Always: in the range 0 .. 9. The current block size is 100000 * this number.
377      */
378     private final int blockSize100k;
379 
380     private int bsBuff;
381 
382     private int bsLive;
383 
384     private final CRC crc = new CRC();
385     private int nInUse;
386 
387     private int nMTF;
388     private int currentChar = -1;
389     private int runLength;
390 
391     private int combinedCRC;
392 
393     private final int allowableBlockSize;
394     /**
395      * All memory intensive stuff.
396      */
397     private Data data;
398 
399     private BlockSort blockSorter;
400 
401     private volatile boolean closed;
402 
403     /**
404      * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
405      *
406      * @param out the destination stream.
407      *
408      * @throws IOException          if an I/O error occurs in the specified stream.
409      * @throws NullPointerException if {@code out == null}.
410      */
411     public BZip2CompressorOutputStream(final OutputStream out) throws IOException {
412         this(out, MAX_BLOCKSIZE);
413     }
414 
415     /**
416      * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
417      *
418      * @param out       the destination stream.
419      * @param blockSize the blockSize as 100k units.
420      *
421      * @throws IOException              if an I/O error occurs in the specified stream.
422      * @throws IllegalArgumentException if {@code (blockSize &lt; 1) || (blockSize &gt; 9)}.
423      * @throws NullPointerException     if {@code out == null}.
424      *
425      * @see #MIN_BLOCKSIZE
426      * @see #MAX_BLOCKSIZE
427      */
428     public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
429         super(out);
430         if (blockSize < 1) {
431             throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
432         }
433         if (blockSize > 9) {
434             throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
435         }
436         this.blockSize100k = blockSize;
437         /* 20 is just a paranoia constant */
438         this.allowableBlockSize = this.blockSize100k * BASEBLOCKSIZE - 20;
439         init();
440     }
441 
442     private void blockSort() {
443         blockSorter.blockSort(data, last);
444     }
445 
446     private void bsFinishedWithStream() throws IOException {
447         while (this.bsLive > 0) {
448             final int ch = this.bsBuff >> 24;
449             this.out.write(ch); // write 8-bit
450             this.bsBuff <<= 8;
451             this.bsLive -= 8;
452         }
453     }
454 
455     private void bsPutInt(final int u) throws IOException {
456         bsW(8, u >> 24 & 0xff);
457         bsW(8, u >> 16 & 0xff);
458         bsW(8, u >> 8 & 0xff);
459         bsW(8, u & 0xff);
460     }
461 
462     private void bsPutUByte(final int c) throws IOException {
463         bsW(8, c);
464     }
465 
466     private void bsW(final int n, final int v) throws IOException {
467         final OutputStream outShadow = this.out;
468         int bsLiveShadow = this.bsLive;
469         int bsBuffShadow = this.bsBuff;
470 
471         while (bsLiveShadow >= 8) {
472             outShadow.write(bsBuffShadow >> 24); // write 8-bit
473             bsBuffShadow <<= 8;
474             bsLiveShadow -= 8;
475         }
476 
477         this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n;
478         this.bsLive = bsLiveShadow + n;
479     }
480 
481     private void checkClosed() throws IOException {
482         if (closed) {
483             throw new IOException("Stream closed");
484         }
485     }
486 
487     @Override
488     public void close() throws IOException {
489         if (!closed) {
490             try {
491                 finish();
492             } finally {
493                 IOUtils.close(out);
494             }
495         }
496     }
497 
498     private void endBlock() throws IOException {
499         final int blockCRC = this.crc.getValue();
500         this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31;
501         this.combinedCRC ^= blockCRC;
502 
503         // empty block at end of file
504         if (this.last == -1) {
505             return;
506         }
507 
508         /* sort the block and establish posn of original string */
509         blockSort();
510 
511         /*
512          * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the
513          * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits,
514          * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal
515          * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files.
516          */
517         bsPutUByte(0x31);
518         bsPutUByte(0x41);
519         bsPutUByte(0x59);
520         bsPutUByte(0x26);
521         bsPutUByte(0x53);
522         bsPutUByte(0x59);
523 
524         /* Now the block's CRC, so it is in a known place. */
525         bsPutInt(blockCRC);
526 
527         /* Now a single bit indicating no randomization. */
528         bsW(1, 0);
529 
530         /* Finally, block's contents proper. */
531         moveToFrontCodeAndSend();
532     }
533 
534     private void endCompression() throws IOException {
535         /*
536          * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it
537          * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.)
538          */
539         bsPutUByte(0x17);
540         bsPutUByte(0x72);
541         bsPutUByte(0x45);
542         bsPutUByte(0x38);
543         bsPutUByte(0x50);
544         bsPutUByte(0x90);
545 
546         bsPutInt(this.combinedCRC);
547         bsFinishedWithStream();
548     }
549 
550     public void finish() throws IOException {
551         if (!closed) {
552             closed = true;
553             try {
554                 if (this.runLength > 0) {
555                     writeRun();
556                 }
557                 this.currentChar = -1;
558                 endBlock();
559                 endCompression();
560             } finally {
561                 this.blockSorter = null;
562                 this.data = null;
563             }
564         }
565     }
566 
567     @Override
568     public void flush() throws IOException {
569         if (out != null) {
570             super.flush();
571         }
572     }
573 
574     /*
575      * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form.
576      *
577      * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
578      */
579     private void generateMTFValues() {
580         final int lastShadow = this.last;
581         final Data dataShadow = this.data;
582         final boolean[] inUse = dataShadow.inUse;
583         final byte[] block = dataShadow.block;
584         final int[] fmap = dataShadow.fmap;
585         final char[] sfmap = dataShadow.sfmap;
586         final int[] mtfFreq = dataShadow.mtfFreq;
587         final byte[] unseqToSeq = dataShadow.unseqToSeq;
588         final byte[] yy = dataShadow.generateMTFValues_yy;
589 
590         // make maps
591         int nInUseShadow = 0;
592         for (int i = 0; i < 256; i++) {
593             if (inUse[i]) {
594                 unseqToSeq[i] = (byte) nInUseShadow;
595                 nInUseShadow++;
596             }
597         }
598         this.nInUse = nInUseShadow;
599 
600         final int eob = nInUseShadow + 1;
601 
602         Arrays.fill(mtfFreq, 0, eob + 1, 0);
603 
604         for (int i = nInUseShadow; --i >= 0;) {
605             yy[i] = (byte) i;
606         }
607 
608         int wr = 0;
609         int zPend = 0;
610 
611         for (int i = 0; i <= lastShadow; i++) {
612             final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
613             byte tmp = yy[0];
614             int j = 0;
615 
616             while (ll_i != tmp) {
617                 j++;
618                 final byte tmp2 = tmp;
619                 tmp = yy[j];
620                 yy[j] = tmp2;
621             }
622             yy[0] = tmp;
623 
624             if (j == 0) {
625                 zPend++;
626             } else {
627                 if (zPend > 0) {
628                     zPend--;
629                     while (true) {
630                         if ((zPend & 1) == 0) {
631                             sfmap[wr] = RUNA;
632                             wr++;
633                             mtfFreq[RUNA]++;
634                         } else {
635                             sfmap[wr] = RUNB;
636                             wr++;
637                             mtfFreq[RUNB]++;
638                         }
639 
640                         if (zPend < 2) {
641                             break;
642                         }
643                         zPend = zPend - 2 >> 1;
644                     }
645                     zPend = 0;
646                 }
647                 sfmap[wr] = (char) (j + 1);
648                 wr++;
649                 mtfFreq[j + 1]++;
650             }
651         }
652 
653         if (zPend > 0) {
654             zPend--;
655             while (true) {
656                 if ((zPend & 1) == 0) {
657                     sfmap[wr] = RUNA;
658                     wr++;
659                     mtfFreq[RUNA]++;
660                 } else {
661                     sfmap[wr] = RUNB;
662                     wr++;
663                     mtfFreq[RUNB]++;
664                 }
665 
666                 if (zPend < 2) {
667                     break;
668                 }
669                 zPend = zPend - 2 >> 1;
670             }
671         }
672 
673         sfmap[wr] = (char) eob;
674         mtfFreq[eob]++;
675         this.nMTF = wr + 1;
676     }
677 
678     /**
679      * Returns the blocksize parameter specified at construction time.
680      *
681      * @return the blocksize parameter specified at construction time
682      */
683     public final int getBlockSize() {
684         return this.blockSize100k;
685     }
686 
687     /**
688      * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating
689      * blockSize100k.
690      *
691      * @throws IOException if the magic bytes could not been written
692      */
693     private void init() throws IOException {
694         bsPutUByte('B');
695         bsPutUByte('Z');
696 
697         this.data = new Data(this.blockSize100k);
698         this.blockSorter = new BlockSort(this.data);
699 
700         // huffmanized magic bytes
701         bsPutUByte('h');
702         bsPutUByte('0' + this.blockSize100k);
703 
704         this.combinedCRC = 0;
705         initBlock();
706     }
707 
708     private void initBlock() {
709         // blockNo++;
710         this.crc.reset();
711         this.last = -1;
712         // ch = 0;
713 
714         final boolean[] inUse = this.data.inUse;
715         for (int i = 256; --i >= 0;) {
716             inUse[i] = false;
717         }
718 
719     }
720 
721     private void moveToFrontCodeAndSend() throws IOException {
722         bsW(24, this.data.origPtr);
723         generateMTFValues();
724         sendMTFValues();
725     }
726 
727     private void sendMTFValues() throws IOException {
728         final byte[][] len = this.data.sendMTFValues_len;
729         final int alphaSize = this.nInUse + 2;
730 
731         for (int t = N_GROUPS; --t >= 0;) {
732             final byte[] len_t = len[t];
733             for (int v = alphaSize; --v >= 0;) {
734                 len_t[v] = GREATER_ICOST;
735             }
736         }
737 
738         /* Decide how many coding tables to use */
739         // assert (this.nMTF > 0) : this.nMTF;
740         final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6;
741 
742         /* Generate an initial set of coding tables */
743         sendMTFValues0(nGroups, alphaSize);
744 
745         /*
746          * Iterate up to N_ITERS times to improve the tables.
747          */
748         final int nSelectors = sendMTFValues1(nGroups, alphaSize);
749 
750         /* Compute MTF values for the selectors. */
751         sendMTFValues2(nGroups, nSelectors);
752 
753         /* Assign actual codes for the tables. */
754         sendMTFValues3(nGroups, alphaSize);
755 
756         /* Transmit the mapping table. */
757         sendMTFValues4();
758 
759         /* Now the selectors. */
760         sendMTFValues5(nGroups, nSelectors);
761 
762         /* Now the coding tables. */
763         sendMTFValues6(nGroups, alphaSize);
764 
765         /* And finally, the block data proper */
766         sendMTFValues7();
767     }
768 
769     private void sendMTFValues0(final int nGroups, final int alphaSize) {
770         final byte[][] len = this.data.sendMTFValues_len;
771         final int[] mtfFreq = this.data.mtfFreq;
772 
773         int remF = this.nMTF;
774         int gs = 0;
775 
776         for (int nPart = nGroups; nPart > 0; nPart--) {
777             final int tFreq = remF / nPart;
778             int ge = gs - 1;
779             int aFreq = 0;
780 
781             for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) {
782                 aFreq += mtfFreq[++ge];
783             }
784 
785             if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) {
786                 aFreq -= mtfFreq[ge--];
787             }
788 
789             final byte[] len_np = len[nPart - 1];
790             for (int v = alphaSize; --v >= 0;) {
791                 if (v >= gs && v <= ge) {
792                     len_np[v] = LESSER_ICOST;
793                 } else {
794                     len_np[v] = GREATER_ICOST;
795                 }
796             }
797 
798             gs = ge + 1;
799             remF -= aFreq;
800         }
801     }
802 
803     private int sendMTFValues1(final int nGroups, final int alphaSize) {
804         final Data dataShadow = this.data;
805         final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
806         final int[] fave = dataShadow.sendMTFValues_fave;
807         final short[] cost = dataShadow.sendMTFValues_cost;
808         final char[] sfmap = dataShadow.sfmap;
809         final byte[] selector = dataShadow.selector;
810         final byte[][] len = dataShadow.sendMTFValues_len;
811         final byte[] len_0 = len[0];
812         final byte[] len_1 = len[1];
813         final byte[] len_2 = len[2];
814         final byte[] len_3 = len[3];
815         final byte[] len_4 = len[4];
816         final byte[] len_5 = len[5];
817         final int nMTFShadow = this.nMTF;
818 
819         int nSelectors = 0;
820 
821         for (int iter = 0; iter < N_ITERS; iter++) {
822             for (int t = nGroups; --t >= 0;) {
823                 fave[t] = 0;
824                 final int[] rfreqt = rfreq[t];
825                 for (int i = alphaSize; --i >= 0;) {
826                     rfreqt[i] = 0;
827                 }
828             }
829 
830             nSelectors = 0;
831 
832             for (int gs = 0; gs < this.nMTF;) {
833                 // Set group start & end marks.
834 
835                 // Calculate the cost of this group as coded by each of the
836                 // coding tables.
837 
838                 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
839 
840                 final byte mask = (byte) 0xff;
841                 if (nGroups == N_GROUPS) {
842                     // unrolled version of the else-block
843 
844                     short cost0 = 0;
845                     short cost1 = 0;
846                     short cost2 = 0;
847                     short cost3 = 0;
848                     short cost4 = 0;
849                     short cost5 = 0;
850 
851                     for (int i = gs; i <= ge; i++) {
852                         final int icv = sfmap[i];
853                         cost0 += (short) (len_0[icv] & mask);
854                         cost1 += (short) (len_1[icv] & mask);
855                         cost2 += (short) (len_2[icv] & mask);
856                         cost3 += (short) (len_3[icv] & mask);
857                         cost4 += (short) (len_4[icv] & mask);
858                         cost5 += (short) (len_5[icv] & mask);
859                     }
860 
861                     cost[0] = cost0;
862                     cost[1] = cost1;
863                     cost[2] = cost2;
864                     cost[3] = cost3;
865                     cost[4] = cost4;
866                     cost[5] = cost5;
867 
868                 } else {
869                     for (int t = nGroups; --t >= 0;) {
870                         cost[t] = 0;
871                     }
872 
873                     for (int i = gs; i <= ge; i++) {
874                         final int icv = sfmap[i];
875                         for (int t = nGroups; --t >= 0;) {
876                             cost[t] += (short) (len[t][icv] & mask);
877                         }
878                     }
879                 }
880 
881                 /*
882                  * Find the coding table which is best for this group, and record its identity in the selector table.
883                  */
884                 int bt = -1;
885                 for (int t = nGroups, bc = 999999999; --t >= 0;) {
886                     final int cost_t = cost[t];
887                     if (cost_t < bc) {
888                         bc = cost_t;
889                         bt = t;
890                     }
891                 }
892 
893                 fave[bt]++;
894                 selector[nSelectors] = (byte) bt;
895                 nSelectors++;
896 
897                 /*
898                  * Increment the symbol frequencies for the selected table.
899                  */
900                 final int[] rfreq_bt = rfreq[bt];
901                 for (int i = gs; i <= ge; i++) {
902                     rfreq_bt[sfmap[i]]++;
903                 }
904 
905                 gs = ge + 1;
906             }
907 
908             /*
909              * Recompute the tables based on the accumulated frequencies.
910              */
911             for (int t = 0; t < nGroups; t++) {
912                 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
913             }
914         }
915 
916         return nSelectors;
917     }
918 
919     private void sendMTFValues2(final int nGroups, final int nSelectors) {
920         // assert (nGroups < 8) : nGroups;
921 
922         final Data dataShadow = this.data;
923         final byte[] pos = dataShadow.sendMTFValues2_pos;
924 
925         for (int i = nGroups; --i >= 0;) {
926             pos[i] = (byte) i;
927         }
928 
929         for (int i = 0; i < nSelectors; i++) {
930             final byte ll_i = dataShadow.selector[i];
931             byte tmp = pos[0];
932             int j = 0;
933 
934             while (ll_i != tmp) {
935                 j++;
936                 final byte tmp2 = tmp;
937                 tmp = pos[j];
938                 pos[j] = tmp2;
939             }
940 
941             pos[0] = tmp;
942             dataShadow.selectorMtf[i] = (byte) j;
943         }
944     }
945 
946     private void sendMTFValues3(final int nGroups, final int alphaSize) {
947         final int[][] code = this.data.sendMTFValues_code;
948         final byte[][] len = this.data.sendMTFValues_len;
949 
950         for (int t = 0; t < nGroups; t++) {
951             int minLen = 32;
952             int maxLen = 0;
953             final byte[] len_t = len[t];
954             for (int i = alphaSize; --i >= 0;) {
955                 final int l = len_t[i] & 0xff;
956                 if (l > maxLen) {
957                     maxLen = l;
958                 }
959                 if (l < minLen) {
960                     minLen = l;
961                 }
962             }
963 
964             // assert (maxLen <= 20) : maxLen;
965             // assert (minLen >= 1) : minLen;
966 
967             hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
968         }
969     }
970 
971     private void sendMTFValues4() throws IOException {
972         final boolean[] inUse = this.data.inUse;
973         final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
974 
975         for (int i = 16; --i >= 0;) {
976             inUse16[i] = false;
977             final int i16 = i * 16;
978             for (int j = 16; --j >= 0;) {
979                 if (inUse[i16 + j]) {
980                     inUse16[i] = true;
981                     break;
982                 }
983             }
984         }
985 
986         for (int i = 0; i < 16; i++) {
987             bsW(1, inUse16[i] ? 1 : 0);
988         }
989 
990         final OutputStream outShadow = this.out;
991         int bsLiveShadow = this.bsLive;
992         int bsBuffShadow = this.bsBuff;
993 
994         for (int i = 0; i < 16; i++) {
995             if (inUse16[i]) {
996                 final int i16 = i * 16;
997                 for (int j = 0; j < 16; j++) {
998                     // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
999                     while (bsLiveShadow >= 8) {
1000                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1001                         bsBuffShadow <<= 8;
1002                         bsLiveShadow -= 8;
1003                     }
1004                     if (inUse[i16 + j]) {
1005                         bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1006                     }
1007                     bsLiveShadow++;
1008                 }
1009             }
1010         }
1011 
1012         this.bsBuff = bsBuffShadow;
1013         this.bsLive = bsLiveShadow;
1014     }
1015 
1016     private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException {
1017         bsW(3, nGroups);
1018         bsW(15, nSelectors);
1019 
1020         final OutputStream outShadow = this.out;
1021         final byte[] selectorMtf = this.data.selectorMtf;
1022 
1023         int bsLiveShadow = this.bsLive;
1024         int bsBuffShadow = this.bsBuff;
1025 
1026         for (int i = 0; i < nSelectors; i++) {
1027             for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1028                 // inlined: bsW(1, 1);
1029                 while (bsLiveShadow >= 8) {
1030                     outShadow.write(bsBuffShadow >> 24);
1031                     bsBuffShadow <<= 8;
1032                     bsLiveShadow -= 8;
1033                 }
1034                 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1;
1035                 bsLiveShadow++;
1036             }
1037 
1038             // inlined: bsW(1, 0);
1039             while (bsLiveShadow >= 8) {
1040                 outShadow.write(bsBuffShadow >> 24);
1041                 bsBuffShadow <<= 8;
1042                 bsLiveShadow -= 8;
1043             }
1044             // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1045             bsLiveShadow++;
1046         }
1047 
1048         this.bsBuff = bsBuffShadow;
1049         this.bsLive = bsLiveShadow;
1050     }
1051 
1052     private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException {
1053         final byte[][] len = this.data.sendMTFValues_len;
1054         final OutputStream outShadow = this.out;
1055 
1056         int bsLiveShadow = this.bsLive;
1057         int bsBuffShadow = this.bsBuff;
1058 
1059         for (int t = 0; t < nGroups; t++) {
1060             final byte[] len_t = len[t];
1061             int curr = len_t[0] & 0xff;
1062 
1063             // inlined: bsW(5, curr);
1064             while (bsLiveShadow >= 8) {
1065                 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1066                 bsBuffShadow <<= 8;
1067                 bsLiveShadow -= 8;
1068             }
1069             bsBuffShadow |= curr << 32 - bsLiveShadow - 5;
1070             bsLiveShadow += 5;
1071 
1072             for (int i = 0; i < alphaSize; i++) {
1073                 final int lti = len_t[i] & 0xff;
1074                 while (curr < lti) {
1075                     // inlined: bsW(2, 2);
1076                     while (bsLiveShadow >= 8) {
1077                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1078                         bsBuffShadow <<= 8;
1079                         bsLiveShadow -= 8;
1080                     }
1081                     bsBuffShadow |= 2 << 32 - bsLiveShadow - 2;
1082                     bsLiveShadow += 2;
1083 
1084                     curr++; /* 10 */
1085                 }
1086 
1087                 while (curr > lti) {
1088                     // inlined: bsW(2, 3);
1089                     while (bsLiveShadow >= 8) {
1090                         outShadow.write(bsBuffShadow >> 24); // write 8-bit
1091                         bsBuffShadow <<= 8;
1092                         bsLiveShadow -= 8;
1093                     }
1094                     bsBuffShadow |= 3 << 32 - bsLiveShadow - 2;
1095                     bsLiveShadow += 2;
1096 
1097                     curr--; /* 11 */
1098                 }
1099 
1100                 // inlined: bsW(1, 0);
1101                 while (bsLiveShadow >= 8) {
1102                     outShadow.write(bsBuffShadow >> 24); // write 8-bit
1103                     bsBuffShadow <<= 8;
1104                     bsLiveShadow -= 8;
1105                 }
1106                 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1107                 bsLiveShadow++;
1108             }
1109         }
1110 
1111         this.bsBuff = bsBuffShadow;
1112         this.bsLive = bsLiveShadow;
1113     }
1114 
1115     private void sendMTFValues7() throws IOException {
1116         final Data dataShadow = this.data;
1117         final byte[][] len = dataShadow.sendMTFValues_len;
1118         final int[][] code = dataShadow.sendMTFValues_code;
1119         final OutputStream outShadow = this.out;
1120         final byte[] selector = dataShadow.selector;
1121         final char[] sfmap = dataShadow.sfmap;
1122         final int nMTFShadow = this.nMTF;
1123 
1124         int selCtr = 0;
1125 
1126         int bsLiveShadow = this.bsLive;
1127         int bsBuffShadow = this.bsBuff;
1128 
1129         for (int gs = 0; gs < nMTFShadow;) {
1130             final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1131             final int selector_selCtr = selector[selCtr] & 0xff;
1132             final int[] code_selCtr = code[selector_selCtr];
1133             final byte[] len_selCtr = len[selector_selCtr];
1134 
1135             while (gs <= ge) {
1136                 final int sfmap_i = sfmap[gs];
1137 
1138                 //
1139                 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1140                 // code_selCtr[sfmap_i]);
1141                 //
1142                 while (bsLiveShadow >= 8) {
1143                     outShadow.write(bsBuffShadow >> 24);
1144                     bsBuffShadow <<= 8;
1145                     bsLiveShadow -= 8;
1146                 }
1147                 final int n = len_selCtr[sfmap_i] & 0xFF;
1148                 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n;
1149                 bsLiveShadow += n;
1150 
1151                 gs++;
1152             }
1153 
1154             gs = ge + 1;
1155             selCtr++;
1156         }
1157 
1158         this.bsBuff = bsBuffShadow;
1159         this.bsLive = bsLiveShadow;
1160     }
1161 
1162     @Override
1163     public void write(final byte[] buf, int offs, final int len) throws IOException {
1164         if (offs < 0) {
1165             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
1166         }
1167         if (len < 0) {
1168             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
1169         }
1170         if (offs + len > buf.length) {
1171             throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ").");
1172         }
1173         checkClosed();
1174         for (final int hi = offs + len; offs < hi;) {
1175             write0(buf[offs++]);
1176         }
1177     }
1178 
1179     @Override
1180     public void write(final int b) throws IOException {
1181         checkClosed();
1182         write0(b);
1183     }
1184 
1185     /**
1186      * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm.
1187      */
1188     private void write0(int b) throws IOException {
1189         if (this.currentChar != -1) {
1190             b &= 0xff;
1191             if (this.currentChar == b) {
1192                 if (++this.runLength > 254) {
1193                     writeRun();
1194                     this.currentChar = -1;
1195                     this.runLength = 0;
1196                 }
1197                 // else nothing to do
1198             } else {
1199                 writeRun();
1200                 this.runLength = 1;
1201                 this.currentChar = b;
1202             }
1203         } else {
1204             this.currentChar = b & 0xff;
1205             this.runLength++;
1206         }
1207     }
1208 
1209     /**
1210      * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four
1211      * identical bytes).
1212      *
1213      * <p>
1214      * Flushes the current block before writing data if it is full.
1215      * </p>
1216      *
1217      * <p>
1218      * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last
1219      * to point to the last index written minus 1.
1220      * </p>
1221      */
1222     private void writeRun() throws IOException {
1223         final int lastShadow = this.last;
1224 
1225         if (lastShadow < this.allowableBlockSize) {
1226             final int currentCharShadow = this.currentChar;
1227             final Data dataShadow = this.data;
1228             dataShadow.inUse[currentCharShadow] = true;
1229             final byte ch = (byte) currentCharShadow;
1230 
1231             int runLengthShadow = this.runLength;
1232             this.crc.update(currentCharShadow, runLengthShadow);
1233 
1234             switch (runLengthShadow) {
1235             case 1:
1236                 dataShadow.block[lastShadow + 2] = ch;
1237                 this.last = lastShadow + 1;
1238                 break;
1239 
1240             case 2:
1241                 dataShadow.block[lastShadow + 2] = ch;
1242                 dataShadow.block[lastShadow + 3] = ch;
1243                 this.last = lastShadow + 2;
1244                 break;
1245 
1246             case 3: {
1247                 final byte[] block = dataShadow.block;
1248                 block[lastShadow + 2] = ch;
1249                 block[lastShadow + 3] = ch;
1250                 block[lastShadow + 4] = ch;
1251                 this.last = lastShadow + 3;
1252             }
1253                 break;
1254 
1255             default: {
1256                 runLengthShadow -= 4;
1257                 dataShadow.inUse[runLengthShadow] = true;
1258                 final byte[] block = dataShadow.block;
1259                 block[lastShadow + 2] = ch;
1260                 block[lastShadow + 3] = ch;
1261                 block[lastShadow + 4] = ch;
1262                 block[lastShadow + 5] = ch;
1263                 block[lastShadow + 6] = (byte) runLengthShadow;
1264                 this.last = lastShadow + 5;
1265             }
1266                 break;
1267 
1268             }
1269         } else {
1270             endBlock();
1271             initBlock();
1272             writeRun();
1273         }
1274     }
1275 
1276 }