View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz77support;
20  
21  /**
22   * Parameters of the {@link LZ77Compressor compressor}.
23   */
24  public final class Parameters {
25  
26      /**
27       * Builder for {@link Parameters} instances.
28       */
29      public static class Builder {
30  
31          private final int windowSize;
32          private int minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength;
33          private Integer niceBackReferenceLength, maxCandidates, lazyThreshold;
34          private Boolean lazyMatches;
35  
36          private Builder(final int windowSize) {
37              if (windowSize < 2 || !isPowerOfTwo(windowSize)) {
38                  throw new IllegalArgumentException("windowSize must be a power of two");
39              }
40              this.windowSize = windowSize;
41              minBackReferenceLength = TRUE_MIN_BACK_REFERENCE_LENGTH;
42              maxBackReferenceLength = windowSize - 1;
43              maxOffset = windowSize - 1;
44              maxLiteralLength = windowSize;
45          }
46  
47          /**
48           * Creates the {@link Parameters} instance.
49           *
50           * @return the configured {@link Parameters} instance.
51           */
52          public Parameters build() {
53              // default settings tuned for a compromise of good compression and acceptable speed
54              final int niceLen = niceBackReferenceLength != null ? niceBackReferenceLength : Math.max(minBackReferenceLength, maxBackReferenceLength / 2);
55              final int candidates = maxCandidates != null ? maxCandidates : Math.max(256, windowSize / 128);
56              final boolean lazy = lazyMatches == null || lazyMatches;
57              final int threshold = lazy ? lazyThreshold != null ? lazyThreshold : niceLen : minBackReferenceLength;
58  
59              return new Parameters(windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength, niceLen, candidates, lazy,
60                      threshold);
61          }
62  
63          /**
64           * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved compression ratio at the cost of
65           * compression speed.
66           * <p>
67           * Use this method after configuring "maximum back-reference length".
68           * </p>
69           *
70           * @return the builder
71           */
72          public Builder tunedForCompressionRatio() {
73              niceBackReferenceLength = lazyThreshold = maxBackReferenceLength;
74              maxCandidates = Math.max(32, windowSize / 16);
75              lazyMatches = true;
76              return this;
77          }
78  
79          /**
80           * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved compression speed at the cost of
81           * compression ratio.
82           * <p>
83           * Use this method after configuring "maximum back-reference length".
84           * </p>
85           *
86           * @return the builder
87           */
88          public Builder tunedForSpeed() {
89              niceBackReferenceLength = Math.max(minBackReferenceLength, maxBackReferenceLength / 8);
90              maxCandidates = Math.max(32, windowSize / 1024);
91              lazyMatches = false;
92              lazyThreshold = minBackReferenceLength;
93              return this;
94          }
95  
96          /**
97           * Sets whether lazy matching should be performed.
98           * <p>
99           * Lazy matching means that after a back-reference for a certain position has been found the compressor will try to find a longer match for the next
100          * position.
101          * </p>
102          * <p>
103          * Lazy matching is enabled by default and disabled when tuning for speed.
104          * </p>
105          *
106          * @param lazy whether lazy matching should be performed
107          * @return the builder
108          */
109         public Builder withLazyMatching(final boolean lazy) {
110             lazyMatches = lazy;
111             return this;
112         }
113 
114         /**
115          * Sets the threshold for lazy matching.
116          * <p>
117          * Even if lazy matching is enabled it will not be performed if the length of the back-reference found for the current position is longer than this
118          * value.
119          * </p>
120          *
121          * @param threshold the threshold for lazy matching
122          * @return the builder
123          */
124         public Builder withLazyThreshold(final int threshold) {
125             lazyThreshold = threshold;
126             return this;
127         }
128 
129         /**
130          * Sets the maximal length of a back-reference.
131          * <p>
132          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
133          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
134          * </p>
135          *
136          * @param maxBackReferenceLength maximal length of a back-reference found. A value smaller than {@code minBackReferenceLength} is interpreted as
137          *                               {@code minBackReferenceLength}. {@code maxBackReferenceLength} is capped at {@code windowSize - 1}.
138          * @return the builder
139          */
140         public Builder withMaxBackReferenceLength(final int maxBackReferenceLength) {
141             this.maxBackReferenceLength = maxBackReferenceLength < minBackReferenceLength ? minBackReferenceLength
142                     : Math.min(maxBackReferenceLength, windowSize - 1);
143             return this;
144         }
145 
146         /**
147          * Sets the maximal length of a literal block.
148          * <p>
149          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
150          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
151          * </p>
152          *
153          * @param maxLiteralLength maximal length of a literal block. Negative numbers and 0 as well as values bigger than {@code windowSize} are interpreted as
154          *                         {@code windowSize}.
155          * @return the builder
156          */
157         public Builder withMaxLiteralLength(final int maxLiteralLength) {
158             this.maxLiteralLength = maxLiteralLength < 1 ? windowSize : Math.min(maxLiteralLength, windowSize);
159             return this;
160         }
161 
162         /**
163          * Sets the maximum number of back-reference candidates that should be consulted.
164          * <p>
165          * This settings can be used to tune the tradeoff between compression speed and compression ratio.
166          * </p>
167          *
168          * @param maxCandidates maximum number of back-reference candidates
169          * @return the builder
170          */
171         public Builder withMaxNumberOfCandidates(final int maxCandidates) {
172             this.maxCandidates = maxCandidates;
173             return this;
174         }
175 
176         /**
177          * Sets the maximal offset of a back-reference.
178          * <p>
179          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
180          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
181          * </p>
182          *
183          * @param maxOffset maximal offset of a back-reference. A non-positive value as well as values bigger than {@code windowSize - 1} are interpreted as
184          *                  {@code windowSize * - 1}.
185          * @return the builder
186          */
187         public Builder withMaxOffset(final int maxOffset) {
188             this.maxOffset = maxOffset < 1 ? windowSize - 1 : Math.min(maxOffset, windowSize - 1);
189             return this;
190         }
191 
192         /**
193          * Sets the minimal length of a back-reference.
194          * <p>
195          * Ensures {@code maxBackReferenceLength} is not smaller than {@code minBackReferenceLength}.
196          * </p>
197          * <p>
198          * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
199          * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
200          * </p>
201          *
202          * @param minBackReferenceLength the minimal length of a back-reference found. A true minimum of 3 is hard-coded inside of this implementation but
203          *                               bigger lengths can be configured.
204          * @throws IllegalArgumentException if {@code windowSize} is smaller than {@code minBackReferenceLength}.
205          * @return the builder
206          */
207         public Builder withMinBackReferenceLength(final int minBackReferenceLength) {
208             this.minBackReferenceLength = Math.max(TRUE_MIN_BACK_REFERENCE_LENGTH, minBackReferenceLength);
209             if (windowSize < this.minBackReferenceLength) {
210                 throw new IllegalArgumentException("minBackReferenceLength can't be bigger than windowSize");
211             }
212             if (maxBackReferenceLength < this.minBackReferenceLength) {
213                 maxBackReferenceLength = this.minBackReferenceLength;
214             }
215             return this;
216         }
217 
218         /**
219          * Sets the "nice length" of a back-reference.
220          * <p>
221          * When a back-references if this size has been found, stop searching for longer back-references.
222          * </p>
223          * <p>
224          * This settings can be used to tune the tradeoff between compression speed and compression ratio.
225          * </p>
226          *
227          * @param niceLen the "nice length" of a back-reference
228          * @return the builder
229          */
230         public Builder withNiceBackReferenceLength(final int niceLen) {
231             niceBackReferenceLength = niceLen;
232             return this;
233         }
234     }
235 
236     /**
237      * The hard-coded absolute minimal length of a back-reference.
238      */
239     public static final int TRUE_MIN_BACK_REFERENCE_LENGTH = LZ77Compressor.NUMBER_OF_BYTES_IN_HASH;
240 
241     /**
242      * Initializes the builder for the compressor's parameters with a {@code minBackReferenceLength} of 3 and {@code max*Length} equal to
243      * {@code windowSize - 1}.
244      * <p>
245      * It is recommended to not use this method directly but rather tune a pre-configured builder created by a format specific factory like
246      * {@link org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.
247      * </p>
248      *
249      * @param windowSize the size of the sliding window - this determines the maximum offset a back-reference can take. Must be a power of two.
250      * @throws IllegalArgumentException if windowSize is not a power of two.
251      * @return a builder configured for the given window size
252      */
253     public static Builder builder(final int windowSize) {
254         return new Builder(windowSize);
255     }
256 
257     private static boolean isPowerOfTwo(final int x) {
258         // pre-condition: x > 0
259         return (x & x - 1) == 0;
260     }
261 
262     private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength, niceBackReferenceLength, maxCandidates,
263             lazyThreshold;
264 
265     private final boolean lazyMatching;
266 
267     private Parameters(final int windowSize, final int minBackReferenceLength, final int maxBackReferenceLength, final int maxOffset,
268             final int maxLiteralLength, final int niceBackReferenceLength, final int maxCandidates, final boolean lazyMatching, final int lazyThreshold) {
269         this.windowSize = windowSize;
270         this.minBackReferenceLength = minBackReferenceLength;
271         this.maxBackReferenceLength = maxBackReferenceLength;
272         this.maxOffset = maxOffset;
273         this.maxLiteralLength = maxLiteralLength;
274         this.niceBackReferenceLength = niceBackReferenceLength;
275         this.maxCandidates = maxCandidates;
276         this.lazyMatching = lazyMatching;
277         this.lazyThreshold = lazyThreshold;
278     }
279 
280     /**
281      * Gets whether to perform lazy matching.
282      *
283      * @return whether to perform lazy matching
284      */
285     public boolean getLazyMatching() {
286         return lazyMatching;
287     }
288 
289     /**
290      * Gets the threshold for lazy matching.
291      *
292      * @return the threshold for lazy matching
293      */
294     public int getLazyMatchingThreshold() {
295         return lazyThreshold;
296     }
297 
298     /**
299      * Gets the maximal length of a back-reference found.
300      *
301      * @return the maximal length of a back-reference found
302      */
303     public int getMaxBackReferenceLength() {
304         return maxBackReferenceLength;
305     }
306 
307     /**
308      * Gets the maximum number of back-reference candidates to consider.
309      *
310      * @return the maximum number of back-reference candidates to consider
311      */
312     public int getMaxCandidates() {
313         return maxCandidates;
314     }
315 
316     /**
317      * Gets the maximal length of a literal block.
318      *
319      * @return the maximal length of a literal block
320      */
321     public int getMaxLiteralLength() {
322         return maxLiteralLength;
323     }
324 
325     /**
326      * Gets the maximal offset of a back-reference found.
327      *
328      * @return the maximal offset of a back-reference found
329      */
330     public int getMaxOffset() {
331         return maxOffset;
332     }
333 
334     /**
335      * Gets the minimal length of a back-reference found.
336      *
337      * @return the minimal length of a back-reference found
338      */
339     public int getMinBackReferenceLength() {
340         return minBackReferenceLength;
341     }
342 
343     /**
344      * Gets the length of a back-reference that is considered nice enough to stop searching for longer ones.
345      *
346      * @return the length of a back-reference that is considered nice enough to stop searching
347      */
348     public int getNiceBackReferenceLength() {
349         return niceBackReferenceLength;
350     }
351 
352     /**
353      * Gets the size of the sliding window - this determines the maximum offset a back-reference can take.
354      *
355      * @return the size of the sliding window
356      */
357     public int getWindowSize() {
358         return windowSize;
359     }
360 }