View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.util.Arrays;
21  import java.util.Objects;
22  import java.util.function.Supplier;
23  
24  import org.apache.commons.codec.BinaryDecoder;
25  import org.apache.commons.codec.BinaryEncoder;
26  import org.apache.commons.codec.CodecPolicy;
27  import org.apache.commons.codec.DecoderException;
28  import org.apache.commons.codec.EncoderException;
29  
30  /**
31   * Abstract superclass for Base-N encoders and decoders.
32   *
33   * <p>
34   * This class is thread-safe.
35   * </p>
36   * <p>
37   * You can set the decoding behavior when the input bytes contain leftover trailing bits that cannot be created by a
38   * valid encoding. These can be bits that are unused from the final character or entire characters. The default mode is
39   * lenient decoding.
40   * </p>
41   * <ul>
42   * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible. The remainder are discarded.
43   * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits are not part of a valid
44   * encoding. Any unused bits from the final character must be zero. Impossible counts of entire final characters are not
45   * allowed.
46   * </ul>
47   * <p>
48   * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded to a byte array that matches
49   * the original, i.e. no changes occur on the final character. This requires that the input bytes use the same padding
50   * and alphabet as the encoder.
51   * </p>
52   */
53  public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
54  
55      /**
56       * Builds {@link Base64} instances.
57       *
58       * @param <T> the codec type to build.
59       * @param <B> the codec builder subtype.
60       * @since 1.17.0
61       */
62      public abstract static class AbstractBuilder<T, B extends AbstractBuilder<T, B>> implements Supplier<T> {
63  
64          private CodecPolicy decodingPolicy = DECODING_POLICY_DEFAULT;
65          private int lineLength;
66          private byte[] lineSeparator = CHUNK_SEPARATOR;
67          private final byte[] defaultEncodeTable;
68          private byte[] encodeTable;
69          /** Padding byte. */
70          private byte padding = PAD_DEFAULT;
71  
72          AbstractBuilder(final byte[] defaultEncodeTable) {
73              this.defaultEncodeTable = defaultEncodeTable;
74              this.encodeTable = defaultEncodeTable;
75          }
76  
77          @SuppressWarnings("unchecked")
78          B asThis() {
79              return (B) this;
80          }
81  
82          CodecPolicy getDecodingPolicy() {
83              return decodingPolicy;
84          }
85  
86          byte[] getEncodeTable() {
87              return encodeTable;
88          }
89  
90          int getLineLength() {
91              return lineLength;
92          }
93  
94          byte[] getLineSeparator() {
95              return lineSeparator;
96          }
97  
98          byte getPadding() {
99              return padding;
100         }
101 
102         /**
103          * Sets the decoding policy.
104          *
105          * @param decodingPolicy the decoding policy, null resets to the default.
106          * @return {@code this} instance.
107          */
108         public B setDecodingPolicy(final CodecPolicy decodingPolicy) {
109             this.decodingPolicy = decodingPolicy != null ? decodingPolicy : DECODING_POLICY_DEFAULT;
110             return asThis();
111         }
112 
113         /**
114          * Sets the encode table.
115          *
116          * @param encodeTable the encode table, null resets to the default.
117          * @return {@code this} instance.
118          */
119         public B setEncodeTable(final byte... encodeTable) {
120             this.encodeTable = encodeTable != null ? encodeTable : defaultEncodeTable;
121             return asThis();
122         }
123 
124         /**
125          * Sets the line length.
126          *
127          * @param lineLength the line length, less than 0 resets to the default.
128          * @return {@code this} instance.
129          */
130         public B setLineLength(final int lineLength) {
131             this.lineLength = Math.max(0, lineLength);
132             return asThis();
133         }
134 
135         /**
136          * Sets the line separator.
137          *
138          * @param lineSeparator the line separator, null resets to the default.
139          * @return {@code this} instance.
140          */
141         public B setLineSeparator(final byte... lineSeparator) {
142             this.lineSeparator = lineSeparator != null ? lineSeparator : CHUNK_SEPARATOR;
143             return asThis();
144         }
145 
146         /**
147          * Sets the padding byte.
148          *
149          * @param padding the padding byte.
150          * @return {@code this} instance.
151          */
152         public B setPadding(final byte padding) {
153             this.padding = padding;
154             return asThis();
155         }
156 
157     }
158 
159     /**
160      * Holds thread context so classes can be thread-safe.
161      *
162      * This class is not itself thread-safe; each thread must allocate its own copy.
163      */
164     static class Context {
165 
166         /**
167          * Placeholder for the bytes we're dealing with for our based logic.
168          * Bitwise operations store and extract the encoding or decoding from this variable.
169          */
170         int ibitWorkArea;
171 
172         /**
173          * Placeholder for the bytes we're dealing with for our based logic.
174          * Bitwise operations store and extract the encoding or decoding from this variable.
175          */
176         long lbitWorkArea;
177 
178         /**
179          * Buffer for streaming.
180          */
181         byte[] buffer;
182 
183         /**
184          * Position where next character should be written in the buffer.
185          */
186         int pos;
187 
188         /**
189          * Position where next character should be read from the buffer.
190          */
191         int readPos;
192 
193         /**
194          * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
195          * and must be thrown away.
196          */
197         boolean eof;
198 
199         /**
200          * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
201          * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
202          */
203         int currentLinePos;
204 
205         /**
206          * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
207          * variable helps track that.
208          */
209         int modulus;
210 
211         /**
212          * Returns a String useful for debugging (especially within a debugger.)
213          *
214          * @return a String useful for debugging.
215          */
216         @Override
217         public String toString() {
218             return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
219                     "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
220                     currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
221         }
222     }
223 
224     /**
225      * EOF
226      *
227      * @since 1.7
228      */
229     static final int EOF = -1;
230 
231     /**
232      *  MIME chunk size per RFC 2045 section 6.8.
233      *
234      * <p>
235      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
236      * equal signs.
237      * </p>
238      *
239      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
240      */
241     public static final int MIME_CHUNK_SIZE = 76;
242 
243     /**
244      * PEM chunk size per RFC 1421 section 4.3.2.4.
245      *
246      * <p>
247      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
248      * equal signs.
249      * </p>
250      *
251      * @see <a href="https://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
252      */
253     public static final int PEM_CHUNK_SIZE = 64;
254 
255     private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
256 
257     /**
258      * Defines the default buffer size - currently {@value}
259      * - must be large enough for at least one encoded block+separator
260      */
261     private static final int DEFAULT_BUFFER_SIZE = 8192;
262 
263     /**
264      * The maximum size buffer to allocate.
265      *
266      * <p>This is set to the same size used in the JDK {@link java.util.ArrayList}:</p>
267      * <blockquote>
268      * Some VMs reserve some header words in an array.
269      * Attempts to allocate larger arrays may result in
270      * OutOfMemoryError: Requested array size exceeds VM limit.
271      * </blockquote>
272      */
273     private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;
274 
275     /** Mask used to extract 8 bits, used in decoding bytes */
276     protected static final int MASK_8BITS = 0xff;
277 
278     /**
279      * Byte used to pad output.
280      */
281     protected static final byte PAD_DEFAULT = '='; // Allow static access to default
282 
283     /**
284      * The default decoding policy.
285      * @since 1.15
286      */
287     protected static final CodecPolicy DECODING_POLICY_DEFAULT = CodecPolicy.LENIENT;
288 
289     /**
290      * Chunk separator per RFC 2045 section 2.1.
291      *
292      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
293      */
294     static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
295 
296     /**
297      * Create a positive capacity at least as large the minimum required capacity.
298      * If the minimum capacity is negative then this throws an OutOfMemoryError as no array
299      * can be allocated.
300      *
301      * @param minCapacity the minimum capacity
302      * @return the capacity
303      * @throws OutOfMemoryError if the {@code minCapacity} is negative
304      */
305     private static int createPositiveCapacity(final int minCapacity) {
306         if (minCapacity < 0) {
307             // overflow
308             throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL));
309         }
310         // This is called when we require buffer expansion to a very big array.
311         // Use the conservative maximum buffer size if possible, otherwise the biggest required.
312         //
313         // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE.
314         // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full
315         // Integer.MAX_VALUE length array.
316         // The result is that we may have to allocate an array of this size more than once if
317         // the capacity must be expanded again.
318         return Math.max(minCapacity, MAX_BUFFER_SIZE);
319     }
320 
321     /**
322      * Gets a copy of the chunk separator per RFC 2045 section 2.1.
323      *
324      * @return the chunk separator
325      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
326      * @since 1.15
327      */
328     public static byte[] getChunkSeparator() {
329         return CHUNK_SEPARATOR.clone();
330     }
331 
332     /**
333      * Checks if a byte value is whitespace or not.
334      * @param byteToCheck
335      *            the byte to check
336      * @return true if byte is whitespace, false otherwise
337      * @see Character#isWhitespace(int)
338      * @deprecated Use {@link Character#isWhitespace(int)}.
339      */
340     @Deprecated
341     protected static boolean isWhiteSpace(final byte byteToCheck) {
342         return Character.isWhitespace(byteToCheck);
343     }
344 
345     /**
346      * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
347      * @param context the context to be used
348      * @param minCapacity the minimum required capacity
349      * @return the resized byte[] buffer
350      * @throws OutOfMemoryError if the {@code minCapacity} is negative
351      */
352     private static byte[] resizeBuffer(final Context context, final int minCapacity) {
353         // Overflow-conscious code treats the min and new capacity as unsigned.
354         final int oldCapacity = context.buffer.length;
355         int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR;
356         if (Integer.compareUnsigned(newCapacity, minCapacity) < 0) {
357             newCapacity = minCapacity;
358         }
359         if (Integer.compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) {
360             newCapacity = createPositiveCapacity(minCapacity);
361         }
362         final byte[] b = Arrays.copyOf(context.buffer, newCapacity);
363         context.buffer = b;
364         return b;
365     }
366 
367     /**
368      * Gets the array length or 0 if null.
369      *
370      * @param array the array or null.
371      * @return the array length or 0 if null.
372      */
373     static int toLength(final byte[] array) {
374         return array == null ? 0 : array.length;
375     }
376 
377     /**
378      * @deprecated Use {@link #pad}. Will be removed in 2.0.
379      */
380     @Deprecated
381     protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
382 
383     /** Pad byte. Instance variable just in case it needs to vary later. */
384     protected final byte pad;
385 
386     /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
387     private final int unencodedBlockSize;
388 
389     /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
390     private final int encodedBlockSize;
391 
392     /**
393      * Chunksize for encoding. Not used when decoding.
394      * A value of zero or less implies no chunking of the encoded data.
395      * Rounded down to the nearest multiple of encodedBlockSize.
396      */
397     protected final int lineLength;
398 
399     /**
400      * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
401      */
402     private final int chunkSeparatorLength;
403 
404     /**
405      * Defines the decoding behavior when the input bytes contain leftover trailing bits that
406      * cannot be created by a valid encoding. These can be bits that are unused from the final
407      * character or entire characters. The default mode is lenient decoding. Set this to
408      * {@link CodecPolicy#STRICT} to enable strict decoding.
409      * <ul>
410      * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible.
411      *     The remainder are discarded.
412      * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits
413      *     are not part of a valid encoding. Any unused bits from the final character must
414      *     be zero. Impossible counts of entire final characters are not allowed.
415      * </ul>
416      * <p>
417      * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded
418      * to a byte array that matches the original, i.e. no changes occur on the final
419      * character. This requires that the input bytes use the same padding and alphabet
420      * as the encoder.
421      * </p>
422      */
423     private final CodecPolicy decodingPolicy;
424 
425     /**
426      * Constructs a new instance.
427      * <p>
428      * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
429      * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
430      * </p>
431      *
432      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
433      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
434      * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
435      * @param chunkSeparatorLength the chunk separator length, if relevant
436      */
437     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength) {
438         this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
439     }
440 
441     /**
442      * Constructs a new instance.
443      * <p>
444      * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
445      * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
446      * </p>
447      *
448      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
449      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
450      * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
451      * @param chunkSeparatorLength the chunk separator length, if relevant
452      * @param pad byte used as padding byte.
453      */
454     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad) {
455         this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, pad, DECODING_POLICY_DEFAULT);
456     }
457 
458     /**
459      * Constructs a new instance.
460      * <p>
461      * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
462      * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
463      * </p>
464      *
465      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
466      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
467      * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
468      * @param chunkSeparatorLength the chunk separator length, if relevant
469      * @param pad byte used as padding byte.
470      * @param decodingPolicy Decoding policy.
471      * @since 1.15
472      */
473     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad,
474             final CodecPolicy decodingPolicy) {
475         this.unencodedBlockSize = unencodedBlockSize;
476         this.encodedBlockSize = encodedBlockSize;
477         final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
478         this.lineLength = useChunking ? lineLength / encodedBlockSize * encodedBlockSize : 0;
479         this.chunkSeparatorLength = chunkSeparatorLength;
480         this.pad = pad;
481         this.decodingPolicy = Objects.requireNonNull(decodingPolicy, "codecPolicy");
482     }
483 
484     /**
485      * Returns the amount of buffered data available for reading.
486      *
487      * @param context the context to be used
488      * @return The amount of buffered data available for reading.
489      */
490     int available(final Context context) {  // package protected for access from I/O streams
491         return hasData(context) ? context.pos - context.readPos : 0;
492     }
493 
494     /**
495      * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
496      *
497      * Intended for use in checking line-ending arrays
498      *
499      * @param arrayOctet
500      *            byte array to test
501      * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
502      */
503     protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
504         if (arrayOctet != null) {
505             for (final byte element : arrayOctet) {
506                 if (pad == element || isInAlphabet(element)) {
507                     return true;
508                 }
509             }
510         }
511         return false;
512     }
513 
514     /**
515      * Decodes a byte[] containing characters in the Base-N alphabet.
516      *
517      * @param pArray
518      *            A byte array containing Base-N character data
519      * @return a byte array containing binary data
520      */
521     @Override
522     public byte[] decode(final byte[] pArray) {
523         if (BinaryCodec.isEmpty(pArray)) {
524             return pArray;
525         }
526         final Context context = new Context();
527         decode(pArray, 0, pArray.length, context);
528         decode(pArray, 0, EOF, context); // Notify decoder of EOF.
529         final byte[] result = new byte[context.pos];
530         readResults(result, 0, result.length, context);
531         return result;
532     }
533 
534     // package protected for access from I/O streams
535     abstract void decode(byte[] pArray, int i, int length, Context context);
536 
537     /**
538      * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
539      * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
540      *
541      * @param obj
542      *            Object to decode
543      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
544      *         supplied.
545      * @throws DecoderException
546      *             if the parameter supplied is not of type byte[]
547      */
548     @Override
549     public Object decode(final Object obj) throws DecoderException {
550         if (obj instanceof byte[]) {
551             return decode((byte[]) obj);
552         }
553         if (obj instanceof String) {
554             return decode((String) obj);
555         }
556         throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
557     }
558 
559     /**
560      * Decodes a String containing characters in the Base-N alphabet.
561      *
562      * @param pArray
563      *            A String containing Base-N character data
564      * @return a byte array containing binary data
565      */
566     public byte[] decode(final String pArray) {
567         return decode(StringUtils.getBytesUtf8(pArray));
568     }
569 
570     /**
571      * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
572      *
573      * @param pArray
574      *            a byte array containing binary data
575      * @return A byte array containing only the base N alphabetic character data
576      */
577     @Override
578     public byte[] encode(final byte[] pArray) {
579         if (BinaryCodec.isEmpty(pArray)) {
580             return pArray;
581         }
582         return encode(pArray, 0, pArray.length);
583     }
584 
585     /**
586      * Encodes a byte[] containing binary data, into a byte[] containing
587      * characters in the alphabet.
588      *
589      * @param pArray
590      *            a byte array containing binary data
591      * @param offset
592      *            initial offset of the subarray.
593      * @param length
594      *            length of the subarray.
595      * @return A byte array containing only the base N alphabetic character data
596      * @since 1.11
597      */
598     public byte[] encode(final byte[] pArray, final int offset, final int length) {
599         if (BinaryCodec.isEmpty(pArray)) {
600             return pArray;
601         }
602         final Context context = new Context();
603         encode(pArray, offset, length, context);
604         encode(pArray, offset, EOF, context); // Notify encoder of EOF.
605         final byte[] buf = new byte[context.pos - context.readPos];
606         readResults(buf, 0, buf.length, context);
607         return buf;
608     }
609 
610     // package protected for access from I/O streams
611     abstract void encode(byte[] pArray, int i, int length, Context context);
612 
613     /**
614      * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
615      * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
616      *
617      * @param obj
618      *            Object to encode
619      * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
620      * @throws EncoderException
621      *             if the parameter supplied is not of type byte[]
622      */
623     @Override
624     public Object encode(final Object obj) throws EncoderException {
625         if (!(obj instanceof byte[])) {
626             throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
627         }
628         return encode((byte[]) obj);
629     }
630 
631     /**
632      * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
633      * Uses UTF8 encoding.
634      * <p>
635      * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
636      * </p>
637      *
638      * @param pArray a byte array containing binary data
639      * @return String containing only character data in the appropriate alphabet.
640      * @since 1.5
641     */
642     public String encodeAsString(final byte[] pArray) {
643         return StringUtils.newStringUtf8(encode(pArray));
644     }
645 
646     /**
647      * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
648      * Uses UTF8 encoding.
649      *
650      * @param pArray
651      *            a byte array containing binary data
652      * @return A String containing only Base-N character data
653      */
654     public String encodeToString(final byte[] pArray) {
655         return StringUtils.newStringUtf8(encode(pArray));
656     }
657 
658     /**
659      * Ensure that the buffer has room for {@code size} bytes
660      *
661      * @param size minimum spare space required
662      * @param context the context to be used
663      * @return the buffer
664      */
665     protected byte[] ensureBufferSize(final int size, final Context context) {
666         if (context.buffer == null) {
667             context.buffer = new byte[Math.max(size, getDefaultBufferSize())];
668             context.pos = 0;
669             context.readPos = 0;
670             // Overflow-conscious:
671             // x + y > z == x + y - z > 0
672         } else if (context.pos + size - context.buffer.length > 0) {
673             return resizeBuffer(context, context.pos + size);
674         }
675         return context.buffer;
676     }
677 
678     /**
679      * Returns the decoding behavior policy.
680      *
681      * <p>
682      * The default is lenient. If the decoding policy is strict, then decoding will raise an
683      * {@link IllegalArgumentException} if trailing bits are not part of a valid encoding. Decoding will compose
684      * trailing bits into 8-bit bytes and discard the remainder.
685      * </p>
686      *
687      * @return true if using strict decoding
688      * @since 1.15
689      */
690     public CodecPolicy getCodecPolicy() {
691         return decodingPolicy;
692     }
693 
694     /**
695      * Gets the default buffer size. Can be overridden.
696      *
697      * @return the default buffer size.
698      */
699     protected int getDefaultBufferSize() {
700         return DEFAULT_BUFFER_SIZE;
701     }
702 
703     /**
704      * Calculates the amount of space needed to encode the supplied array.
705      *
706      * @param pArray byte[] array which will later be encoded
707      *
708      * @return amount of space needed to encode the supplied array.
709      * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
710      */
711     public long getEncodedLength(final byte[] pArray) {
712         // Calculate non-chunked size - rounded up to allow for padding
713         // cast to long is needed to avoid possibility of overflow
714         long len = (pArray.length + unencodedBlockSize - 1) / unencodedBlockSize * (long) encodedBlockSize;
715         if (lineLength > 0) { // We're using chunking
716             // Round up to nearest multiple
717             len += (len + lineLength - 1) / lineLength * chunkSeparatorLength;
718         }
719         return len;
720     }
721 
722     /**
723      * Returns true if this object has buffered data for reading.
724      *
725      * @param context the context to be used
726      * @return true if there is data still available for reading.
727      */
728     boolean hasData(final Context context) {  // package protected for access from I/O streams
729         return context.pos > context.readPos;
730     }
731 
732     /**
733      * Returns whether or not the {@code octet} is in the current alphabet.
734      * Does not allow whitespace or pad.
735      *
736      * @param value The value to test
737      *
738      * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
739      */
740     protected abstract boolean isInAlphabet(byte value);
741 
742     /**
743      * Tests a given byte array to see if it contains only valid characters within the alphabet.
744      * The method optionally treats whitespace and pad as valid.
745      *
746      * @param arrayOctet byte array to test
747      * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
748      *
749      * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
750      *         {@code false}, otherwise
751      */
752     public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
753         for (final byte octet : arrayOctet) {
754             if (!isInAlphabet(octet) && (!allowWSPad || octet != pad && !Character.isWhitespace(octet))) {
755                 return false;
756             }
757         }
758         return true;
759     }
760 
761     /**
762      * Tests a given String to see if it contains only valid characters within the alphabet.
763      * The method treats whitespace and PAD as valid.
764      *
765      * @param basen String to test
766      * @return {@code true} if all characters in the String are valid characters in the alphabet or if
767      *         the String is empty; {@code false}, otherwise
768      * @see #isInAlphabet(byte[], boolean)
769      */
770     public boolean isInAlphabet(final String basen) {
771         return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
772     }
773 
774     /**
775      * Returns true if decoding behavior is strict. Decoding will raise an {@link IllegalArgumentException} if trailing
776      * bits are not part of a valid encoding.
777      *
778      * <p>
779      * The default is false for lenient decoding. Decoding will compose trailing bits into 8-bit bytes and discard the
780      * remainder.
781      * </p>
782      *
783      * @return true if using strict decoding
784      * @since 1.15
785      */
786     public boolean isStrictDecoding() {
787         return decodingPolicy == CodecPolicy.STRICT;
788     }
789 
790     /**
791      * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
792      * bytes. Returns how many bytes were actually extracted.
793      * <p>
794      * Package private for access from I/O streams.
795      * </p>
796      *
797      * @param b
798      *            byte[] array to extract the buffered data into.
799      * @param bPos
800      *            position in byte[] array to start extraction at.
801      * @param bAvail
802      *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
803      * @param context
804      *            the context to be used
805      * @return The number of bytes successfully extracted into the provided byte[] array.
806      */
807     int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
808         if (hasData(context)) {
809             final int len = Math.min(available(context), bAvail);
810             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
811             context.readPos += len;
812             if (!hasData(context)) {
813                 // All data read.
814                 // Reset position markers but do not set buffer to null to allow its reuse.
815                 // hasData(context) will still return false, and this method will return 0 until
816                 // more data is available, or -1 if EOF.
817                 context.pos = context.readPos = 0;
818             }
819             return len;
820         }
821         return context.eof ? EOF : 0;
822     }
823 }