View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
import java.util.Arrays;
import java.util.Objects;

import org.apache.commons.codec.CodecPolicy;
23  
24  /**
25   * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
26   *
27   * <p>
28   * The class can be parameterized in the following manner with various constructors:
29   * </p>
30   * <ul>
31   * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
 * <li>Line length: Default 0 (no chunking). Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.</li>
33   * <li>Line separator: Default is CRLF ("\r\n")</li>
34   * </ul>
35   * <p>
36   * This class operates directly on byte streams, and not character streams.
37   * </p>
38   * <p>
39   * This class is thread-safe.
40   * </p>
41   * <p>
42   * You can configure instances with the {@link Builder}.
43   * </p>
44   * <pre>
45   * Base32 base32 = Base32.builder()
46   *   .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient
47   *   .setEncodeTable(customEncodeTable)
48   *   .setLineLength(0)                          // default is none
49   *   .setLineSeparator('\r', '\n')              // default is CR LF
50   *   .setPadding('=')                           // default is =
51   *   .get()
52   * </pre>
53   *
54   * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
55   * @since 1.5
56   */
57  public class Base32 extends BaseNCodec {
58  
59      /**
60       * Builds {@link Base32} instances.
61       *
62       * @since 1.17.0
63       */
64      public static class Builder extends AbstractBuilder<Base32, Builder> {
65  
66          /**
67           * Constructs a new instance.
68           */
69          public Builder() {
70              super(ENCODE_TABLE);
71          }
72  
73          @Override
74          public Base32 get() {
75              return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy());
76          }
77  
78          /**
79           * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
80           * <p>
81           * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
82           * </p>
83           *
84           * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
85           * @return this instance.
86           * @since 1.18.0
87           */
88          public Builder setHexDecodeTable(final boolean useHex) {
89              return setEncodeTable(decodeTable(useHex));
90          }
91  
92          /**
93           * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
94           * <p>
95           * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
96           * </p>
97           *
98           * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
99           * @return this instance.
100          * @since 1.18.0
101          */
102         public Builder setHexEncodeTable(final boolean useHex) {
103             return setEncodeTable(encodeTable(useHex));
104         }
105     }
106 
    /**
     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
     * characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 5;

    /** Number of encoded characters in one full block: 8 characters of 5 bits each (40 bits). */
    private static final int BYTES_PER_ENCODED_BLOCK = 8;
    /** Number of unencoded octets in one full block: 5 octets of 8 bits each (40 bits). */
    private static final int BYTES_PER_UNENCODED_BLOCK = 5;
    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
     * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * Upper-case and lower-case letters map to the same values. The table ends at 0x7a ('z').
     * </p>
     */
    // @formatter:off
    private static final byte[] DECODE_TABLE = {
         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
                                                        -1, -1, -1, -1, -1, // 5b-5f
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC
     * 4648.
     */
    // @formatter:off
    private static final byte[] ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            '2', '3', '4', '5', '6', '7',
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
     * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * Upper-case and lower-case letters map to the same values. The table ends at 0x76 ('v').
     * </p>
     */
    // @formatter:off
    private static final byte[] HEX_DECODE_TABLE = {
         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of
     * RFC 4648.
     */
    // @formatter:off
    private static final byte[] HEX_ENCODE_TABLE = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
    };
    // @formatter:on

    /** Mask used to extract 5 bits, used when encoding Base32 bytes. */
    private static final int MASK_5BITS = 0x1f;

    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final long MASK_4BITS = 0x0fL;

    /** Mask used to extract 3 bits, used when decoding final trailing character. */
    private static final long MASK_3BITS = 0x07L;

    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final long MASK_2BITS = 0x03L;

    /** Mask used to extract 1 bit, used when decoding final trailing character. */
    private static final long MASK_1BITS = 0x01L;
191 
192     // The static final fields above are used for the original static byte[] methods on Base32.
193     // The private member fields below are used with the new streaming approach, which requires
194     // some state be preserved between calls of encode() and decode().
195 
    /**
     * Creates a new {@link Builder} for configuring and constructing {@link Base32} instances.
     *
     * @return a new Builder.
     * @since 1.17.0
     */
    public static Builder builder() {
        return new Builder();
    }
205 
206     private static byte[] decodeTable(final boolean useHex) {
207         return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
208     }
209 
210     private static byte[] encodeTable(final boolean useHex) {
211         return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
212     }
213 
    /**
     * Decode table to use. The constructor selects the Base32 Hex decode table when the encode table is the Base32 Hex alphabet, and the standard Base32
     * decode table otherwise.
     */
    private final byte[] decodeTable;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. Set in the constructor to
     * {@code BYTES_PER_ENCODED_BLOCK + lineSeparator.length} when {@code lineLength > 0}, otherwise to {@code BYTES_PER_ENCODED_BLOCK}.
     */
    private final int encodeSize;

    /**
     * Encode table to use.
     */
    private final byte[] encodeTable;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0; the constructor stores a private copy in that case and
     * {@code null} otherwise.
     */
    private final byte[] lineSeparator;
234 
235     /**
236      * Constructs a Base32 codec used for decoding and encoding.
237      * <p>
238      * When encoding the line length is 0 (no chunking).
239      * </p>
240      */
241     public Base32() {
242         this(false);
243     }
244 
245     /**
246      * Constructs a Base32 codec used for decoding and encoding.
247      * <p>
248      * When encoding the line length is 0 (no chunking).
249      * </p>
250      *
251      * @param useHex if {@code true} then use Base32 Hex alphabet
252      */
253     public Base32(final boolean useHex) {
254         this(0, null, useHex, PAD_DEFAULT);
255     }
256 
257     /**
258      * Constructs a Base32 codec used for decoding and encoding.
259      * <p>
260      * When encoding the line length is 0 (no chunking).
261      * </p>
262      *
263      * @param useHex  if {@code true} then use Base32 Hex alphabet
264      * @param padding byte used as padding byte.
265      */
266     public Base32(final boolean useHex, final byte padding) {
267         this(0, null, useHex, padding);
268     }
269 
270     /**
271      * Constructs a Base32 codec used for decoding and encoding.
272      * <p>
273      * When encoding the line length is 0 (no chunking).
274      * </p>
275      *
276      * @param pad byte used as padding byte.
277      */
278     public Base32(final byte pad) {
279         this(false, pad);
280     }
281 
282     /**
283      * Constructs a Base32 codec used for decoding and encoding.
284      * <p>
285      * When encoding the line length is given in the constructor, the line separator is CRLF.
286      * </p>
287      *
288      * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
289      *                   the output will not be divided into lines (chunks). Ignored when decoding.
290      */
291     public Base32(final int lineLength) {
292         this(lineLength, CHUNK_SEPARATOR);
293     }
294 
295     /**
296      * Constructs a Base32 codec used for decoding and encoding.
297      * <p>
298      * When encoding the line length and line separator are given in the constructor.
299      * </p>
300      * <p>
301      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
302      * </p>
303      *
304      * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
305      *                      then the output will not be divided into lines (chunks). Ignored when decoding.
306      * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
307      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
308      */
309     public Base32(final int lineLength, final byte[] lineSeparator) {
310         this(lineLength, lineSeparator, false, PAD_DEFAULT);
311     }
312 
313     /**
314      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
315      * <p>
316      * When encoding the line length and line separator are given in the constructor.
317      * </p>
318      * <p>
319      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
320      * </p>
321      *
322      * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
323      *                      then the output will not be divided into lines (chunks). Ignored when decoding.
324      * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
325      * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
326      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
327      */
328     public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
329         this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
330     }
331 
332     /**
333      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
334      * <p>
335      * When encoding the line length and line separator are given in the constructor.
336      * </p>
337      * <p>
338      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
339      * </p>
340      *
341      * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
342      *                      then the output will not be divided into lines (chunks). Ignored when decoding.
343      * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
344      * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
345      * @param padding       padding byte.
346      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
347      */
348     public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
349         this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
350     }
351 
352     /**
353      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
354      * <p>
355      * When encoding the line length and line separator are given in the constructor.
356      * </p>
357      * <p>
358      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
359      * </p>
360      *
361      * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
362      *                       then the output will not be divided into lines (chunks). Ignored when decoding.
363      * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
364      * @param useHex         use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
365      * @param padding        padding byte.
366      * @param decodingPolicy The decoding policy.
367      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
368      * @since 1.15
369      */
370     public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
371         this(lineLength, lineSeparator, encodeTable(useHex), padding, decodingPolicy);
372     }
373 
374     /**
375      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
376      * <p>
377      * When encoding the line length and line separator are given in the constructor.
378      * </p>
379      * <p>
380      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
381      * </p>
382      *
383      * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
384      *                       then the output will not be divided into lines (chunks). Ignored when decoding.
385      * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
386      * @param encodeTable    A Base32 alphabet.
387      * @param padding        padding byte.
388      * @param decodingPolicy The decoding policy.
389      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
390      */
391     private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) {
392         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
393         Objects.requireNonNull(encodeTable, "encodeTable");
394         this.encodeTable = encodeTable;
395         this.decodeTable = encodeTable == HEX_ENCODE_TABLE ? HEX_DECODE_TABLE : DECODE_TABLE;
396         if (lineLength > 0) {
397             if (lineSeparator == null) {
398                 throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
399             }
400             final byte[] lineSeparatorCopy = lineSeparator.clone();
401             // Must be done after initializing the tables
402             if (containsAlphabetOrPad(lineSeparatorCopy)) {
403                 final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
404                 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
405             }
406             this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
407             this.lineSeparator = lineSeparatorCopy;
408         } else {
409             this.encodeSize = BYTES_PER_ENCODED_BLOCK;
410             this.lineSeparator = null;
411         }
412         if (isInAlphabet(padding) || Character.isWhitespace(padding)) {
413             throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
414         }
415     }
416 
417     /**
418      * <p>
419      * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
420      * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
421      * </p>
422      * <p>
423      * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications
424      * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
425      * </p>
426      * <p>
427      * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
428      * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
429      * </p>
430      *
431      * @param input   byte[] array of ASCII data to Base32 decode.
432      * @param inPos   Position to start reading data from.
433      * @param inAvail Amount of bytes available from input for decoding.
434      * @param context the context to be used
435      */
436     @Override
437     void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
438         // package protected for access from I/O streams
439         if (context.eof) {
440             return;
441         }
442         if (inAvail < 0) {
443             context.eof = true;
444         }
445         final int decodeSize = this.encodeSize - 1;
446         for (int i = 0; i < inAvail; i++) {
447             final byte b = input[inPos++];
448             if (b == pad) {
449                 // We're done.
450                 context.eof = true;
451                 break;
452             }
453             final byte[] buffer = ensureBufferSize(decodeSize, context);
454             if (b >= 0 && b < this.decodeTable.length) {
455                 final int result = this.decodeTable[b];
456                 if (result >= 0) {
457                     context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
458                     // collect decoded bytes
459                     context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
460                     if (context.modulus == 0) { // we can output the 5 bytes
461                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
462                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
463                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
464                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
465                         buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
466                     }
467                 }
468             }
469         }
470         // Two forms of EOF as far as Base32 decoder is concerned: actual
471         // EOF (-1) and first time '=' character is encountered in stream.
472         // This approach makes the '=' padding characters completely optional.
473         if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
474             final byte[] buffer = ensureBufferSize(decodeSize, context);
475             // We ignore partial bytes, i.e. only multiples of 8 count.
476             // Any combination not part of a valid encoding is either partially decoded
477             // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
478             // It is not possible to encode with 1, 3, 6 trailing characters.
479             // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
480             // See the encode(byte[]) method EOF section.
481             switch (context.modulus) {
482 //              case 0 : // impossible, as excluded above
483             case 1: // 5 bits - either ignore entirely, or raise an exception
484                 validateTrailingCharacters();
485             case 2: // 10 bits, drop 2 and output one byte
486                 validateCharacter(MASK_2BITS, context);
487                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
488                 break;
489             case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
490                 validateTrailingCharacters();
491                 // Not possible from a valid encoding but decode anyway
492                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
493                 break;
494             case 4: // 20 bits = 2*8 + 4
495                 validateCharacter(MASK_4BITS, context);
496                 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
497                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
498                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
499                 break;
500             case 5: // 25 bits = 3*8 + 1
501                 validateCharacter(MASK_1BITS, context);
502                 context.lbitWorkArea = context.lbitWorkArea >> 1;
503                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
504                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
505                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
506                 break;
507             case 6: // 30 bits = 3*8 + 6, or raise an exception
508                 validateTrailingCharacters();
509                 // Not possible from a valid encoding but decode anyway
510                 context.lbitWorkArea = context.lbitWorkArea >> 6;
511                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
512                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
513                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
514                 break;
515             case 7: // 35 bits = 4*8 +3
516                 validateCharacter(MASK_3BITS, context);
517                 context.lbitWorkArea = context.lbitWorkArea >> 3;
518                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
519                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
520                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
521                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
522                 break;
523             default:
524                 // modulus can be 0-7, and we excluded 0,1 already
525                 throw new IllegalStateException("Impossible modulus " + context.modulus);
526             }
527         }
528     }
529 
530     /**
531      * <p>
532      * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
533      * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
534      * </p>
535      *
536      * @param input   byte[] array of binary data to Base32 encode.
537      * @param inPos   Position to start reading data from.
538      * @param inAvail Amount of bytes available from input for encoding.
539      * @param context the context to be used
540      */
541     @Override
542     void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
543         // package protected for access from I/O streams
544         if (context.eof) {
545             return;
546         }
547         // inAvail < 0 is how we're informed of EOF in the underlying data we're
548         // encoding.
549         if (inAvail < 0) {
550             context.eof = true;
551             if (0 == context.modulus && lineLength == 0) {
552                 return; // no leftovers to process and not using chunking
553             }
554             final byte[] buffer = ensureBufferSize(encodeSize, context);
555             final int savedPos = context.pos;
556             switch (context.modulus) { // % 5
557             case 0:
558                 break;
559             case 1: // Only 1 octet; take top 5 bits then remainder
560                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
561                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
562                 buffer[context.pos++] = pad;
563                 buffer[context.pos++] = pad;
564                 buffer[context.pos++] = pad;
565                 buffer[context.pos++] = pad;
566                 buffer[context.pos++] = pad;
567                 buffer[context.pos++] = pad;
568                 break;
569             case 2: // 2 octets = 16 bits to use
570                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
571                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
572                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
573                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
574                 buffer[context.pos++] = pad;
575                 buffer[context.pos++] = pad;
576                 buffer[context.pos++] = pad;
577                 buffer[context.pos++] = pad;
578                 break;
579             case 3: // 3 octets = 24 bits to use
580                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
581                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
582                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
583                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
584                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
585                 buffer[context.pos++] = pad;
586                 buffer[context.pos++] = pad;
587                 buffer[context.pos++] = pad;
588                 break;
589             case 4: // 4 octets = 32 bits to use
590                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
591                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
592                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
593                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
594                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
595                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
596                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
597                 buffer[context.pos++] = pad;
598                 break;
599             default:
600                 throw new IllegalStateException("Impossible modulus " + context.modulus);
601             }
602             context.currentLinePos += context.pos - savedPos; // keep track of current line position
603             // if currentPos == 0 we are at the start of a line, so don't add CRLF
604             if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
605                 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
606                 context.pos += lineSeparator.length;
607             }
608         } else {
609             for (int i = 0; i < inAvail; i++) {
610                 final byte[] buffer = ensureBufferSize(encodeSize, context);
611                 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
612                 int b = input[inPos++];
613                 if (b < 0) {
614                     b += 256;
615                 }
616                 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
617                 if (0 == context.modulus) { // we have enough bytes to create our output
618                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5BITS];
619                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5BITS];
620                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5BITS];
621                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5BITS];
622                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5BITS];
623                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5BITS];
624                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5BITS];
625                     buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5BITS];
626                     context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
627                     if (lineLength > 0 && lineLength <= context.currentLinePos) {
628                         System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
629                         context.pos += lineSeparator.length;
630                         context.currentLinePos = 0;
631                     }
632                 }
633             }
634         }
635     }
636 
637     /**
638      * Gets the line separator (for testing only).
639      *
640      * @return the line separator.
641      */
642     byte[] getLineSeparator() {
643         return lineSeparator;
644     }
645 
646     /**
647      * Returns whether or not the {@code octet} is in the Base32 alphabet.
648      *
649      * @param octet The value to test
650      * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
651      */
652     @Override
653     public boolean isInAlphabet(final byte octet) {
654         return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
655     }
656 
657     /**
658      * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
659      * <p>
660      * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
661      * that will be discarded.
662      * </p>
663      *
664      * @param emptyBitsMask The mask of the lower bits that should be empty
665      * @param context       the context to be used
666      * @throws IllegalArgumentException if the bits being checked contain any non-zero value
667      */
668     private void validateCharacter(final long emptyBitsMask, final Context context) {
669         // Use the long bit work area
670         if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
671             throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
672                     "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
673         }
674     }
675 
676     /**
677      * Validates whether decoding allows final trailing characters that cannot be created during encoding.
678      *
679      * @throws IllegalArgumentException if strict decoding is enabled
680      */
681     private void validateTrailingCharacters() {
682         if (isStrictDecoding()) {
683             throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
684                     "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
685         }
686     }
687 }