001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Objects;
021
022import org.apache.commons.codec.CodecPolicy;
023
024/**
025 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
026 *
027 * <p>
028 * The class can be parameterized in the following manner with various constructors:
029 * </p>
030 * <ul>
031 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
032 * <li>Line length: Default 0 (no chunking). Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.</li>
033 * <li>Line separator: Default is CRLF ("\r\n")</li>
034 * </ul>
035 * <p>
036 * This class operates directly on byte streams, and not character streams.
037 * </p>
038 * <p>
039 * This class is thread-safe.
040 * </p>
041 * <p>
042 * You can configure instances with the {@link Builder}.
043 * </p>
044 * <pre>
045 * Base32 base32 = Base32.builder()
046 *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
047 *   .setEncodeTable(customEncodeTable)
048 *   .setLineLength(0)                          // default is none
049 *   .setLineSeparator('\r', '\n')              // default is CR LF
050 *   .setPadding('=')                           // default is =
051 *   .get();
052 * </pre>
053 *
054 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
055 * @since 1.5
056 */
057public class Base32 extends BaseNCodec {
058
059    /**
060     * Builds {@link Base32} instances.
061     *
062     * @since 1.17.0
063     */
064    public static class Builder extends AbstractBuilder<Base32, Builder> {
065
066        /**
067         * Constructs a new instance.
068         */
069        public Builder() {
070            super(ENCODE_TABLE);
071        }
072
073        @Override
074        public Base32 get() {
075            return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy());
076        }
077
078        /**
079         * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
080         * <p>
081         * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
082         * </p>
083         *
084         * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
085         * @return this instance.
086         * @since 1.18.0
087         */
088        public Builder setHexDecodeTable(final boolean useHex) {
089            return setEncodeTable(decodeTable(useHex));
090        }
091
092        /**
093         * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
094         * <p>
095         * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
096         * </p>
097         *
098         * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
099         * @return this instance.
100         * @since 1.18.0
101         */
102        public Builder setHexEncodeTable(final boolean useHex) {
103            return setEncodeTable(encodeTable(useHex));
104        }
105    }
106
107    /**
108     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
109     * characters.
110     */
111    private static final int BITS_PER_ENCODED_BYTE = 5;
112
113    private static final int BYTES_PER_ENCODED_BLOCK = 8;
114    private static final int BYTES_PER_UNENCODED_BLOCK = 5;
115    /**
116     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
117     * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
118     */
119    // @formatter:off
120    private static final byte[] DECODE_TABLE = {
121         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
122            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
123            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
124            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
125            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
126            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
127            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
128                                                        -1, -1, -1, -1, -1, // 5b-5f
129            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
130            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
131    };
132    // @formatter:on
133
134    /**
135     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC
136     * 4648.
137     */
138    // @formatter:off
139    private static final byte[] ENCODE_TABLE = {
140            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
141            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
142            '2', '3', '4', '5', '6', '7',
143    };
144    // @formatter:on
145
146    /**
147     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
148     * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
149     */
150    // @formatter:off
151    private static final byte[] HEX_DECODE_TABLE = {
152         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
153            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
154            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
155            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
156             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
157            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
158            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
159                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
160            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
161            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
162    };
163    // @formatter:on
164
165    /**
166     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of
167     * RFC 4648.
168     */
169    // @formatter:off
170    private static final byte[] HEX_ENCODE_TABLE = {
171            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
172            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
173            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
174    };
175    // @formatter:on
176
177    /** Mask used to extract 5 bits, used when encoding Base32 bytes */
178    private static final int MASK_5BITS = 0x1f;
179
180    /** Mask used to extract 4 bits, used when decoding final trailing character. */
181    private static final long MASK_4BITS = 0x0fL;
182
183    /** Mask used to extract 3 bits, used when decoding final trailing character. */
184    private static final long MASK_3BITS = 0x07L;
185
186    /** Mask used to extract 2 bits, used when decoding final trailing character. */
187    private static final long MASK_2BITS = 0x03L;
188
189    /** Mask used to extract 1 bits, used when decoding final trailing character. */
190    private static final long MASK_1BITS = 0x01L;
191
192    // The static final fields above are used for the original static byte[] methods on Base32.
193    // The private member fields below are used with the new streaming approach, which requires
194    // some state be preserved between calls of encode() and decode().
195
196    /**
197     * Creates a new Builder.
198     *
199     * @return a new Builder.
200     * @since 1.17.0
201     */
202    public static Builder builder() {
203        return new Builder();
204    }
205
206    private static byte[] decodeTable(final boolean useHex) {
207        return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
208    }
209
210    private static byte[] encodeTable(final boolean useHex) {
211        return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
212    }
213
214    /**
215     * Decode table to use.
216     */
217    private final byte[] decodeTable;
218
219    /**
220     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link
221     * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;}
222     */
223    private final int encodeSize;
224
225    /**
226     * Encode table to use.
227     */
228    private final byte[] encodeTable;
229
230    /**
231     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
232     */
233    private final byte[] lineSeparator;
234
235    /**
236     * Constructs a Base32 codec used for decoding and encoding.
237     * <p>
238     * When encoding the line length is 0 (no chunking).
239     * </p>
240     */
public Base32() {
    // Standard (non-hex) alphabet with the default padding byte; no chunking.
    this(false, PAD_DEFAULT);
}
244
245    /**
246     * Constructs a Base32 codec used for decoding and encoding.
247     * <p>
248     * When encoding the line length is 0 (no chunking).
249     * </p>
250     *
251     * @param useHex if {@code true} then use Base32 Hex alphabet
252     */
public Base32(final boolean useHex) {
    // Default padding byte; the two-argument overload supplies line length 0 and no separator.
    this(useHex, PAD_DEFAULT);
}
256
257    /**
258     * Constructs a Base32 codec used for decoding and encoding.
259     * <p>
260     * When encoding the line length is 0 (no chunking).
261     * </p>
262     *
263     * @param useHex  if {@code true} then use Base32 Hex alphabet
264     * @param padding byte used as padding byte.
265     */
public Base32(final boolean useHex, final byte padding) {
    // Line length 0 and a null separator mean no chunking; the decoding policy is the default one.
    this(0, null, useHex, padding, DECODING_POLICY_DEFAULT);
}
269
270    /**
271     * Constructs a Base32 codec used for decoding and encoding.
272     * <p>
273     * When encoding the line length is 0 (no chunking).
274     * </p>
275     *
276     * @param pad byte used as padding byte.
277     */
public Base32(final byte pad) {
    // Standard (non-hex) alphabet, no chunking, caller-supplied padding byte.
    this(0, null, false, pad);
}
281
282    /**
283     * Constructs a Base32 codec used for decoding and encoding.
284     * <p>
285     * When encoding the line length is given in the constructor, the line separator is CRLF.
286     * </p>
287     *
288     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
289     *                   the output will not be divided into lines (chunks). Ignored when decoding.
290     */
public Base32(final int lineLength) {
    // Standard (non-hex) alphabet, lines terminated with CHUNK_SEPARATOR.
    this(lineLength, CHUNK_SEPARATOR, false);
}
294
295    /**
296     * Constructs a Base32 codec used for decoding and encoding.
297     * <p>
298     * When encoding the line length and line separator are given in the constructor.
299     * </p>
300     * <p>
301     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
302     * </p>
303     *
304     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
305     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
306     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
307     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
308     */
public Base32(final int lineLength, final byte[] lineSeparator) {
    // Standard (non-hex) alphabet; the three-argument overload supplies the default padding byte.
    this(lineLength, lineSeparator, false);
}
312
313    /**
314     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
315     * <p>
316     * When encoding the line length and line separator are given in the constructor.
317     * </p>
318     * <p>
319     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
320     * </p>
321     *
322     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
323     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
324     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
325     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
326     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
327     */
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
    // Default padding byte and default decoding policy.
    this(lineLength, lineSeparator, useHex, PAD_DEFAULT, DECODING_POLICY_DEFAULT);
}
331
332    /**
333     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
334     * <p>
335     * When encoding the line length and line separator are given in the constructor.
336     * </p>
337     * <p>
338     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
339     * </p>
340     *
341     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
342     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
343     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
344     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
345     * @param padding       padding byte.
346     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
347     */
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
    // Resolve the alphabet here and apply the default decoding policy.
    this(lineLength, lineSeparator, encodeTable(useHex), padding, DECODING_POLICY_DEFAULT);
}
351
352    /**
353     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
354     * <p>
355     * When encoding the line length and line separator are given in the constructor.
356     * </p>
357     * <p>
358     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
359     * </p>
360     *
361     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
362     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
363     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
364     * @param useHex         use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
365     * @param padding        padding byte.
366     * @param decodingPolicy The decoding policy.
367     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
368     * @since 1.15
369     */
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
    // The shared table instance is passed on as-is; the receiving constructor compares it by reference to choose the decode table.
    this(lineLength, lineSeparator, useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE, padding, decodingPolicy);
}
373
374    /**
375     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
376     * <p>
377     * When encoding the line length and line separator are given in the constructor.
378     * </p>
379     * <p>
380     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
381     * </p>
382     *
383     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
384     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
385     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
386     * @param encodeTable    A Base32 alphabet.
387     * @param padding        padding byte.
388     * @param decodingPolicy The decoding policy.
389     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
390     */
private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) {
    super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
    this.encodeTable = Objects.requireNonNull(encodeTable, "encodeTable");
    // The decode table is chosen by reference: only the shared hex encode table selects the hex decode table.
    if (encodeTable == HEX_ENCODE_TABLE) {
        this.decodeTable = HEX_DECODE_TABLE;
    } else {
        this.decodeTable = DECODE_TABLE;
    }
    if (lineLength <= 0) {
        // No chunking: the separator is dropped even if one was supplied.
        this.encodeSize = BYTES_PER_ENCODED_BLOCK;
        this.lineSeparator = null;
    } else {
        if (lineSeparator == null) {
            throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
        }
        // Work on a private copy so later changes to the caller's array do not affect this codec.
        final byte[] separator = lineSeparator.clone();
        // The alphabet check needs the tables, so it must come after they are assigned above.
        if (containsAlphabetOrPad(separator)) {
            final String shown = StringUtils.newStringUtf8(separator);
            throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + shown + "]");
        }
        this.encodeSize = BYTES_PER_ENCODED_BLOCK + separator.length;
        this.lineSeparator = separator;
    }
    if (isInAlphabet(padding) || Character.isWhitespace(padding)) {
        throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
    }
}
416
417    /**
418     * <p>
419     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
420     * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
421     * </p>
422     * <p>
423     * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications
424     * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
425     * </p>
426     * <p>
427     * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
428     * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
429     * </p>
430     *
431     * @param input   byte[] array of ASCII data to Base32 decode.
432     * @param inPos   Position to start reading data from.
433     * @param inAvail Amount of bytes available from input for decoding.
434     * @param context the context to be used
435     */
436    @Override
437    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
438        // package protected for access from I/O streams
439        if (context.eof) {
440            return;
441        }
442        if (inAvail < 0) {
443            context.eof = true;
444        }
445        final int decodeSize = this.encodeSize - 1;
446        for (int i = 0; i < inAvail; i++) {
447            final byte b = input[inPos++];
448            if (b == pad) {
449                // We're done.
450                context.eof = true;
451                break;
452            }
453            final byte[] buffer = ensureBufferSize(decodeSize, context);
454            if (b >= 0 && b < this.decodeTable.length) {
455                final int result = this.decodeTable[b];
456                if (result >= 0) {
457                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
458                    // collect decoded bytes
459                    context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
460                    if (context.modulus == 0) { // we can output the 5 bytes
461                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
462                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
463                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
464                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
465                        buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
466                    }
467                }
468            }
469        }
470        // Two forms of EOF as far as Base32 decoder is concerned: actual
471        // EOF (-1) and first time '=' character is encountered in stream.
472        // This approach makes the '=' padding characters completely optional.
473        if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
474            final byte[] buffer = ensureBufferSize(decodeSize, context);
475            // We ignore partial bytes, i.e. only multiples of 8 count.
476            // Any combination not part of a valid encoding is either partially decoded
477            // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
478            // It is not possible to encode with 1, 3, 6 trailing characters.
479            // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
480            // See the encode(byte[]) method EOF section.
481            switch (context.modulus) {
482//              case 0 : // impossible, as excluded above
483            case 1: // 5 bits - either ignore entirely, or raise an exception
484                validateTrailingCharacters();
485            case 2: // 10 bits, drop 2 and output one byte
486                validateCharacter(MASK_2BITS, context);
487                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
488                break;
489            case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
490                validateTrailingCharacters();
491                // Not possible from a valid encoding but decode anyway
492                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
493                break;
494            case 4: // 20 bits = 2*8 + 4
495                validateCharacter(MASK_4BITS, context);
496                context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
497                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
498                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
499                break;
500            case 5: // 25 bits = 3*8 + 1
501                validateCharacter(MASK_1BITS, context);
502                context.lbitWorkArea = context.lbitWorkArea >> 1;
503                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
504                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
505                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
506                break;
507            case 6: // 30 bits = 3*8 + 6, or raise an exception
508                validateTrailingCharacters();
509                // Not possible from a valid encoding but decode anyway
510                context.lbitWorkArea = context.lbitWorkArea >> 6;
511                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
512                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
513                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
514                break;
515            case 7: // 35 bits = 4*8 +3
516                validateCharacter(MASK_3BITS, context);
517                context.lbitWorkArea = context.lbitWorkArea >> 3;
518                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
519                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
520                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
521                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
522                break;
523            default:
524                // modulus can be 0-7, and we excluded 0,1 already
525                throw new IllegalStateException("Impossible modulus " + context.modulus);
526            }
527        }
528    }
529
530    /**
531     * <p>
532     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
533     * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
534     * </p>
535     *
536     * @param input   byte[] array of binary data to Base32 encode.
537     * @param inPos   Position to start reading data from.
538     * @param inAvail Amount of bytes available from input for encoding.
539     * @param context the context to be used
540     */
541    @Override
542    void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
543        // package protected for access from I/O streams
544        if (context.eof) {
545            return;
546        }
547        // inAvail < 0 is how we're informed of EOF in the underlying data we're
548        // encoding.
549        if (inAvail < 0) {
550            context.eof = true;
551            if (0 == context.modulus && lineLength == 0) {
552                return; // no leftovers to process and not using chunking
553            }
554            final byte[] buffer = ensureBufferSize(encodeSize, context);
555            final int savedPos = context.pos;
556            switch (context.modulus) { // % 5
557            case 0:
558                break;
559            case 1: // Only 1 octet; take top 5 bits then remainder
560                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
561                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
562                buffer[context.pos++] = pad;
563                buffer[context.pos++] = pad;
564                buffer[context.pos++] = pad;
565                buffer[context.pos++] = pad;
566                buffer[context.pos++] = pad;
567                buffer[context.pos++] = pad;
568                break;
569            case 2: // 2 octets = 16 bits to use
570                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
571                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
572                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
573                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
574                buffer[context.pos++] = pad;
575                buffer[context.pos++] = pad;
576                buffer[context.pos++] = pad;
577                buffer[context.pos++] = pad;
578                break;
579            case 3: // 3 octets = 24 bits to use
580                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
581                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
582                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
583                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
584                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
585                buffer[context.pos++] = pad;
586                buffer[context.pos++] = pad;
587                buffer[context.pos++] = pad;
588                break;
589            case 4: // 4 octets = 32 bits to use
590                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
591                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
592                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
593                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
594                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
595                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
596                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
597                buffer[context.pos++] = pad;
598                break;
599            default:
600                throw new IllegalStateException("Impossible modulus " + context.modulus);
601            }
602            context.currentLinePos += context.pos - savedPos; // keep track of current line position
603            // if currentPos == 0 we are at the start of a line, so don't add CRLF
604            if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
605                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
606                context.pos += lineSeparator.length;
607            }
608        } else {
609            for (int i = 0; i < inAvail; i++) {
610                final byte[] buffer = ensureBufferSize(encodeSize, context);
611                context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
612                int b = input[inPos++];
613                if (b < 0) {
614                    b += 256;
615                }
616                context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
617                if (0 == context.modulus) { // we have enough bytes to create our output
618                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5BITS];
619                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5BITS];
620                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5BITS];
621                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5BITS];
622                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5BITS];
623                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5BITS];
624                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5BITS];
625                    buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5BITS];
626                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
627                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
628                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
629                        context.pos += lineSeparator.length;
630                        context.currentLinePos = 0;
631                    }
632                }
633            }
634        }
635    }
636
637    /**
638     * Gets the line separator (for testing only).
639     *
640     * @return the line separator.
641     */
642    byte[] getLineSeparator() {
643        return lineSeparator;
644    }
645
646    /**
647     * Returns whether or not the {@code octet} is in the Base32 alphabet.
648     *
649     * @param octet The value to test
650     * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
651     */
652    @Override
653    public boolean isInAlphabet(final byte octet) {
654        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
655    }
656
657    /**
658     * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
659     * <p>
660     * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
661     * that will be discarded.
662     * </p>
663     *
664     * @param emptyBitsMask The mask of the lower bits that should be empty
665     * @param context       the context to be used
666     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
667     */
668    private void validateCharacter(final long emptyBitsMask, final Context context) {
669        // Use the long bit work area
670        if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
671            throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
672                    "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
673        }
674    }
675
676    /**
677     * Validates whether decoding allows final trailing characters that cannot be created during encoding.
678     *
679     * @throws IllegalArgumentException if strict decoding is enabled
680     */
681    private void validateTrailingCharacters() {
682        if (isStrictDecoding()) {
683            throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
684                    "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
685        }
686    }
687}