001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.util.Arrays; 021import java.util.Objects; 022import java.util.function.Supplier; 023 024import org.apache.commons.codec.BinaryDecoder; 025import org.apache.commons.codec.BinaryEncoder; 026import org.apache.commons.codec.CodecPolicy; 027import org.apache.commons.codec.DecoderException; 028import org.apache.commons.codec.EncoderException; 029 030/** 031 * Abstract superclass for Base-N encoders and decoders. 032 * 033 * <p> 034 * This class is thread-safe. 035 * </p> 036 * <p> 037 * You can set the decoding behavior when the input bytes contain leftover trailing bits that cannot be created by a 038 * valid encoding. These can be bits that are unused from the final character or entire characters. The default mode is 039 * lenient decoding. 040 * </p> 041 * <ul> 042 * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible. The remainder are discarded. 043 * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits are not part of a valid 044 * encoding. Any unused bits from the final character must be zero. Impossible counts of entire final characters are not 045 * allowed. 046 * </ul> 047 * <p> 048 * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded to a byte array that matches 049 * the original, i.e. no changes occur on the final character. This requires that the input bytes use the same padding 050 * and alphabet as the encoder. 051 * </p> 052 */ 053public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { 054 055 /** 056 * Builds {@link Base64} instances. 057 * 058 * @param <T> the codec type to build. 059 * @param <B> the codec builder subtype. 060 * @since 1.17.0 061 */ 062 public abstract static class AbstractBuilder<T, B extends AbstractBuilder<T, B>> implements Supplier<T> { 063 064 private CodecPolicy decodingPolicy = DECODING_POLICY_DEFAULT; 065 private int lineLength; 066 private byte[] lineSeparator = CHUNK_SEPARATOR; 067 private final byte[] defaultEncodeTable; 068 private byte[] encodeTable; 069 /** Padding byte. */ 070 private byte padding = PAD_DEFAULT; 071 072 AbstractBuilder(final byte[] defaultEncodeTable) { 073 this.defaultEncodeTable = defaultEncodeTable; 074 this.encodeTable = defaultEncodeTable; 075 } 076 077 /** 078 * Returns this instance typed as the subclass type {@code B}. 079 * <p> 080 * This is the same as the expression: 081 * </p> 082 * <pre> 083 * (B) this 084 * </pre> 085 * 086 * @return this instance typed as the subclass type {@code B}. 087 */ 088 @SuppressWarnings("unchecked") 089 B asThis() { 090 return (B) this; 091 } 092 093 CodecPolicy getDecodingPolicy() { 094 return decodingPolicy; 095 } 096 097 byte[] getEncodeTable() { 098 return encodeTable; 099 } 100 101 int getLineLength() { 102 return lineLength; 103 } 104 105 byte[] getLineSeparator() { 106 return lineSeparator; 107 } 108 109 byte getPadding() { 110 return padding; 111 } 112 113 /** 114 * Sets the decoding policy. 115 * 116 * @param decodingPolicy the decoding policy, null resets to the default. 117 * @return {@code this} instance. 118 */ 119 public B setDecodingPolicy(final CodecPolicy decodingPolicy) { 120 this.decodingPolicy = decodingPolicy != null ? decodingPolicy : DECODING_POLICY_DEFAULT; 121 return asThis(); 122 } 123 124 /** 125 * Sets the encode table. 126 * 127 * @param encodeTable the encode table, null resets to the default. 128 * @return {@code this} instance. 129 */ 130 public B setEncodeTable(final byte... encodeTable) { 131 this.encodeTable = encodeTable != null ? encodeTable : defaultEncodeTable; 132 return asThis(); 133 } 134 135 /** 136 * Sets the line length. 137 * 138 * @param lineLength the line length, less than 0 resets to the default. 139 * @return {@code this} instance. 140 */ 141 public B setLineLength(final int lineLength) { 142 this.lineLength = Math.max(0, lineLength); 143 return asThis(); 144 } 145 146 /** 147 * Sets the line separator. 148 * 149 * @param lineSeparator the line separator, null resets to the default. 150 * @return {@code this} instance. 151 */ 152 public B setLineSeparator(final byte... lineSeparator) { 153 this.lineSeparator = lineSeparator != null ? lineSeparator : CHUNK_SEPARATOR; 154 return asThis(); 155 } 156 157 /** 158 * Sets the padding byte. 159 * 160 * @param padding the padding byte. 161 * @return {@code this} instance. 162 */ 163 public B setPadding(final byte padding) { 164 this.padding = padding; 165 return asThis(); 166 } 167 168 } 169 170 /** 171 * Holds thread context so classes can be thread-safe. 172 * 173 * This class is not itself thread-safe; each thread must allocate its own copy. 174 */ 175 static class Context { 176 177 /** 178 * Placeholder for the bytes we're dealing with for our based logic. 179 * Bitwise operations store and extract the encoding or decoding from this variable. 180 */ 181 int ibitWorkArea; 182 183 /** 184 * Placeholder for the bytes we're dealing with for our based logic. 185 * Bitwise operations store and extract the encoding or decoding from this variable. 186 */ 187 long lbitWorkArea; 188 189 /** 190 * Buffer for streaming. 191 */ 192 byte[] buffer; 193 194 /** 195 * Position where next character should be written in the buffer. 196 */ 197 int pos; 198 199 /** 200 * Position where next character should be read from the buffer. 201 */ 202 int readPos; 203 204 /** 205 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, 206 * and must be thrown away. 207 */ 208 boolean eof; 209 210 /** 211 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use 212 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). 213 */ 214 int currentLinePos; 215 216 /** 217 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This 218 * variable helps track that. 219 */ 220 int modulus; 221 222 /** 223 * Returns a String useful for debugging (especially within a debugger.) 224 * 225 * @return a String useful for debugging. 226 */ 227 @Override 228 public String toString() { 229 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + 230 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer), 231 currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos); 232 } 233 } 234 235 /** 236 * EOF 237 * 238 * @since 1.7 239 */ 240 static final int EOF = -1; 241 242 /** 243 * MIME chunk size per RFC 2045 section 6.8. 244 * 245 * <p> 246 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 247 * equal signs. 248 * </p> 249 * 250 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 251 */ 252 public static final int MIME_CHUNK_SIZE = 76; 253 254 /** 255 * PEM chunk size per RFC 1421 section 4.3.2.4. 256 * 257 * <p> 258 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 259 * equal signs. 260 * </p> 261 * 262 * @see <a href="https://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> 263 */ 264 public static final int PEM_CHUNK_SIZE = 64; 265 266 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 267 268 /** 269 * Defines the default buffer size - currently {@value} 270 * - must be large enough for at least one encoded block+separator 271 */ 272 private static final int DEFAULT_BUFFER_SIZE = 8192; 273 274 /** 275 * The maximum size buffer to allocate. 276 * 277 * <p>This is set to the same size used in the JDK {@link java.util.ArrayList}:</p> 278 * <blockquote> 279 * Some VMs reserve some header words in an array. 280 * Attempts to allocate larger arrays may result in 281 * OutOfMemoryError: Requested array size exceeds VM limit. 282 * </blockquote> 283 */ 284 private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8; 285 286 /** Mask used to extract 8 bits, used in decoding bytes */ 287 protected static final int MASK_8BITS = 0xff; 288 289 /** 290 * Byte used to pad output. 291 */ 292 protected static final byte PAD_DEFAULT = '='; // Allow static access to default 293 294 /** 295 * The default decoding policy. 296 * @since 1.15 297 */ 298 protected static final CodecPolicy DECODING_POLICY_DEFAULT = CodecPolicy.LENIENT; 299 300 /** 301 * Chunk separator per RFC 2045 section 2.1. 302 * 303 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 304 */ 305 static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 306 307 /** 308 * Create a positive capacity at least as large the minimum required capacity. 309 * If the minimum capacity is negative then this throws an OutOfMemoryError as no array 310 * can be allocated. 311 * 312 * @param minCapacity the minimum capacity 313 * @return the capacity 314 * @throws OutOfMemoryError if the {@code minCapacity} is negative 315 */ 316 private static int createPositiveCapacity(final int minCapacity) { 317 if (minCapacity < 0) { 318 // overflow 319 throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL)); 320 } 321 // This is called when we require buffer expansion to a very big array. 322 // Use the conservative maximum buffer size if possible, otherwise the biggest required. 323 // 324 // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE. 325 // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full 326 // Integer.MAX_VALUE length array. 327 // The result is that we may have to allocate an array of this size more than once if 328 // the capacity must be expanded again. 329 return Math.max(minCapacity, MAX_BUFFER_SIZE); 330 } 331 332 /** 333 * Gets a copy of the chunk separator per RFC 2045 section 2.1. 334 * 335 * @return the chunk separator 336 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 337 * @since 1.15 338 */ 339 public static byte[] getChunkSeparator() { 340 return CHUNK_SEPARATOR.clone(); 341 } 342 343 /** 344 * Checks if a byte value is whitespace or not. 345 * @param byteToCheck 346 * the byte to check 347 * @return true if byte is whitespace, false otherwise 348 * @see Character#isWhitespace(int) 349 * @deprecated Use {@link Character#isWhitespace(int)}. 350 */ 351 @Deprecated 352 protected static boolean isWhiteSpace(final byte byteToCheck) { 353 return Character.isWhitespace(byteToCheck); 354 } 355 356 /** 357 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. 358 * @param context the context to be used 359 * @param minCapacity the minimum required capacity 360 * @return the resized byte[] buffer 361 * @throws OutOfMemoryError if the {@code minCapacity} is negative 362 */ 363 private static byte[] resizeBuffer(final Context context, final int minCapacity) { 364 // Overflow-conscious code treats the min and new capacity as unsigned. 365 final int oldCapacity = context.buffer.length; 366 int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR; 367 if (Integer.compareUnsigned(newCapacity, minCapacity) < 0) { 368 newCapacity = minCapacity; 369 } 370 if (Integer.compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) { 371 newCapacity = createPositiveCapacity(minCapacity); 372 } 373 final byte[] b = Arrays.copyOf(context.buffer, newCapacity); 374 context.buffer = b; 375 return b; 376 } 377 378 /** 379 * Gets the array length or 0 if null. 380 * 381 * @param array the array or null. 382 * @return the array length or 0 if null. 383 */ 384 static int toLength(final byte[] array) { 385 return array == null ? 0 : array.length; 386 } 387 388 /** 389 * @deprecated Use {@link #pad}. Will be removed in 2.0. 390 */ 391 @Deprecated 392 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later 393 394 /** Pad byte. Instance variable just in case it needs to vary later. */ 395 protected final byte pad; 396 397 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ 398 private final int unencodedBlockSize; 399 400 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ 401 private final int encodedBlockSize; 402 403 /** 404 * Chunksize for encoding. Not used when decoding. 405 * A value of zero or less implies no chunking of the encoded data. 406 * Rounded down to the nearest multiple of encodedBlockSize. 407 */ 408 protected final int lineLength; 409 410 /** 411 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 412 */ 413 private final int chunkSeparatorLength; 414 415 /** 416 * Defines the decoding behavior when the input bytes contain leftover trailing bits that 417 * cannot be created by a valid encoding. These can be bits that are unused from the final 418 * character or entire characters. The default mode is lenient decoding. Set this to 419 * {@code true} to enable strict decoding. 420 * <ul> 421 * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible. 422 * The remainder are discarded. 423 * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits 424 * are not part of a valid encoding. Any unused bits from the final character must 425 * be zero. Impossible counts of entire final characters are not allowed. 426 * </ul> 427 * <p> 428 * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded 429 * to a byte array that matches the original, i.e. no changes occur on the final 430 * character. This requires that the input bytes use the same padding and alphabet 431 * as the encoder. 432 * </p> 433 */ 434 private final CodecPolicy decodingPolicy; 435 436 /** 437 * Constructs a new instance. 438 * <p> 439 * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size. 440 * If {@code chunkSeparatorLength} is zero, then chunking is disabled. 441 * </p> 442 * 443 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 444 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 445 * @param lineLength if > 0, use chunking with a length {@code lineLength} 446 * @param chunkSeparatorLength the chunk separator length, if relevant 447 */ 448 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength) { 449 this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT); 450 } 451 452 /** 453 * Constructs a new instance. 454 * <p> 455 * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size. 456 * If {@code chunkSeparatorLength} is zero, then chunking is disabled. 457 * </p> 458 * 459 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 460 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 461 * @param lineLength if > 0, use chunking with a length {@code lineLength} 462 * @param chunkSeparatorLength the chunk separator length, if relevant 463 * @param pad byte used as padding byte. 464 */ 465 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad) { 466 this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, pad, DECODING_POLICY_DEFAULT); 467 } 468 469 /** 470 * Constructs a new instance. 471 * <p> 472 * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size. 473 * If {@code chunkSeparatorLength} is zero, then chunking is disabled. 474 * </p> 475 * 476 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 477 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 478 * @param lineLength if > 0, use chunking with a length {@code lineLength} 479 * @param chunkSeparatorLength the chunk separator length, if relevant 480 * @param pad byte used as padding byte. 481 * @param decodingPolicy Decoding policy. 482 * @since 1.15 483 */ 484 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad, 485 final CodecPolicy decodingPolicy) { 486 this.unencodedBlockSize = unencodedBlockSize; 487 this.encodedBlockSize = encodedBlockSize; 488 final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; 489 this.lineLength = useChunking ? lineLength / encodedBlockSize * encodedBlockSize : 0; 490 this.chunkSeparatorLength = chunkSeparatorLength; 491 this.pad = pad; 492 this.decodingPolicy = Objects.requireNonNull(decodingPolicy, "codecPolicy"); 493 } 494 495 /** 496 * Returns the amount of buffered data available for reading. 497 * 498 * @param context the context to be used 499 * @return The amount of buffered data available for reading. 500 */ 501 int available(final Context context) { // package protected for access from I/O streams 502 return hasData(context) ? context.pos - context.readPos : 0; 503 } 504 505 /** 506 * Tests a given byte array to see if it contains any characters within the alphabet or PAD. 507 * 508 * Intended for use in checking line-ending arrays 509 * 510 * @param arrayOctet 511 * byte array to test 512 * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise 513 */ 514 protected boolean containsAlphabetOrPad(final byte[] arrayOctet) { 515 if (arrayOctet != null) { 516 for (final byte element : arrayOctet) { 517 if (pad == element || isInAlphabet(element)) { 518 return true; 519 } 520 } 521 } 522 return false; 523 } 524 525 /** 526 * Decodes a byte[] containing characters in the Base-N alphabet. 527 * 528 * @param pArray 529 * A byte array containing Base-N character data 530 * @return a byte array containing binary data 531 */ 532 @Override 533 public byte[] decode(final byte[] pArray) { 534 if (BinaryCodec.isEmpty(pArray)) { 535 return pArray; 536 } 537 final Context context = new Context(); 538 decode(pArray, 0, pArray.length, context); 539 decode(pArray, 0, EOF, context); // Notify decoder of EOF. 540 final byte[] result = new byte[context.pos]; 541 readResults(result, 0, result.length, context); 542 return result; 543 } 544 545 // package protected for access from I/O streams 546 abstract void decode(byte[] pArray, int i, int length, Context context); 547 548 /** 549 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 550 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 551 * 552 * @param obj 553 * Object to decode 554 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String 555 * supplied. 556 * @throws DecoderException 557 * if the parameter supplied is not of type byte[] 558 */ 559 @Override 560 public Object decode(final Object obj) throws DecoderException { 561 if (obj instanceof byte[]) { 562 return decode((byte[]) obj); 563 } 564 if (obj instanceof String) { 565 return decode((String) obj); 566 } 567 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); 568 } 569 570 /** 571 * Decodes a String containing characters in the Base-N alphabet. 572 * 573 * @param pArray 574 * A String containing Base-N character data 575 * @return a byte array containing binary data 576 */ 577 public byte[] decode(final String pArray) { 578 return decode(StringUtils.getBytesUtf8(pArray)); 579 } 580 581 /** 582 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. 583 * 584 * @param pArray 585 * a byte array containing binary data 586 * @return A byte array containing only the base N alphabetic character data 587 */ 588 @Override 589 public byte[] encode(final byte[] pArray) { 590 if (BinaryCodec.isEmpty(pArray)) { 591 return pArray; 592 } 593 return encode(pArray, 0, pArray.length); 594 } 595 596 /** 597 * Encodes a byte[] containing binary data, into a byte[] containing 598 * characters in the alphabet. 599 * 600 * @param pArray 601 * a byte array containing binary data 602 * @param offset 603 * initial offset of the subarray. 604 * @param length 605 * length of the subarray. 606 * @return A byte array containing only the base N alphabetic character data 607 * @since 1.11 608 */ 609 public byte[] encode(final byte[] pArray, final int offset, final int length) { 610 if (BinaryCodec.isEmpty(pArray)) { 611 return pArray; 612 } 613 final Context context = new Context(); 614 encode(pArray, offset, length, context); 615 encode(pArray, offset, EOF, context); // Notify encoder of EOF. 616 final byte[] buf = new byte[context.pos - context.readPos]; 617 readResults(buf, 0, buf.length, context); 618 return buf; 619 } 620 621 // package protected for access from I/O streams 622 abstract void encode(byte[] pArray, int i, int length, Context context); 623 624 /** 625 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 626 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 627 * 628 * @param obj 629 * Object to encode 630 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. 631 * @throws EncoderException 632 * if the parameter supplied is not of type byte[] 633 */ 634 @Override 635 public Object encode(final Object obj) throws EncoderException { 636 if (!(obj instanceof byte[])) { 637 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); 638 } 639 return encode((byte[]) obj); 640 } 641 642 /** 643 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. 644 * Uses UTF8 encoding. 645 * <p> 646 * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring. 647 * </p> 648 * 649 * @param pArray a byte array containing binary data 650 * @return String containing only character data in the appropriate alphabet. 651 * @since 1.5 652 */ 653 public String encodeAsString(final byte[] pArray) { 654 return StringUtils.newStringUtf8(encode(pArray)); 655 } 656 657 /** 658 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. 659 * Uses UTF8 encoding. 660 * 661 * @param pArray 662 * a byte array containing binary data 663 * @return A String containing only Base-N character data 664 */ 665 public String encodeToString(final byte[] pArray) { 666 return StringUtils.newStringUtf8(encode(pArray)); 667 } 668 669 /** 670 * Ensure that the buffer has room for {@code size} bytes 671 * 672 * @param size minimum spare space required 673 * @param context the context to be used 674 * @return the buffer 675 */ 676 protected byte[] ensureBufferSize(final int size, final Context context) { 677 if (context.buffer == null) { 678 context.buffer = new byte[Math.max(size, getDefaultBufferSize())]; 679 context.pos = 0; 680 context.readPos = 0; 681 // Overflow-conscious: 682 // x + y > z == x + y - z > 0 683 } else if (context.pos + size - context.buffer.length > 0) { 684 return resizeBuffer(context, context.pos + size); 685 } 686 return context.buffer; 687 } 688 689 /** 690 * Returns the decoding behavior policy. 691 * 692 * <p> 693 * The default is lenient. If the decoding policy is strict, then decoding will raise an 694 * {@link IllegalArgumentException} if trailing bits are not part of a valid encoding. Decoding will compose 695 * trailing bits into 8-bit bytes and discard the remainder. 696 * </p> 697 * 698 * @return true if using strict decoding 699 * @since 1.15 700 */ 701 public CodecPolicy getCodecPolicy() { 702 return decodingPolicy; 703 } 704 705 /** 706 * Gets the default buffer size. Can be overridden. 707 * 708 * @return the default buffer size. 709 */ 710 protected int getDefaultBufferSize() { 711 return DEFAULT_BUFFER_SIZE; 712 } 713 714 /** 715 * Calculates the amount of space needed to encode the supplied array. 716 * 717 * @param pArray byte[] array which will later be encoded 718 * @return amount of space needed to encode the supplied array. 719 * Returns a long since a max-len array will require > Integer.MAX_VALUE 720 */ 721 public long getEncodedLength(final byte[] pArray) { 722 // Calculate non-chunked size - rounded up to allow for padding 723 // cast to long is needed to avoid possibility of overflow 724 long len = (pArray.length + unencodedBlockSize - 1) / unencodedBlockSize * (long) encodedBlockSize; 725 if (lineLength > 0) { // We're using chunking 726 // Round up to nearest multiple 727 len += (len + lineLength - 1) / lineLength * chunkSeparatorLength; 728 } 729 return len; 730 } 731 732 /** 733 * Returns true if this object has buffered data for reading. 734 * 735 * @param context the context to be used 736 * @return true if there is data still available for reading. 737 */ 738 boolean hasData(final Context context) { // package protected for access from I/O streams 739 return context.pos > context.readPos; 740 } 741 742 /** 743 * Returns whether or not the {@code octet} is in the current alphabet. 744 * Does not allow whitespace or pad. 745 * 746 * @param value The value to test 747 * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise. 748 */ 749 protected abstract boolean isInAlphabet(byte value); 750 751 /** 752 * Tests a given byte array to see if it contains only valid characters within the alphabet. 753 * The method optionally treats whitespace and pad as valid. 754 * 755 * @param arrayOctet byte array to test 756 * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed 757 * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty; 758 * {@code false}, otherwise 759 */ 760 public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) { 761 for (final byte octet : arrayOctet) { 762 if (!isInAlphabet(octet) && (!allowWSPad || octet != pad && !Character.isWhitespace(octet))) { 763 return false; 764 } 765 } 766 return true; 767 } 768 769 /** 770 * Tests a given String to see if it contains only valid characters within the alphabet. 771 * The method treats whitespace and PAD as valid. 772 * 773 * @param basen String to test 774 * @return {@code true} if all characters in the String are valid characters in the alphabet or if 775 * the String is empty; {@code false}, otherwise 776 * @see #isInAlphabet(byte[], boolean) 777 */ 778 public boolean isInAlphabet(final String basen) { 779 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 780 } 781 782 /** 783 * Returns true if decoding behavior is strict. Decoding will raise an {@link IllegalArgumentException} if trailing 784 * bits are not part of a valid encoding. 785 * 786 * <p> 787 * The default is false for lenient decoding. Decoding will compose trailing bits into 8-bit bytes and discard the 788 * remainder. 789 * </p> 790 * 791 * @return true if using strict decoding 792 * @since 1.15 793 */ 794 public boolean isStrictDecoding() { 795 return decodingPolicy == CodecPolicy.STRICT; 796 } 797 798 /** 799 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail 800 * bytes. Returns how many bytes were actually extracted. 801 * <p> 802 * Package private for access from I/O streams. 803 * </p> 804 * 805 * @param b 806 * byte[] array to extract the buffered data into. 807 * @param bPos 808 * position in byte[] array to start extraction at. 809 * @param bAvail 810 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 811 * @param context 812 * the context to be used 813 * @return The number of bytes successfully extracted into the provided byte[] array. 814 */ 815 int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { 816 if (hasData(context)) { 817 final int len = Math.min(available(context), bAvail); 818 System.arraycopy(context.buffer, context.readPos, b, bPos, len); 819 context.readPos += len; 820 if (!hasData(context)) { 821 // All data read. 822 // Reset position markers but do not set buffer to null to allow its reuse. 823 // hasData(context) will still return false, and this method will return 0 until 824 // more data is available, or -1 if EOF. 825 context.pos = context.readPos = 0; 826 } 827 return len; 828 } 829 return context.eof ? EOF : 0; 830 } 831}