/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;
import java.util.Objects;
import java.util.function.Supplier;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.CodecPolicy;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Abstract superclass for Base-N encoders and decoders.
 *
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * You can set the decoding behavior when the input bytes contain leftover trailing bits that cannot be created by a
 * valid encoding. These can be bits that are unused from the final character or entire characters. The default mode is
 * lenient decoding.
 * </p>
 * <ul>
 * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible. The remainder are discarded.
 * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits are not part of a valid
 * encoding. Any unused bits from the final character must be zero. Impossible counts of entire final characters are not
 * allowed.
 * </ul>
 * <p>
 * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded to a byte array that matches
 * the original, i.e. no changes occur on the final character. This requires that the input bytes use the same padding
 * and alphabet as the encoder.
 * </p>
 */
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * Builds Base-N codec instances.
     *
     * @param <T> the codec type to build.
     * @param <B> the codec builder subtype.
     * @since 1.17.0
     */
    public abstract static class AbstractBuilder<T, B extends AbstractBuilder<T, B>> implements Supplier<T> {

        private CodecPolicy decodingPolicy = DECODING_POLICY_DEFAULT;
        private int lineLength;
        private byte[] lineSeparator = CHUNK_SEPARATOR;
        private final byte[] defaultEncodeTable;
        private byte[] encodeTable;
        /** Padding byte. */
        private byte padding = PAD_DEFAULT;

        AbstractBuilder(final byte[] defaultEncodeTable) {
            this.defaultEncodeTable = defaultEncodeTable;
            this.encodeTable = defaultEncodeTable;
        }

        /**
         * Returns this builder typed as the concrete builder subtype so setters can be chained.
         *
         * @return {@code this} instance.
         */
        @SuppressWarnings("unchecked")
        B asThis() {
            return (B) this;
        }

        CodecPolicy getDecodingPolicy() {
            return decodingPolicy;
        }

        byte[] getEncodeTable() {
            return encodeTable;
        }

        int getLineLength() {
            return lineLength;
        }

        byte[] getLineSeparator() {
            return lineSeparator;
        }

        byte getPadding() {
            return padding;
        }

        /**
         * Sets the decoding policy.
         *
         * @param decodingPolicy the decoding policy, null resets to the default.
         * @return {@code this} instance.
         */
        public B setDecodingPolicy(final CodecPolicy decodingPolicy) {
            this.decodingPolicy = decodingPolicy != null ? decodingPolicy : DECODING_POLICY_DEFAULT;
            return asThis();
        }

        /**
         * Sets the encode table.
         *
         * @param encodeTable the encode table, null resets to the default.
         * @return {@code this} instance.
         */
        public B setEncodeTable(final byte... encodeTable) {
            this.encodeTable = encodeTable != null ? encodeTable : defaultEncodeTable;
            return asThis();
        }

        /**
         * Sets the line length.
         *
         * @param lineLength the line length, values less than 0 are stored as 0 (the default).
         * @return {@code this} instance.
         */
        public B setLineLength(final int lineLength) {
            this.lineLength = Math.max(0, lineLength);
            return asThis();
        }

        /**
         * Sets the line separator.
         *
         * @param lineSeparator the line separator, null resets to the default.
         * @return {@code this} instance.
         */
        public B setLineSeparator(final byte... lineSeparator) {
            this.lineSeparator = lineSeparator != null ? lineSeparator : CHUNK_SEPARATOR;
            return asThis();
        }

        /**
         * Sets the padding byte.
         *
         * @param padding the padding byte.
         * @return {@code this} instance.
         */
        public B setPadding(final byte padding) {
            this.padding = padding;
            return asThis();
        }

    }

    /**
     * Holds thread context so classes can be thread-safe.
     *
     * <p>
     * This class is not itself thread-safe; each thread must allocate its own copy.
     * </p>
     */
    static class Context {

        /**
         * Placeholder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        int ibitWorkArea;

        /**
         * Placeholder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        long lbitWorkArea;

        /**
         * Buffer for streaming.
         */
        byte[] buffer;

        /**
         * Position where next character should be written in the buffer.
         */
        int pos;

        /**
         * Position where next character should be read from the buffer.
         */
        int readPos;

        /**
         * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
         * and must be thrown away.
         */
        boolean eof;

        /**
         * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
         * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
         */
        int currentLinePos;

        /**
         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
         * variable helps track that.
         */
        int modulus;

        /**
         * Returns a String useful for debugging (especially within a debugger.)
         *
         * @return a String useful for debugging.
         */
        @Override
        public String toString() {
            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
        }
    }

    /**
     * EOF
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     * MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="https://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * The maximum size buffer to allocate.
     *
     * <p>This is set to the same size used in the JDK {@link java.util.ArrayList}:</p>
     * <blockquote>
     * Some VMs reserve some header words in an array.
     * Attempts to allocate larger arrays may result in
     * OutOfMemoryError: Requested array size exceeds VM limit.
     * </blockquote>
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * The default decoding policy.
     * @since 1.15
     */
    protected static final CodecPolicy DECODING_POLICY_DEFAULT = CodecPolicy.LENIENT;

    /**
     * Chunk separator per RFC 2045 section 2.1.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};

    /**
     * Create a positive capacity at least as large the minimum required capacity.
     * If the minimum capacity is negative then this throws an OutOfMemoryError as no array
     * can be allocated.
     *
     * @param minCapacity the minimum capacity
     * @return the capacity
     * @throws OutOfMemoryError if the {@code minCapacity} is negative
     */
    private static int createPositiveCapacity(final int minCapacity) {
        if (minCapacity < 0) {
            // overflow: report the requested size as an unsigned value
            throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL));
        }
        // This is called when we require buffer expansion to a very big array.
        // Use the conservative maximum buffer size if possible, otherwise the biggest required.
        //
        // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE.
        // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full
        // Integer.MAX_VALUE length array.
        // The result is that we may have to allocate an array of this size more than once if
        // the capacity must be expanded again.
        return Math.max(minCapacity, MAX_BUFFER_SIZE);
    }

    /**
     * Gets a copy of the chunk separator per RFC 2045 section 2.1.
     *
     * @return the chunk separator
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     * @since 1.15
     */
    public static byte[] getChunkSeparator() {
        return CHUNK_SEPARATOR.clone();
    }

    /**
     * Checks if a byte value is whitespace or not.
     * @param byteToCheck
     *            the byte to check
     * @return true if byte is whitespace, false otherwise
     * @see Character#isWhitespace(int)
     * @deprecated Use {@link Character#isWhitespace(int)}.
     */
    @Deprecated
    protected static boolean isWhiteSpace(final byte byteToCheck) {
        return Character.isWhitespace(byteToCheck);
    }

    /**
     * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
     * @param context the context to be used
     * @param minCapacity the minimum required capacity
     * @return the resized byte[] buffer
     * @throws OutOfMemoryError if the {@code minCapacity} is negative
     */
    private static byte[] resizeBuffer(final Context context, final int minCapacity) {
        // Overflow-conscious code treats the min and new capacity as unsigned.
        final int oldCapacity = context.buffer.length;
        int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR;
        if (Integer.compareUnsigned(newCapacity, minCapacity) < 0) {
            newCapacity = minCapacity;
        }
        if (Integer.compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) {
            newCapacity = createPositiveCapacity(minCapacity);
        }
        final byte[] b = Arrays.copyOf(context.buffer, newCapacity);
        context.buffer = b;
        return b;
    }

    /**
     * Gets the array length or 0 if null.
     *
     * @param array the array or null.
     * @return the array length or 0 if null.
     */
    static int toLength(final byte[] array) {
        return array == null ? 0 : array.length;
    }

    /**
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /** Pad byte. Instance variable just in case it needs to vary later. */
    protected final byte pad;

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to the nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;

    /**
     * Defines the decoding behavior when the input bytes contain leftover trailing bits that
     * cannot be created by a valid encoding. These can be bits that are unused from the final
     * character or entire characters. The default mode is lenient decoding. Use
     * {@link CodecPolicy#STRICT} to enable strict decoding.
     * <ul>
     * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible.
     *     The remainder are discarded.
     * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits
     *     are not part of a valid encoding. Any unused bits from the final character must
     *     be zero. Impossible counts of entire final characters are not allowed.
     * </ul>
     * <p>
     * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded
     * to a byte array that matches the original, i.e. no changes occur on the final
     * character. This requires that the input bytes use the same padding and alphabet
     * as the encoder.
     * </p>
     */
    private final CodecPolicy decodingPolicy;

    /**
     * Constructs a new instance.
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }

    /**
     * Constructs a new instance.
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, pad, DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a new instance.
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     * @param decodingPolicy Decoding policy, must not be null.
     * @throws NullPointerException if {@code decodingPolicy} is null
     * @since 1.15
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad,
            final CodecPolicy decodingPolicy) {
        this.unencodedBlockSize = unencodedBlockSize;
        this.encodedBlockSize = encodedBlockSize;
        // Chunking needs both a positive line length and a non-empty separator.
        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
        // Integer division then multiplication rounds down to a whole number of encoded blocks.
        this.lineLength = useChunking ? lineLength / encodedBlockSize * encodedBlockSize : 0;
        this.chunkSeparatorLength = chunkSeparatorLength;
        this.pad = pad;
        this.decodingPolicy = Objects.requireNonNull(decodingPolicy, "codecPolicy");
    }

    /**
     * Returns the amount of buffered data available for reading.
     *
     * @param context the context to be used
     * @return The amount of buffered data available for reading.
     */
    int available(final Context context) { // package protected for access from I/O streams
        return hasData(context) ? context.pos - context.readPos : 0;
    }

    /**
     * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
     *
     * Intended for use in checking line-ending arrays
     *
     * @param arrayOctet
     *            byte array to test, may be null
     * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
     */
    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
        if (arrayOctet != null) {
            for (final byte element : arrayOctet) {
                if (pad == element || isInAlphabet(element)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Decodes a byte[] containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            A byte array containing Base-N character data
     * @return a byte array containing binary data
     */
    @Override
    public byte[] decode(final byte[] pArray) {
        if (BinaryCodec.isEmpty(pArray)) {
            return pArray;
        }
        final Context context = new Context();
        decode(pArray, 0, pArray.length, context);
        decode(pArray, 0, EOF, context); // Notify decoder of EOF.
        final byte[] result = new byte[context.pos];
        readResults(result, 0, result.length, context);
        return result;
    }

    // package protected for access from I/O streams
    abstract void decode(byte[] pArray, int i, int length, Context context);

    /**
     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
     *
     * @param obj
     *            Object to decode
     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
     *         supplied.
     * @throws DecoderException
     *             if the parameter supplied is not of type byte[] or String
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        }
        if (obj instanceof String) {
            return decode((String) obj);
        }
        throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
    }

    /**
     * Decodes a String containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            A String containing Base-N character data
     * @return a byte array containing binary data
     */
    public byte[] decode(final String pArray) {
        return decode(StringUtils.getBytesUtf8(pArray));
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return A byte array containing only the base N alphabetic character data
     */
    @Override
    public byte[] encode(final byte[] pArray) {
        if (BinaryCodec.isEmpty(pArray)) {
            return pArray;
        }
        return encode(pArray, 0, pArray.length);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @param offset
     *            initial offset of the subarray.
     * @param length
     *            length of the subarray.
     * @return A byte array containing only the base N alphabetic character data
     * @since 1.11
     */
    public byte[] encode(final byte[] pArray, final int offset, final int length) {
        if (BinaryCodec.isEmpty(pArray)) {
            return pArray;
        }
        final Context context = new Context();
        encode(pArray, offset, length, context);
        encode(pArray, offset, EOF, context); // Notify encoder of EOF.
        final byte[] buf = new byte[context.pos - context.readPos];
        readResults(buf, 0, buf.length, context);
        return buf;
    }

    // package protected for access from I/O streams
    abstract void encode(byte[] pArray, int i, int length, Context context);

    /**
     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
     *
     * @param obj
     *            Object to encode
     * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type byte[]
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (!(obj instanceof byte[])) {
            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
        }
        return encode((byte[]) obj);
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
     * Uses UTF8 encoding.
     * <p>
     * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
     * </p>
     *
     * @param pArray a byte array containing binary data
     * @return String containing only character data in the appropriate alphabet.
     * @since 1.5
     */
    public String encodeAsString(final byte[] pArray) {
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
     * Uses UTF8 encoding.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return A String containing only Base-N character data
     */
    public String encodeToString(final byte[] pArray) {
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Ensure that the buffer has room for {@code size} bytes
     *
     * @param size minimum spare space required
     * @param context the context to be used
     * @return the buffer
     */
    protected byte[] ensureBufferSize(final int size, final Context context) {
        if (context.buffer == null) {
            context.buffer = new byte[Math.max(size, getDefaultBufferSize())];
            context.pos = 0;
            context.readPos = 0;

            // Overflow-conscious:
            // x + y > z  ==  x + y - z > 0
        } else if (context.pos + size - context.buffer.length > 0) {
            return resizeBuffer(context, context.pos + size);
        }
        return context.buffer;
    }

    /**
     * Returns the decoding behavior policy.
     *
     * <p>
     * The default is lenient. If the decoding policy is strict, then decoding will raise an
     * {@link IllegalArgumentException} if trailing bits are not part of a valid encoding. Lenient decoding will
     * compose trailing bits into 8-bit bytes and discard the remainder.
     * </p>
     *
     * @return the decoding policy
     * @since 1.15
     */
    public CodecPolicy getCodecPolicy() {
        return decodingPolicy;
    }

    /**
     * Gets the default buffer size. Can be overridden.
     *
     * @return the default buffer size.
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }

    /**
     * Calculates the amount of space needed to encode the supplied array.
     *
     * @param pArray byte[] array which will later be encoded
     *
     * @return amount of space needed to encode the supplied array.
     * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
     */
    public long getEncodedLength(final byte[] pArray) {
        // Calculate non-chunked size - rounded up to allow for padding.
        // Widen to long BEFORE the addition: in int arithmetic "length + blockSize - 1" can wrap
        // negative for arrays close to Integer.MAX_VALUE in length.
        long len = ((long) pArray.length + unencodedBlockSize - 1) / unencodedBlockSize * encodedBlockSize;
        if (lineLength > 0) { // We're using chunking
            // Round up to nearest multiple
            len += (len + lineLength - 1) / lineLength * chunkSeparatorLength;
        }
        return len;
    }

    /**
     * Returns true if this object has buffered data for reading.
     *
     * @param context the context to be used
     * @return true if there is data still available for reading.
     */
    boolean hasData(final Context context) { // package protected for access from I/O streams
        return context.pos > context.readPos;
    }

    /**
     * Returns whether or not the {@code octet} is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);

    /**
     * Tests a given byte array to see if it contains only valid characters within the alphabet.
     * The method optionally treats whitespace and pad as valid.
     *
     * @param arrayOctet byte array to test
     * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
     *
     * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     */
    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
        for (final byte octet : arrayOctet) {
            if (!isInAlphabet(octet) && (!allowWSPad || octet != pad && !Character.isWhitespace(octet))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests a given String to see if it contains only valid characters within the alphabet.
     * The method treats whitespace and PAD as valid.
     *
     * @param basen String to test
     * @return {@code true} if all characters in the String are valid characters in the alphabet or if
     *         the String is empty; {@code false}, otherwise
     * @see #isInAlphabet(byte[], boolean)
     */
    public boolean isInAlphabet(final String basen) {
        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
    }

    /**
     * Returns true if decoding behavior is strict. Decoding will raise an {@link IllegalArgumentException} if trailing
     * bits are not part of a valid encoding.
     *
     * <p>
     * The default is false for lenient decoding. Decoding will compose trailing bits into 8-bit bytes and discard the
     * remainder.
     * </p>
     *
     * @return true if using strict decoding
     * @since 1.15
     */
    public boolean isStrictDecoding() {
        return decodingPolicy == CodecPolicy.STRICT;
    }

    /**
     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
     * bytes. Returns how many bytes were actually extracted.
     * <p>
     * Package private for access from I/O streams.
     * </p>
     *
     * @param b
     *            byte[] array to extract the buffered data into.
     * @param bPos
     *            position in byte[] array to start extraction at.
     * @param bAvail
     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
     * @param context
     *            the context to be used
     * @return The number of bytes successfully extracted into the provided byte[] array.
     */
    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
        if (hasData(context)) {
            final int len = Math.min(available(context), bAvail);
            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
            context.readPos += len;
            if (!hasData(context)) {
                // All data read.
                // Reset position markers but do not set buffer to null to allow its reuse.
                // hasData(context) will still return false, and this method will return 0 until
                // more data is available, or -1 if EOF.
                context.pos = context.readPos = 0;
            }
            return len;
        }
        return context.eof ? EOF : 0;
    }
}