From c2358e969363cc3cae50ca7b3ada324da1a15546 Mon Sep 17 00:00:00 2001 From: Aleksandr Beliakov Date: Tue, 30 Dec 2025 12:21:31 +0100 Subject: [PATCH 1/2] Add separate methods for strict Standard and URL Safe Base64 decoding --- .../apache/commons/codec/binary/Base64.java | 310 +++++++++++++++++- .../commons/codec/binary/Base64Test.java | 110 ++++++- 2 files changed, 413 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/binary/Base64.java b/src/main/java/org/apache/commons/codec/binary/Base64.java index 0b10809158..6521fd96ca 100644 --- a/src/main/java/org/apache/commons/codec/binary/Base64.java +++ b/src/main/java/org/apache/commons/codec/binary/Base64.java @@ -127,6 +127,58 @@ public Builder setUrlSafe(final boolean urlSafe) { return setEncodeTable(toUrlSafeEncodeTable(urlSafe)); } + /** + * Sets the format of the decoding table. + * This method allows to explicitly state whether a "standard" or "URL Safe" Base64 decoding is expected. + *

+ * Note: By default, the implementation uses the MIXED approach, allowing a seamless handling of + * both URL_SAFE and STANDARD base64. + *

+ * + * @param format table format to be used on Base64 decoding. + * @return {@code this} instance. + */ + public Builder setDecodeTableFormat(final DecodeTableFormat format) { + switch (format) { + case STANDARD: + return super.setDecodeTableRaw(STANDARD_DECODE_TABLE); + case URL_SAFE: + return super.setDecodeTableRaw(URL_SAFE_DECODE_TABLE); + case MIXED: + default: + return super.setDecodeTableRaw(DECODE_TABLE); + } + } + + } + + /** + * Defines the Base64 table format to be used on decoding + *

+ * Note: By default, the MIXED approach is used, allowing a seamless handling of both URL_SAFE and STANDARD base64. + *

+ */ + public enum DecodeTableFormat { + + /** + * Corresponds to the "standard" Base64 coding table, as specified in Table 1 of RFC 2045. + */ + STANDARD, + + /** + * Corresponds to the "URL Safe" Base64 coding table, as specified in Table 2 of RFC 4648. + */ + URL_SAFE, + + /** + * Represents a joint approach, allowing a seamless decoding of both character sets, + * corresponding to either Table 1 of RFC 2045 or Table 2 of RFC 4648. + *

+ * Note: This decoding table is used by default. + *

+ */ + MIXED + } /** @@ -170,7 +222,7 @@ public Builder setUrlSafe(final boolean urlSafe) { /** * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 - * alphabet but fall within the bounds of the array are translated to -1. + * or Base64 URL Safe alphabets but fall within the bounds of the array are translated to -1. *

* Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). @@ -181,7 +233,7 @@ public Builder setUrlSafe(final boolean urlSafe) { *

*/ private static final byte[] DECODE_TABLE = { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F + // 0 1 2 3 4 5 6 7 8 9 A B C D E F -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - / @@ -192,6 +244,49 @@ public Builder setUrlSafe(final boolean urlSafe) { 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z }; + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified + * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 + * alphabet but fall within the bounds of the array are translated to -1. + *

+ * Note: This decoding table handles only the "standard" base64 characters, such as '+' and '/'. + * The "url-safe" characters such as '-' and '_' are not supported by the table. + *

+ */ + private static final byte[] STANDARD_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // 20-2f + / + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50-5f P-Z + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z + }; + + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base64 URL Safe Alphabet" + * (as specified in Table 2 of RFC 4648) into their 6-bit positive integer equivalents. + * Characters that are not in the Base64 URL Safe alphabet but fall within the bounds of the array + * are translated to -1. + *

+ * Note: This decoding table handles only the "URL Safe" base64 characters, such as '-' and '_'. + * The "standard" characters such as '+' and '/' are not supported by the table. + *

+ */ + private static final byte[] URL_SAFE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, // 20-2f - + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _ + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z + }; + /** * Base64 uses 6-bit fields. */ @@ -251,6 +346,11 @@ private static byte[] calculateDecodeTable(final byte[] encodeTable) { * Decodes Base64 data into octets. *

* Note: this method seamlessly handles data encoded in URL-safe or normal mode. + * For enforcing verification against strict standard Base64 or Base64 URL Safe tables, + * please use {@link #decodeBase64Standard(byte[])} or {@link #decodeBase64Url(byte[])} methods respectively. + *

+ *

+ * Note 2: this method skips any unknown or not supported bytes. *

* * @param base64Data @@ -265,6 +365,11 @@ public static byte[] decodeBase64(final byte[] base64Data) { * Decodes a Base64 String into octets. *

* Note: this method seamlessly handles data encoded in URL-safe or normal mode. + * For enforcing verification against strict standard Base64 or Base64 URL Safe tables, + * please use {@link #decodeBase64Standard(String)} or {@link #decodeBase64Url(String)} methods respectively. + *

+ *

+ * Note 2: this method skips any unknown or not supported characters. *

* * @param base64String @@ -276,6 +381,78 @@ public static byte[] decodeBase64(final String base64String) { return new Base64().decode(base64String); } + /** + * Decodes standard Base64 data into octets. + *

+ * Note: implementation of this method is aligned with the Table 1 of RFC 2045. + *

+ *

+ * Note 2: this method skips any unknown or not supported bytes. + *

+ * + * @param base64Data + * Byte array containing Base64 data + * @return Array containing decoded data. + * @since 1.21 + */ + public static byte[] decodeBase64Standard(final byte[] base64Data) { + return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64Data); + } + + /** + * Decodes a standard Base64 String into octets. + *

+ * Note: implementation of this method is aligned with the Table 1 of RFC 2045. + *

+ *

+ * Note 2: this method skips any unknown or not supported characters. + *

+ * + * @param base64String + * String containing Base64 data + * @return Array containing decoded data. + * @since 1.21 + */ + public static byte[] decodeBase64Standard(final String base64String) { + return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64String); + } + + /** + * Decodes URL Safe Base64 data into octets. + *

+ * Note: implementation of this method is aligned with the Table 2 of RFC 4648. + *

+ *

+ * Note 2: this method skips any unknown or not supported bytes. + *

+ * + * @param base64Data + * Byte array containing Base64 data + * @return Array containing decoded data. + * @since 1.21 + */ + public static byte[] decodeBase64Url(final byte[] base64Data) { + return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64Data); + } + + /** + * Decodes a URL Safe Base64 String into octets. + *

+ * Note: implementation of this method is aligned with the Table 2 of RFC 4648. + *

+ *

+ * Note 2: this method skips any unknown or not supported characters. + *

+ * + * @param base64String + * String containing Base64 data + * @return Array containing decoded data. + * @since 1.21 + */ + public static byte[] decodeBase64Url(final String base64String) { + return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64String); + } + /** * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. * @@ -452,6 +629,11 @@ public static boolean isArrayByteBase64(final byte[] arrayOctet) { /** * Returns whether or not the {@code octet} is in the base 64 alphabet. + *

+ * Note: this method treats all of the characters '+', '/', '-' and '_' as valid base64 characters. + * For enforcing verification against strict standard Base64 or Base64 URL Safe tables, + * please use {@link #isBase64Standard(byte)} or {@link #isBase64Url(byte)} methods respectively. + *

* * @param octet * The value to test @@ -465,6 +647,11 @@ public static boolean isBase64(final byte octet) { /** * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the * method treats whitespace as valid. + *

+ * Note: this method treats all of the characters '+', '/', '-' and '_' as valid base64 characters. + * For enforcing verification against strict standard Base64 or Base64 URL Safe tables, + * please use {@link #isBase64Standard(byte[])} or {@link #isBase64Url(byte[])} methods respectively. + *

* * @param arrayOctet * byte array to test @@ -484,17 +671,134 @@ public static boolean isBase64(final byte[] arrayOctet) { /** * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the * method treats whitespace as valid. + *

+ * Note: this method treats all of the characters '+', '/', '-' and '_' as valid base64 characters. + * For enforcing verification against strict standard Base64 or Base64 URL Safe tables, + * please use {@link #isBase64Standard(String)} or {@link #isBase64Url(String)} methods respectively. + *

* * @param base64 * String to test * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if * the String is empty; {@code false}, otherwise - * @since 1.5 + * @since 1.5 */ public static boolean isBase64(final String base64) { return isBase64(StringUtils.getBytesUtf8(base64)); } + /** + * Returns whether or not the {@code octet} is in the standard base 64 alphabet. + *

+ * Note: implementation of this method is aligned with the Table 1 of RFC 2045. + *

+ * + * @param octet + * The value to test + * @return {@code true} if the value is defined in the standard base 64 alphabet, + * {@code false} otherwise. + * @since 1.21 + */ + public static boolean isBase64Standard(final byte octet) { + return octet == PAD_DEFAULT || octet >= 0 && octet < STANDARD_DECODE_TABLE.length && STANDARD_DECODE_TABLE[octet] != -1; + } + + /** + * Tests a given byte array to see if it contains only valid characters within the standard Base64 alphabet. + * The method treats whitespace as valid. + *

+ * Note: implementation of this method is aligned with the Table 1 of RFC 2045. + *

+ * + * @param arrayOctet + * byte array to test + * @return {@code true} if all bytes are valid characters in the standard Base64 alphabet. + * {@code false}, otherwise + * @since 1.21 + */ + public static boolean isBase64Standard(final byte[] arrayOctet) { + for (final byte element : arrayOctet) { + if (!isBase64Standard(element) && !Character.isWhitespace(element)) { + return false; + } + } + return true; + } + + /** + * Tests a given String to see if it contains only valid characters within the standard Base64 alphabet. + * The method treats whitespace as valid. + *

+ * Note: implementation of this method is aligned with the Table 1 of RFC 2045. + *

+ * + * @param base64 + * String to test + * @return {@code true} if all characters in the String are valid characters in the standard Base64 alphabet or + * if the String is empty; + * {@code false}, otherwise + * @since 1.21 + */ + public static boolean isBase64Standard(final String base64) { + return isBase64Standard(StringUtils.getBytesUtf8(base64)); + } + + /** + * Returns whether or not the {@code octet} is in the url safe base 64 alphabet. + *

+ * Note: implementation of this method is aligned with the Table 2 of RFC 4648. + *

+ * + * @param octet + * The value to test + * @return {@code true} if the value is defined in the url safe base 64 alphabet, + * {@code false} otherwise. + * @since 1.21 + */ + public static boolean isBase64Url(final byte octet) { + return octet == PAD_DEFAULT || octet >= 0 && octet < URL_SAFE_DECODE_TABLE.length && URL_SAFE_DECODE_TABLE[octet] != -1; + } + + /** + * Tests a given byte array to see if it contains only valid characters within the URL Safe Base64 alphabet. + * The method treats whitespace as valid. + *

+ * Note: implementation of this method is aligned with the Table 2 of RFC 4648. + *

+ * + * @param arrayOctet + * byte array to test + * @return {@code true} if all bytes are valid characters in the URL Safe Base64 alphabet. + * {@code false}, otherwise + * @since 1.21 + */ + public static boolean isBase64Url(final byte[] arrayOctet) { + for (final byte element : arrayOctet) { + if (!isBase64Url(element) && !Character.isWhitespace(element)) { + return false; + } + } + return true; + } + + /** + * Tests a given String to see if it contains only valid characters within the URL Safe Base64 alphabet. + * The method treats whitespace as valid. + *

+ * Note: implementation of this method is aligned with the Table 2 of RFC 4648. + *

+ * + * @param base64 + * String to test + * @return {@code true} if all characters in the String are valid characters in the URL Safe Base64 alphabet or + * if the String is empty; + * {@code false}, otherwise + * @since 1.21 + */ + public static boolean isBase64Url(final String base64) { + return isBase64Url(StringUtils.getBytesUtf8(base64)); + } + /** * Returns a byte-array representation of a {@code BigInteger} without sign bit. * diff --git a/src/test/java/org/apache/commons/codec/binary/Base64Test.java b/src/test/java/org/apache/commons/codec/binary/Base64Test.java index 47d0fdcae2..f733227eeb 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base64Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base64Test.java @@ -711,10 +711,52 @@ void testIsStringBase64() { final String emptyString = ""; final String validString = "abc===defg\n\r123456\r789\r\rABC\n\nDEF==GHI\r\nJKL=============="; final String invalidString = validString + (char) 0; // append null character - assertThrows(NullPointerException.class, () -> Base64.isBase64(nullString), "Base64.isStringBase64() should not be null-safe."); - assertTrue(Base64.isBase64(emptyString), "Base64.isStringBase64(empty-string) is true"); - assertTrue(Base64.isBase64(validString), "Base64.isStringBase64(valid-string) is true"); - assertFalse(Base64.isBase64(invalidString), "Base64.isStringBase64(invalid-string) is false"); + final String standardString = "++AQIDBA/U=="; + final String urlSafeString = "--AQIDBA_U=="; + assertThrows(NullPointerException.class, () -> Base64.isBase64(nullString), "Base64.isBase64() should not be null-safe."); + assertTrue(Base64.isBase64(emptyString), "Base64.isBase64(empty-string) is true"); + assertTrue(Base64.isBase64(validString), "Base64.isBase64(valid-string) is true"); + assertFalse(Base64.isBase64(invalidString), "Base64.isBase64(invalid-string) is false"); + assertTrue(Base64.isBase64(standardString), "Base64.isBase64(standard-string) is true"); + 
assertTrue(Base64.isBase64(urlSafeString), "Base64.isBase64(urlSafe-string) is true"); + } + + /** + * Test the isStringBase64Standard method. + */ + @Test + void testIsStringBase64Standard() { + final String nullString = null; + final String emptyString = ""; + final String validString = "abc===defg\n\r123456\r789\r\rABC\n\nDEF==GHI\r\nJKL=============="; + final String invalidString = validString + (char) 0; // append null character + final String standardString = "++AQIDBA/U=="; + final String urlSafeString = "--AQIDBA_U=="; + assertThrows(NullPointerException.class, () -> Base64.isBase64Standard(nullString), "Base64.isBase64Standard() should not be null-safe."); + assertTrue(Base64.isBase64Standard(emptyString), "Base64.isBase64Standard(empty-string) is true"); + assertTrue(Base64.isBase64Standard(validString), "Base64.isBase64Standard(valid-string) is true"); + assertFalse(Base64.isBase64Standard(invalidString), "Base64.isBase64Standard(invalid-string) is false"); + assertTrue(Base64.isBase64Standard(standardString), "Base64.isBase64Standard(standard-string) is true"); + assertFalse(Base64.isBase64Standard(urlSafeString), "Base64.isBase64Standard(urlSafe-string) is false"); + } + + /** + * Test the isStringBase64Url method. 
+ */ + @Test + void testIsStringBase64Url() { + final String nullString = null; + final String emptyString = ""; + final String validString = "abc===defg\n\r123456\r789\r\rABC\n\nDEF==GHI\r\nJKL=============="; + final String invalidString = validString + (char) 0; // append null character + final String standardString = "++AQIDBA/U=="; + final String urlSafeString = "--AQIDBA_U=="; + assertThrows(NullPointerException.class, () -> Base64.isBase64Url(nullString), "Base64.isBase64Url() should not be null-safe."); + assertTrue(Base64.isBase64Url(emptyString), "Base64.isBase64Url(empty-string) is true"); + assertTrue(Base64.isBase64Url(validString), "Base64.isBase64Url(valid-string) is true"); + assertFalse(Base64.isBase64Url(invalidString), "Base64.isBase64Url(invalid-string) is false"); + assertFalse(Base64.isBase64Url(standardString), "Base64.isBase64Url(standard-string) is false"); + assertTrue(Base64.isBase64Url(urlSafeString), "Base64.isBase64Url(urlSafe-string) is true"); } /** @@ -910,6 +952,66 @@ void testRfc4648Section10DecodeEncode(final String input) { testDecodeEncode(input); } + @ParameterizedTest + @ValueSource(strings = { + "", + "Zg==", + "Zm8=", + "Zm9v", + "Zm9vYg==", + "Zm9vYmE=", + "Zm9vYmFy", + "Zm9vYmF+", + "Zm9vYmF/" + }) + void testDecodeEncodeStandard(final String encodedText) { + final String decodedText = StringUtils.newStringUsAscii(Base64.decodeBase64Standard(encodedText)); + final String encodedText2 = Base64.encodeBase64String(StringUtils.getBytesUtf8(decodedText)); + assertEquals(encodedText, encodedText2); + } + + @ParameterizedTest + @ValueSource(strings = { + "", + "Zg", + "Zm8", + "Zm9v", + "Zm9vYg", + "Zm9vYmE", + "Zm9vYmFy", + "Zm9vYmF-", + "Zm9vYmF_" + }) + void testDecodeEncodeUrl(final String encodedText) { + final String decodedText = StringUtils.newStringUsAscii(Base64.decodeBase64Url(encodedText)); + final String encodedText2 = Base64.encodeBase64URLSafeString(StringUtils.getBytesUtf8(decodedText)); + 
assertEquals(encodedText, encodedText2); + } + + @Test + void testDecodeBase64DiffChars() { + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64("Zm9vYmF")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97, 126 }, Base64.decodeBase64("Zm9vYmF+")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97, 126 }, Base64.decodeBase64("Zm9vYmF-")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64("Zm9vYmF~")); + } + + @Test + void testDecodeBase64StandardDiffChars() { + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64Standard("Zm9vYmF")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97, 126 }, Base64.decodeBase64Standard("Zm9vYmF+")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64Standard("Zm9vYmF-")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64Standard("Zm9vYmF~")); + } + + @Test + void testDecodeBase64UrlDiffChars() { + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64Url("Zm9vYmF")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64Url("Zm9vYmF+")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97, 126 }, Base64.decodeBase64Url("Zm9vYmF-")); + assertArrayEquals(new byte[] { 102, 111, 111, 98, 97 }, Base64.decodeBase64Url("Zm9vYmF~")); + } + /** * Tests RFC 4648 section 10 test vectors. *