@@ -45,59 +45,63 @@ public class HTTPToolsTest {
4545 @ Test
4646 public void parseEncodedData () {
4747 // Happy path
48- assertEncoding ("bar" , "bar" , StandardCharsets .UTF_8 );
48+ assertEncodedData ("bar" , "bar" , StandardCharsets .UTF_8 );
4949
5050 // Note, there are 3 try/catches in the parseEncodedDate. This tests them in order, each hitting a specific try/catch.
5151
5252 // Bad name encoding
53- assertEncoding ("bar&%%%=baz" , "bar" , StandardCharsets .UTF_8 );
53+ assertEncodedData ("bar&%%%=baz" , "bar" , StandardCharsets .UTF_8 );
5454
5555 // Bad value encoding
56- assertEncoding ("bar&bar=ba%å&=boom" , "bar" , StandardCharsets .UTF_8 );
56+ assertEncodedData ("bar&bar=ba%å&=boom" , "bar" , StandardCharsets .UTF_8 );
5757
5858 // Bad value encoding
59- assertEncoding ("bar&bar=% % %" , "bar" , StandardCharsets .UTF_8 );
59+ assertEncodedData ("bar&bar=% % %" , "bar" , StandardCharsets .UTF_8 );
6060
6161 // UTF-8 encoding of characters not in the ISO-8859-1 character set
62- assertEncoding ("😎" , "😎" , StandardCharsets .UTF_8 );
63- assertEncoding ("é" , "é" , StandardCharsets .UTF_8 );
64- assertEncoding ("€" , "€" , StandardCharsets .UTF_8 );
65- assertEncoding ("Héllö" , "Héllö" , StandardCharsets .UTF_8 );
62+ assertEncodedData ("😎" , "😎" , StandardCharsets .UTF_8 );
63+ assertEncodedData ("€" , "€" , StandardCharsets .UTF_8 );
6664
67- // Double byte values are outside ISO-88559-1, so we should expect them to not render correctly. See next test.
65+ // UTF-8 encoding of characters that are also in the ISO-8859-1 character set but have different byte mappings
66+ assertEncodedData ("é" , "é" , StandardCharsets .UTF_8 );
67+ assertEncodedData ("Héllö" , "Héllö" , StandardCharsets .UTF_8 );
68+
69+ // These UTF-8 double byte values are outside ISO-8859-1, so we should expect them to not render correctly. See next test.
6870 assertHexValue ("😎" , "D83D DE0E" );
6971 assertHexValue ("€" , "20AC" );
7072
71- // ISO-8559-1 encoding of characters outside the character set
72- assertEncoding ("😎" , "?" , StandardCharsets .ISO_8859_1 );
73- assertEncoding ("€" , "?" , StandardCharsets .ISO_8859_1 );
73+ // ISO-8859-1 encoding of characters outside the ISO-8859-1 character set
74+ assertEncodedData ("😎" , "?" , StandardCharsets .ISO_8859_1 );
75+ assertEncodedData ("€" , "?" , StandardCharsets .ISO_8859_1 );
7476
7577 // These values are within the ISO-8559-1 charset, expect them to render correctly.
7678 assertHexValue ("é" , "E9" );
7779 assertHexValue ("Héllö" , "48 E9 6C 6C F6" );
7880
7981 // ISO-8559-1 encoding of non-ASCII characters inside the character set
80- assertEncoding ("é" , "é" , StandardCharsets .ISO_8859_1 );
81- assertEncoding ("Héllö" , "Héllö" , StandardCharsets .ISO_8859_1 );
82+ assertEncodedData ("é" , "é" , StandardCharsets .ISO_8859_1 );
83+ assertEncodedData ("Héllö" , "Héllö" , StandardCharsets .ISO_8859_1 );
8284
8385 // Mixing and matching. Expect some wonky behavior.
8486 // - Encoded using ISO-8559-1 and decoded as UTF-8
85- assertEncoding ("Héllö" , "H�ll�" , StandardCharsets .ISO_8859_1 , StandardCharsets .UTF_8 );
86- assertEncoding ("Hello world" , "Hello world" , StandardCharsets .ISO_8859_1 , StandardCharsets .UTF_8 );
87+ assertEncodedData ("Héllö" , "H�ll�" , StandardCharsets .ISO_8859_1 , StandardCharsets .UTF_8 );
88+ assertEncodedData ("Hello world" , "Hello world" , StandardCharsets .ISO_8859_1 , StandardCharsets .UTF_8 );
89+ // The é and the ö will fail to render because, while these characters exist in both character sets, they are encoded differently.
90+ // - So we should expect them to render incorrectly.
91+ // - See below, this is just here to validate why the above assertions are accurate.
92+ assertHexValue ("Héllö" , "48 E9 6C 6C F6 " , StandardCharsets .ISO_8859_1 );
93+ assertHexValue ("Héllö" , "48 C3 A9 6C 6C C3 B6" , StandardCharsets .UTF_8 );
94+
8795 // - Reverse
88- // The é fails here because while it does exist in both UTF-8 and ISO-8859-1, it is not the same byte. So expect the rendering to be off.
96+ assertEncodedData ("Héllö" , "Héllö" , StandardCharsets .UTF_8 , StandardCharsets .ISO_8859_1 );
97+ assertEncodedData ("Hello world" , "Hello world" , StandardCharsets .UTF_8 , StandardCharsets .ISO_8859_1 );
98+ // The é and the ö will fail to render because, while these characters exist in both character sets, they are encoded differently.
99+ // - So we should expect them to render incorrectly.
100+ // - See below, this is just here to validate why the above assertions are accurate.
89101 assertHexValue ("é" , "C3 A9" , StandardCharsets .UTF_8 );
90- assertHexValue ("é" , "E9" , StandardCharsets .ISO_8859_1 );
91- assertHexValue ("Ã" , "C3" , StandardCharsets .ISO_8859_1 );
92- assertHexValue ("©" , "A9" , StandardCharsets .ISO_8859_1 );
93- assertEncoding ("é" , "é" , StandardCharsets .UTF_8 , StandardCharsets .ISO_8859_1 );
94-
95- // The ö fails here because while it does exist in both UTF-8 and ISO-8859-1, it is not the same byte. So expect the rendering to be off.
102+ assertHexValue ("é" , "C3 A9" , StandardCharsets .ISO_8859_1 );
96103 assertHexValue ("ö" , "C3 B6" , StandardCharsets .UTF_8 );
97- assertHexValue ("ö" , "F6" , StandardCharsets .ISO_8859_1 );
98- assertHexValue ("¶" , "B6" , StandardCharsets .ISO_8859_1 );
99- assertEncoding ("Héllö" , "Héllö" , StandardCharsets .UTF_8 , StandardCharsets .ISO_8859_1 );
100- assertEncoding ("Hello world" , "Hello world" , StandardCharsets .UTF_8 , StandardCharsets .ISO_8859_1 );
104+ assertHexValue ("ö" , "C3 B6" , StandardCharsets .ISO_8859_1 );
101105 }
102106
103107 @ Test
@@ -214,12 +218,12 @@ public void parsePreamble() throws Exception {
214218 assertEquals (nextRequestRead , 16 );
215219 }
216220
217- private void assertEncoding (String actualValue , String expectedValue , Charset charset ) {
218- assertEncoding (actualValue , expectedValue , charset , charset );
221+ private void assertEncodedData (String actualValue , String expectedValue , Charset charset ) {
222+ assertEncodedData (actualValue , expectedValue , charset , charset );
219223
220224 }
221225
222- private void assertEncoding (String actualValue , String expectedValue , Charset encodingCharset , Charset decodingCharset ) {
226+ private void assertEncodedData (String actualValue , String expectedValue , Charset encodingCharset , Charset decodingCharset ) {
223227 Map <String , List <String >> result = new HashMap <>(1 );
224228 byte [] encoded = ("foo=" + actualValue ).getBytes (encodingCharset );
225229 HTTPTools .parseEncodedData (encoded , 0 , encoded .length , decodingCharset , result );
@@ -231,7 +235,9 @@ private void assertHexValue(String s, String expected) {
231235 }
232236
233237 private void assertHexValue (String s , String expected , Charset charset ) {
234- assertEquals (hex (s .getBytes (charset )), expected );
238+ var trimmed = expected .trim ();
239+ trimmed = trimmed .replaceAll (" +" , " " );
240+ assertEquals (hex (s .getBytes (charset )), trimmed );
235241 }
236242
237243 private String hex (byte [] bytes ) {
0 commit comments