Skip to content

Commit 49fef7f

Browse files
authored
This bumps IDNA with a recent fix to to_ascii. (#351)
1 parent 1fd49eb commit 49fef7f

File tree

3 files changed

+36
-24
lines changed

3 files changed

+36
-24
lines changed

include/ada/ada_idna.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-03-28 11:03:13 -0400. Do not edit! */
1+
/* auto-generated on 2023-04-26 14:14:42 -0400. Do not edit! */
22
/* begin file include/idna.h */
33
#ifndef ADA_IDNA_H
44
#define ADA_IDNA_H
@@ -30,6 +30,7 @@ size_t utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output);
3030

3131
#include <string>
3232
#include <string_view>
33+
3334
namespace ada::idna {
3435

3536
// If the input is ascii, then the mapping is just -> lower case.
@@ -49,6 +50,7 @@ std::u32string map(std::u32string_view input);
4950

5051
#include <string>
5152
#include <string_view>
53+
5254
namespace ada::idna {
5355

5456
// Normalize the characters according to IDNA (Unicode Normalization Form C).
@@ -63,6 +65,7 @@ void normalize(std::u32string& input);
6365

6466
#include <string>
6567
#include <string_view>
68+
6669
namespace ada::idna {
6770

6871
bool punycode_to_utf32(std::string_view input, std::u32string& out);
@@ -99,23 +102,31 @@ bool is_label_valid(const std::u32string_view label);
99102
#include <string_view>
100103

101104
namespace ada::idna {
105+
102106
// Converts a domain (e.g., www.google.com) possibly containing international
103107
// characters to an ascii domain (with punycode). It will not do percent
104108
// decoding: percent decoding should be done prior to calling this function. We
105109
// do not remove tabs and spaces, they should have been removed prior to calling
106110
// this function. We also do not trim control characters. We also assume that
107-
// the input is not empty. We return "" on error. For now.
111+
// the input is not empty. We return "" on error.
112+
//
113+
// Example: "www.öbb.at" -> "www.xn--bb-eka.at"
114+
//
115+
// This function may accept or even produce invalid domains.
108116
std::string to_ascii(std::string_view ut8_string);
109117

118+
// Returns true if the string contains a forbidden code point according to the
119+
// WHATWG URL specification:
120+
// https://url.spec.whatwg.org/#forbidden-domain-code-point
121+
bool contains_forbidden_domain_code_point(std::string_view ascii_string);
122+
110123
bool constexpr begins_with(std::u32string_view view,
111124
std::u32string_view prefix);
112125
bool constexpr begins_with(std::string_view view, std::string_view prefix);
113126

114127
bool constexpr is_ascii(std::u32string_view view);
115128
bool constexpr is_ascii(std::string_view view);
116129

117-
std::string from_ascii_to_ascii(std::string_view ut8_string);
118-
119130
} // namespace ada::idna
120131

121132
#endif // ADA_IDNA_TO_ASCII_H
@@ -125,8 +136,12 @@ std::string from_ascii_to_ascii(std::string_view ut8_string);
125136
#ifndef ADA_IDNA_TO_UNICODE_H
126137
#define ADA_IDNA_TO_UNICODE_H
127138

139+
#include <string_view>
140+
128141
namespace ada::idna {
142+
129143
std::string to_unicode(std::string_view input);
144+
130145
} // namespace ada::idna
131146

132147
#endif // ADA_IDNA_TO_UNICODE_H

src/ada_idna.cpp

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
/* auto-generated on 2023-03-28 11:03:13 -0400. Do not edit! */
1+
/* auto-generated on 2023-04-26 14:14:42 -0400. Do not edit! */
22
/* begin file src/idna.cpp */
33
/* begin file src/unicode_transcoding.cpp */
44

55
#include <cstdint>
66
#include <cstring>
7+
78
namespace ada::idna {
89

910
size_t utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
@@ -7885,9 +7886,10 @@ const char32_t uninorms::decomposition_data[] = {
78857886
namespace ada::idna {
78867887

78877888
void normalize(std::u32string& input) {
7888-
// [Normalize](https://www.unicode.org/reports/tr46/#ProcessingStepNormalize).
7889-
// Normalize
7890-
// the domain_name string to Unicode Normalization Form C.
7889+
/**
7890+
* Normalize the domain_name string to Unicode Normalization Form C.
7891+
* @see https://www.unicode.org/reports/tr46/#ProcessingStepNormalize
7892+
*/
78917893
ufal::unilib::uninorms::nfc(input);
78927894
}
78937895

@@ -8115,7 +8117,6 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) {
81158117
} // namespace ada::idna
81168118
/* end file src/punycode.cpp */
81178119
/* begin file src/validity.cpp */
8118-
81198120
#include <algorithm>
81208121
#include <string_view>
81218122

@@ -9503,18 +9504,18 @@ constexpr static uint8_t is_forbidden_domain_code_point_table[] = {
95039504

95049505
static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);
95059506

9506-
inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept {
9507+
inline bool is_forbidden_domain_code_point(const char c) noexcept {
95079508
return is_forbidden_domain_code_point_table[uint8_t(c)];
95089509
}
95099510

9510-
// We return "" on error. For now.
9511-
std::string from_ascii_to_ascii(std::string_view ut8_string) {
9512-
static const std::string error = "";
9513-
if (std::any_of(ut8_string.begin(), ut8_string.end(),
9514-
is_forbidden_domain_code_point)) {
9515-
return error;
9516-
}
9511+
bool contains_forbidden_domain_code_point(std::string_view view) {
9512+
return (
9513+
std::any_of(view.begin(), view.end(), is_forbidden_domain_code_point));
9514+
}
95179515

9516+
// We return "" on error.
9517+
static std::string from_ascii_to_ascii(std::string_view ut8_string) {
9518+
static const std::string error = "";
95189519
// copy and map
95199520
// we could be more efficient by avoiding the copy when unnecessary.
95209521
std::string mapped_string = std::string(ut8_string);
@@ -9568,7 +9569,7 @@ std::string from_ascii_to_ascii(std::string_view ut8_string) {
95689569
return out;
95699570
}
95709571

9571-
// We return "" on error. For now.
9572+
// We return "" on error.
95729573
std::string to_ascii(std::string_view ut8_string) {
95739574
if (is_ascii(ut8_string)) {
95749575
return from_ascii_to_ascii(ut8_string);
@@ -9655,11 +9656,6 @@ std::string to_ascii(std::string_view ut8_string) {
96559656
out.push_back('.');
96569657
}
96579658
}
9658-
9659-
if (std::any_of(out.begin(), out.end(), is_forbidden_domain_code_point)) {
9660-
return error;
9661-
}
9662-
96639659
return out;
96649660
}
96659661
} // namespace ada::idna

src/unicode.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,8 @@ bool to_ascii(std::optional<std::string>& out, const std::string_view plain,
426426
}
427427
// input is a non-empty UTF-8 string, must be percent decoded
428428
std::string idna_ascii = ada::idna::to_ascii(input);
429-
if (idna_ascii.empty()) {
429+
if (idna_ascii.empty() || contains_forbidden_domain_code_point(
430+
idna_ascii.data(), idna_ascii.size())) {
430431
return false;
431432
}
432433
out = std::move(idna_ascii);

0 commit comments

Comments
 (0)