1212#include < optional>
1313
1414/* *
15+ * Unicode operations. These functions are not part of our public API and may
16+ * change at any time.
17+ *
18+ * @private
1519 * @namespace ada::unicode
1620 * @brief Includes the definitions for unicode operations
1721 */
1822namespace ada ::unicode {
1923
2024/* *
25+ * @private
2126 * We receive a UTF-8 string representing a domain name.
2227 * If the string is percent encoded, we apply percent decoding.
2328 *
@@ -61,11 +66,13 @@ bool to_ascii(std::optional<std::string>& out, std::string_view plain,
6166 size_t first_percent);
6267
6368/* *
69+ * @private
 * Declares the ToUnicode conversion of `input`. Judging by the name and the
 * reference below, it is meant to follow the UTS #46 ToUnicode operation; the
 * implementation is not visible here, so confirm against the definition.
 *
 * @param input the text to convert, passed as a non-owning view.
 * @return the converted text as an owning std::string.
6470 * @see https://www.unicode.org/reports/tr46/#ToUnicode
6571 */
6672std::string to_unicode (std::string_view input);
6773
6874/* *
75+ * @private
6976 * Checks if the input has tab or newline characters.
7077 *
7178 * @attention The has_tabs_or_newline function is a bottleneck and it is simple
@@ -75,19 +82,22 @@ ada_really_inline bool has_tabs_or_newline(
7582 std::string_view user_input) noexcept ;
7683
7784/* *
85+ * @private
7886 * Checks if the input is a forbidden host code point.
 *
 * @param c the character to classify.
 * @return true if `c` is a forbidden host code point, false otherwise.
7987 * @see https://url.spec.whatwg.org/#forbidden-host-code-point
8088 */
8189ada_really_inline constexpr bool is_forbidden_host_code_point (char c) noexcept ;
8290
8391/* *
92+ * @private
8493 * Checks if the input contains a forbidden domain code point.
 *
 * @param input pointer to the characters to examine.
 * @param length presumably the number of characters readable from `input`
 * (TODO confirm against the definition).
 * @return true if a forbidden domain code point is found, false otherwise.
8594 * @see https://url.spec.whatwg.org/#forbidden-domain-code-point
8695 */
8796ada_really_inline constexpr bool contains_forbidden_domain_code_point (
8897 const char * input, size_t length) noexcept ;
8998
9099/* *
100+ * @private
91101 * Checks if the input contains a forbidden domain code point in which case
92102 * the first bit is set to 1. If the input contains an upper case ASCII letter,
93103 * then the second bit is set to 1.
@@ -98,18 +108,21 @@ contains_forbidden_domain_code_point_or_upper(const char* input,
98108 size_t length) noexcept ;
99109
100110/* *
111+ * @private
101112 * Checks if the input is a forbidden domain code point.
 *
 * @param c the character to classify.
 * @return true if `c` is a forbidden domain code point, false otherwise.
102113 * @see https://url.spec.whatwg.org/#forbidden-domain-code-point
103114 */
104115ada_really_inline constexpr bool is_forbidden_domain_code_point (
105116 char c) noexcept ;
106117
107118/* *
119+ * @private
108120 * Checks if the input is alphanumeric, '+', '-' or '.'
 *
 * @param c the character to classify.
 * @return true if `c` is alphanumeric, '+', '-' or '.', false otherwise.
109121 */
110122ada_really_inline constexpr bool is_alnum_plus (char c) noexcept ;
111123
112124/* *
125+ * @private
113126 * @details An ASCII hex digit is an ASCII upper hex digit or ASCII lower hex
114127 * digit. An ASCII upper hex digit is an ASCII digit or a code point in the
115128 * range U+0041 (A) to U+0046 (F), inclusive. An ASCII lower hex digit is an
@@ -118,6 +131,7 @@ ada_really_inline constexpr bool is_alnum_plus(char c) noexcept;
118131ada_really_inline constexpr bool is_ascii_hex_digit (char c) noexcept ;
119132
120133/* *
134+ * @private
121135 * Checks if the input is a C0 control or space character.
122136 *
123137 * @details A C0 control or space is a C0 control or U+0020 SPACE.
@@ -127,38 +141,44 @@ ada_really_inline constexpr bool is_ascii_hex_digit(char c) noexcept;
127141ada_really_inline constexpr bool is_c0_control_or_space (char c) noexcept ;
128142
129143/* *
144+ * @private
130145 * Checks if the input is an ASCII tab or newline character.
131146 *
132147 * @details An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR.
 *
 * @param c the character to classify.
 * @return true if `c` is an ASCII tab or newline as defined above.
133148 */
134149ada_really_inline constexpr bool is_ascii_tab_or_newline (char c) noexcept ;
135150
136151/* *
152+ * @private
 * Checks if the input is a double-dot path segment.
 *
137153 * @details A double-dot path segment must be ".." or an ASCII case-insensitive
138154 * match for ".%2e", "%2e.", or "%2e%2e".
 *
 * @param input the path segment to test.
 * @return true if `input` is a double-dot path segment as defined above.
139155 */
140156ada_really_inline ada_constexpr bool is_double_dot_path_segment (
141157 std::string_view input) noexcept ;
142158
143159/* *
160+ * @private
 * Checks if the input is a single-dot path segment.
 *
144161 * @details A single-dot path segment must be "." or an ASCII case-insensitive
145162 * match for "%2e".
 *
 * @param input the path segment to test.
 * @return true if `input` is a single-dot path segment as defined above.
146163 */
147164ada_really_inline constexpr bool is_single_dot_path_segment (
148165 std::string_view input) noexcept ;
149166
150167/* *
168+ * @private
 * Checks if the input is a lowercase hexadecimal character. Going by the name
 * and the note below, this presumably means 0-9 or a-f; confirm against the
 * definition.
 *
151169 * @details ipv4 character might contain 0-9 or a-f character ranges.
 *
 * @param c the character to classify.
 * @return the result of the check as a bool.
152170 */
153171ada_really_inline constexpr bool is_lowercase_hex (char c) noexcept ;
154172
155173/* *
174+ * @private
156175 * @details Convert hex to binary. Caller is responsible to ensure that
157176 * the parameter is a hexadecimal digit (0-9, A-F, a-f).
 *
 * @param c the hexadecimal digit to convert; no validation is promised here,
 * so the caller must check it first.
 * @return the converted value as an unsigned integer (presumably the numeric
 * value of the digit; confirm against the definition).
158177 */
159178ada_really_inline unsigned constexpr convert_hex_to_binary (char c) noexcept ;
160179
161180/* *
181+ * @private
162182 * first_percent should be = input.find('%')
163183 *
164184 * @todo It would be faster as noexcept maybe, but it could be unsafe since.
@@ -169,19 +189,22 @@ ada_really_inline unsigned constexpr convert_hex_to_binary(char c) noexcept;
169189std::string percent_decode (std::string_view input, size_t first_percent);
170190
171191/* *
192+ * @private
172193 * Returns a percent-encoded string whether percent encoding was needed or not.
 *
 * @param input the text to encode, passed as a non-owning view.
 * @param character_set byte table consulted during encoding; presumably it
 * marks which characters must be percent-encoded (TODO confirm layout against
 * the definition).
 * @return the resulting string; a string is returned even when nothing needed
 * encoding.
173194 * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226
174195 */
175196std::string percent_encode (std::string_view input,
176197 const uint8_t character_set[]);
177198/* *
199+ * @private
178200 * Returns a percent-encoded string version of input, while starting the percent
179201 * encoding at the provided index.
 *
 * @param input the text to encode, passed as a non-owning view.
 * @param character_set byte table consulted during encoding; presumably it
 * marks which characters must be percent-encoded (TODO confirm layout against
 * the definition).
 * @param index position in `input` at which percent encoding starts.
 * @return the percent-encoded version of `input`.
180202 * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226
181203 */
182204std::string percent_encode (std::string_view input,
183205 const uint8_t character_set[], size_t index);
184206/* *
207+ * @private
185208 * Returns true if percent encoding was needed, in which case, we store
186209 * the percent-encoded content in 'out'. If the boolean 'append' is set to
187210 * true, the content is appended to 'out'.
@@ -192,12 +215,14 @@ template <bool append>
192215bool percent_encode (std::string_view input, const uint8_t character_set[],
193216 std::string& out);
194217/* *
218+ * @private
195219 * Returns the index at which percent encoding should start, or (equivalently),
196220 * the length of the prefix that does not require percent encoding.
 *
 * @param input the text to scan, passed as a non-owning view.
 * @param character_set byte table consulted during the scan; presumably it
 * marks which characters must be percent-encoded (TODO confirm layout against
 * the definition).
 * @return the index described above.
197221 */
198222ada_really_inline size_t percent_encode_index (std::string_view input,
199223 const uint8_t character_set[]);
200224/* *
225+ * @private
201226 * Lowers the string in-place, assuming that the content is ASCII.
202227 * Return true if the content was ASCII.
203228 */
0 commit comments