Skip to content

Commit e3725a9

Browse files
authored
Add support for explicit length string duplication (#1450)
* add `string_dup_n` to C generator * c: add `string_dup_n` runtime tests and docs
1 parent 366de65 commit e3725a9

File tree

4 files changed

+88
-2
lines changed

4 files changed

+88
-2
lines changed

crates/c/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,30 @@ void my_world_string_set(my_world_string_t *ret, const char8_t*s);
195195
// stores it into the component model string `ret`.
196196
void my_world_string_dup(my_world_string_t *ret, const char8_t*s);
197197

198+
// Creates a copy of the input string `s` with explicit length `len` and
199+
// stores it into the component model string `ret`.
200+
// The length is specified in code units (bytes for UTF-8, 16-bit values for UTF-16).
201+
void my_world_string_dup_n(my_world_string_t *ret, const char8_t*s, size_t len);
202+
198203
// Deallocates the string pointed to by `ret`, deallocating
199204
// the memory behind the string.
200205
void my_world_string_free(my_world_string_t *ret);
201206
```
202207
208+
The `string_dup_n` function is useful when working with strings that include embedded null characters or when handling substrings without first copying them into a null-terminated buffer. It’s also helpful for length-prefixed data or binary formats and, more generally, any time the string length is already known, since it avoids the cost of scanning for a terminator.
209+
210+
```c
211+
// Extract a substring from a larger string
212+
const char *full_string = "Hello, World!";
213+
my_world_string_t substring;
214+
my_world_string_dup_n(&substring, full_string, 5); // Creates "Hello"
215+
216+
// Work with strings containing embedded nulls
217+
const char data_with_nulls[] = {'H', 'i', '\0', '!'};
218+
my_world_string_t binary_string;
219+
my_world_string_dup_n(&binary_string, data_with_nulls, 4); // Copies all 4 bytes
220+
```
221+
203222
For UTF-16 strings, those `char8_t*`s become `char16_t*`s and the following function is also supplied:
204223

205224
```c

crates/c/src/lib.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,11 @@ impl WorldGenerator for C {
368368
// stores it into the component model string `ret`.
369369
void {snake}_string_dup({snake}_string_t *ret, const {c_string_ty} *s);
370370
371+
// Creates a copy of the input string `s` with length `len` and
372+
// stores it into the component model string `ret`.
373+
// The length is specified in code units (bytes for UTF-8, 16-bit values for UTF-16).
374+
void {snake}_string_dup_n({snake}_string_t *ret, const {c_string_ty} *s, size_t len);
375+
371376
// Deallocates the string pointed to by `ret`, deallocating
372377
// the memory behind the string.
373378
void {snake}_string_free({snake}_string_t *ret);\
@@ -387,6 +392,12 @@ impl WorldGenerator for C {
387392
memcpy(ret->ptr, s, ret->len * {size});
388393
}}
389394
395+
void {snake}_string_dup_n({snake}_string_t *ret, const {c_string_ty} *s, size_t len) {{
396+
ret->len = len;
397+
ret->ptr = ({ty}*) cabi_realloc(NULL, 0, {size}, ret->len * {size});
398+
memcpy(ret->ptr, s, ret->len * {size});
399+
}}
400+
390401
void {snake}_string_free({snake}_string_t *ret) {{
391402
if (ret->len > 0) {{
392403
free(ret->ptr);

tests/runtime/strings/runner.c

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//@ args = '--string-encoding utf16'
22
//@ [lang]
3-
//@ cflags = '-Wno-c++-keyword'
3+
//@ cflags = '-Wno-c++-compat'
44

55
#include <assert.h>
66
#include <stdlib.h>
@@ -40,4 +40,60 @@ void exports_runner_run() {
4040
test_strings_to_test_roundtrip(&str4, &str5);
4141
assert_str(&str5, u"🚀🚀🚀 𠈄𓀀");
4242
runner_string_free(&str5);
43+
44+
// Basic substring extraction
45+
runner_string_t str6;
46+
const char16_t *source = u"hello world";
47+
runner_string_dup_n(&str6, source, 5);
48+
assert(str6.len == 5);
49+
assert(memcmp(str6.ptr, u"hello", 5 * 2) == 0);
50+
runner_string_free(&str6);
51+
52+
// Zero length (edge case - boundary condition)
53+
runner_string_t str7;
54+
runner_string_dup_n(&str7, u"test", 0);
55+
assert(str7.len == 0);
56+
runner_string_free(&str7);
57+
58+
// Full string length
59+
runner_string_t str8;
60+
const char16_t *full_str = u"complete";
61+
size_t full_len = 8;
62+
runner_string_dup_n(&str8, full_str, full_len);
63+
assert(str8.len == full_len);
64+
assert(memcmp(str8.ptr, full_str, full_len * 2) == 0);
65+
runner_string_free(&str8);
66+
67+
// Substring from middle (pointer offset)
68+
runner_string_t str9;
69+
const char16_t *middle_source = u"prefix_target_suffix";
70+
runner_string_dup_n(&str9, middle_source + 7, 6);
71+
assert(str9.len == 6);
72+
assert(memcmp(str9.ptr, u"target", 6 * 2) == 0);
73+
runner_string_free(&str9);
74+
75+
// Unicode content with explicit length
76+
runner_string_t str10;
77+
const char16_t *unicode_src = u"🚀🚀🚀 test";
78+
// Each rocket emoji is 2 UTF-16 code units (surrogate pair), space is 1, "test" is 4
79+
// Total: 6 + 1 + 4 = 11 code units, extract first 7 (3 rockets + space)
80+
runner_string_dup_n(&str10, unicode_src, 7);
81+
assert(str10.len == 7);
82+
assert(memcmp(str10.ptr, u"🚀🚀🚀 ", 7 * 2) == 0);
83+
runner_string_free(&str10);
84+
85+
// Single character
86+
runner_string_t str11;
87+
runner_string_dup_n(&str11, u"x", 1);
88+
assert(str11.len == 1);
89+
assert(str11.ptr[0] == u'x');
90+
runner_string_free(&str11);
91+
92+
// Verify data independence (modification doesn't affect original)
93+
runner_string_t str12;
94+
char16_t mutable_src[] = u"original";
95+
runner_string_dup_n(&str12, mutable_src, 8);
96+
mutable_src[0] = u'X';
97+
assert(str12.ptr[0] == u'o');
98+
runner_string_free(&str12);
4399
}

tests/runtime/strings/test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//@ args = '--string-encoding utf16'
22
//@ [lang]
3-
//@ cflags = '-Wno-c++-keyword'
3+
//@ cflags = '-Wno-c++-compat'
44

55
#include <assert.h>
66
#include <stdlib.h>

0 commit comments

Comments
 (0)