Skip to content

Commit 1b2bab2

Browse files
committed
Some improvements: unrolled 2x
1 parent 45a1086 commit 1b2bab2

File tree

2 files changed

+95
-27
lines changed

2 files changed

+95
-27
lines changed

src/Ascii.cs

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,6 @@
66
using System.Runtime.InteropServices;
77

88

9-
/* PAR:
10-
| Method | N | Mean | Error | StdDev |
11-
|----------------------- |---- |-----------:|---------:|---------:|
12-
| FastUnicodeIsAscii | 100 | 652.6 ns | 2.20 ns | 1.95 ns |
13-
| StandardUnicodeIsAscii | 100 | 2,466.5 ns | 21.77 ns | 20.36 ns |
14-
| RuntimeIsAscii | 100 | 2,502.7 ns | 29.81 ns | 27.89 ns |
15-
| FastUnicodeIsAscii | 200 | 1,300.8 ns | 17.95 ns | 14.99 ns |
16-
| StandardUnicodeIsAscii | 200 | 5,216.6 ns | 62.48 ns | 55.38 ns |
17-
| RuntimeIsAscii | 200 | 5,293.2 ns | 41.50 ns | 38.82 ns |
18-
| FastUnicodeIsAscii | 500 | 2,978.6 ns | 34.99 ns | 32.73 ns |
19-
| StandardUnicodeIsAscii | 500 | 6,172.9 ns | 74.53 ns | 69.71 ns |
20-
| RuntimeIsAscii | 500 | 6,210.8 ns | 80.82 ns | 63.10 ns | */
219

2210

2311
// Ideally, we would want to implement something that looks like
@@ -104,7 +92,22 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
10492
fixed (char* pStart = &MemoryMarshal.GetReference(s))
10593
{
10694
int i = 0;
107-
if (s.Length > 8)
95+
96+
/* PAR: not unrolled
97+
| Method | N | Mean | Error | StdDev |
98+
|----------------------- |---- |-----------:|---------:|---------:|
99+
| FastUnicodeIsAscii | 100 | 652.6 ns | 2.20 ns | 1.95 ns |
100+
| StandardUnicodeIsAscii | 100 | 2,466.5 ns | 21.77 ns | 20.36 ns |
101+
| RuntimeIsAscii | 100 | 2,502.7 ns | 29.81 ns | 27.89 ns |
102+
| FastUnicodeIsAscii | 200 | 1,300.8 ns | 17.95 ns | 14.99 ns |
103+
| StandardUnicodeIsAscii | 200 | 5,216.6 ns | 62.48 ns | 55.38 ns |
104+
| RuntimeIsAscii | 200 | 5,293.2 ns | 41.50 ns | 38.82 ns |
105+
| FastUnicodeIsAscii | 500 | 2,978.6 ns | 34.99 ns | 32.73 ns |
106+
| StandardUnicodeIsAscii | 500 | 6,172.9 ns | 74.53 ns | 69.71 ns |
107+
| RuntimeIsAscii | 500 | 6,210.8 ns | 80.82 ns | 63.10 ns | */
108+
109+
110+
/* if (s.Length > 8)
108111
{
109112
Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
110113
i += 8;
@@ -113,7 +116,37 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
113116
{
114117
Vector128<ushort> raw = Sse41.LoadDquVector128((ushort*)pStart + i);
115118
total = Sse2.Or(total, raw);
119+
} */
120+
121+
122+
/* PAR: unrolled 2x
123+
| Method | N | Mean | Error | StdDev |
124+
|----------------------- |---- |-----------:|---------:|---------:|
125+
| FastUnicodeIsAscii | 100 | 905.7 ns | 17.95 ns | 20.67 ns |
126+
| StandardUnicodeIsAscii | 100 | 2,502.4 ns | 49.67 ns | 66.31 ns |
127+
| RuntimeIsAscii | 100 | 2,522.8 ns | 32.70 ns | 30.59 ns |
128+
| FastUnicodeIsAscii | 200 | 649.3 ns | 10.24 ns | 9.57 ns |
129+
| StandardUnicodeIsAscii | 200 | 5,299.7 ns | 64.91 ns | 57.54 ns |
130+
| RuntimeIsAscii | 200 | 5,307.2 ns | 49.18 ns | 46.00 ns |
131+
| FastUnicodeIsAscii | 500 | 1,382.2 ns | 9.40 ns | 8.79 ns |
132+
| StandardUnicodeIsAscii | 500 | 6,127.7 ns | 57.69 ns | 48.18 ns |
133+
| RuntimeIsAscii | 500 | 6,258.2 ns | 62.05 ns | 58.05 ns | */
134+
135+
if (s.Length > 16) // Adjusted for the unrolled loop
136+
{
137+
Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
138+
i += 8;
139+
140+
// Loop unrolled 2x: each iteration loads two 8-char vectors (16 chars) and ORs both into total
141+
for (; i + 15 < s.Length; i += 16)
142+
{
143+
Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart + i);
144+
Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
145+
146+
total = Sse2.Or(total, raw1);
147+
total = Sse2.Or(total, raw2);
116148
}
149+
117150
Vector128<ushort> b127 = Vector128.Create((ushort)127);
118151
Vector128<ushort> b = Sse41.Max(b127, total);
119152
Vector128<ushort> b16 = Sse41.CompareEqual(b, b127);

test/AsciiTest.cs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@ public class AsciiTest
66
[Fact]
77
public void Test1()
88
{
9-
// Assert.True(SimdUnicode.Ascii.IsAscii("absads12323123232131231232132132132312321321312321"));
10-
// Assert.False(SimdUnicode.Ascii.IsAscii("absaé12323123232131231232132132132312321321312321"));
11-
// Assert.True(SimdUnicode.Ascii.SIMDIsAscii("absads12323123232131231232132132132312321321312321"));
12-
// Assert.True(SimdUnicode.Ascii.SIMDIsAscii("12345678")); // 8 characters pass
13-
// Assert.True(SimdUnicode.Ascii.SIMDIsAscii("123456789")); // 9 characters fails
14-
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("1234567890123456")); //fails
15-
// Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absaé12323123232131231232132132132312321321312321"));
16-
// Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absa12323123232131231232132132132312321321312321é")); // pass
9+
Assert.True(SimdUnicode.Ascii.IsAscii("absads12323123232131231232132132132312321321312321"));
10+
Assert.False(SimdUnicode.Ascii.IsAscii("absaé12323123232131231232132132132312321321312321"));
11+
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("absads12323123232131231232132132132312321321312321"));
12+
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("12345678"));
13+
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("123456789"));
14+
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("1234567890123456"));
15+
Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absaé12323123232131231232132132132312321321312321"));
16+
Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absa12323123232131231232132132132312321321312321é"));
1717
}
1818

19-
/* [Fact]
19+
[Fact]
2020
public void HardCodedSequencesTest()
2121
{
2222
string[] goodsequences = {
@@ -33,8 +33,45 @@ public void HardCodedSequencesTest()
3333
"\xe2\x28\xa1",
3434
"\xe2\x82\x28",
3535
"\xf0\x28\x8c\xbc",
36-
// ... (continue with all sequences)
37-
};
36+
"\xf0\x90\x28\xbc",
37+
"\xf0\x28\x8c\x28",
38+
"\xc0\x9f",
39+
"\xf5\xff\xff\xff",
40+
"\xed\xa0\x81",
41+
"\xf8\x90\x80\x80\x80",
42+
"123456789012345\xed",
43+
"123456789012345\xf1",
44+
"123456789012345\xc2",
45+
"\xC2\x7F",
46+
"\xce",
47+
"\xce\xba\xe1",
48+
"\xce\xba\xe1\xbd",
49+
"\xce\xba\xe1\xbd\xb9\xcf",
50+
"\xce\xba\xe1\xbd\xb9\xcf\x83\xce",
51+
"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce",
52+
"\xdf",
53+
"\xef\xbf",
54+
"\x80",
55+
"\x91\x85\x95\x9e",
56+
"\x6c\x02\x8e\x18",
57+
"\x25\x5b\x6e\x2c\x32\x2c\x5b\x5b\x33\x2c\x34\x2c\x05\x29\x2c\x33\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5d\x2c\x35\x2e\x33\x2c\x39\x2e\x33\x2c\x37\x2e\x33\x2c\x39\x2e\x34\x2c\x37\x2e\x33\x2c\x39\x2e\x33\x2c\x37\x2e\x33\x2c\x39\x2e\x34\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x20\x01\x01\x01\x01\x01\x02\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x23\x0a\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x7e\x7e\x0a\x0a\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5d\x2c\x37\x2e\x33\x2c\x39\x2e\x33\x2c\x37\x2e\x33\x2c\x39\x2e\x34\x2c\x37\x2e\x33\x2c\x39\x2e\x33\x2c\x37\x2e\x33\x2c\x39\x2e\x34\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x5d\x01\x01\x80\x01\x01\x01\x79\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01",
58+
"[[[[[[[[[[[[[[[\x80\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x010\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01",
59+
"\x20\x0b\x01\x01\x01\x64\x3a\x64\x3a\x64\x3a\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x5b\x30\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x80\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01",
60+
"\x80",
61+
"\x90",
62+
"\xa1",
63+
"\xb2",
64+
"\xc3",
65+
"\xd4",
66+
"\xe5",
67+
"\xf6",
68+
"\xc3\xb1",
69+
"\xe2\x82\xa1",
70+
"\xf0\x90\x8c\xbc",
71+
"\xc2\x80",
72+
"\xf0\x90\x80\x80",
73+
"\xee\x80\x80",
74+
"\xef\xbb\xbf"};
3875

3976
foreach (var sequence in goodsequences)
4077
{
@@ -47,7 +84,5 @@ public void HardCodedSequencesTest()
4784
Assert.False(SimdUnicode.Ascii.IsAscii(sequence), "Expected non-valid ASCII sequence");
4885
Assert.False(SimdUnicode.Ascii.SIMDIsAscii(sequence), "Expected SIMDIsAscii to invalidate non-ASCII sequence");
4986
}
50-
} */
51-
52-
87+
}
5388
}

0 commit comments

Comments
 (0)