Skip to content

Commit 2080e0d

Browse files
committed
document zero initialization vs. loading
1 parent 47c8984 commit 2080e0d

File tree

1 file changed

+58
-4
lines changed

1 file changed

+58
-4
lines changed

src/Ascii.cs

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,69 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
9292
fixed (char* pStart = &MemoryMarshal.GetReference(s))
9393
{
9494
int i = 0;
95+
96+
/* PAR Unrolled twice:
97+
| Method | N | Mean | Error | StdDev |
98+
|----------------------- |---- |-----------:|---------:|---------:|
99+
| FastUnicodeIsAscii | 100 | 905.7 ns | 17.95 ns | 20.67 ns |
100+
| StandardUnicodeIsAscii | 100 | 2,502.4 ns | 49.67 ns | 66.31 ns |
101+
| RuntimeIsAscii | 100 | 2,522.8 ns | 32.70 ns | 30.59 ns |
102+
| FastUnicodeIsAscii | 200 | 649.3 ns | 10.24 ns | 9.57 ns |
103+
| StandardUnicodeIsAscii | 200 | 5,299.7 ns | 64.91 ns | 57.54 ns |
104+
| RuntimeIsAscii | 200 | 5,307.2 ns | 49.18 ns | 46.00 ns |
105+
| FastUnicodeIsAscii | 500 | 1,382.2 ns | 9.40 ns | 8.79 ns |
106+
| StandardUnicodeIsAscii | 500 | 6,127.7 ns | 57.69 ns | 48.18 ns |
107+
| RuntimeIsAscii | 500 | 6,258.2 ns | 62.05 ns | 58.05 ns | */
108+
109+
// if (s.Length > 16) // Adjusted for the unrolled loop
110+
// {
111+
// Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
112+
// i += 8;
113+
114+
// // Unrolling the loop by 2x
115+
// for (; i + 15 < s.Length; i += 16)
116+
// {
117+
// Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart + i);
118+
// Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
119+
120+
// total = Sse2.Or(total, raw1);
121+
// total = Sse2.Or(total, raw2);
122+
// }
123+
124+
// Vector128<ushort> b127 = Vector128.Create((ushort)127);
125+
// Vector128<ushort> b = Sse41.Max(b127, total);
126+
// Vector128<ushort> b16 = Sse41.CompareEqual(b, b127);
127+
// int movemask = Sse2.MoveMask(b16.AsByte());
128+
// if (movemask != 0xffff)
129+
// {
130+
// return false;
131+
// }
132+
// }
133+
134+
// | Method | N | Mean | Error | StdDev |
135+
// |----------------------- |---- |-----------:|---------:|---------:|
136+
// | FastUnicodeIsAscii | 100 | 904.0 ns | 9.22 ns | 8.17 ns |
137+
// | StandardUnicodeIsAscii | 100 | 2,396.5 ns | 11.33 ns | 10.04 ns |
138+
// | RuntimeIsAscii | 100 | 2,498.8 ns | 42.35 ns | 37.54 ns |
139+
// | FastUnicodeIsAscii | 200 | 1,270.0 ns | 7.69 ns | 6.01 ns |
140+
// | StandardUnicodeIsAscii | 200 | 5,173.0 ns | 57.82 ns | 54.08 ns |
141+
// | RuntimeIsAscii | 200 | 5,197.5 ns | 15.40 ns | 13.65 ns |
142+
// | FastUnicodeIsAscii | 500 | 1,412.0 ns | 24.22 ns | 21.47 ns |
143+
// | StandardUnicodeIsAscii | 500 | 6,196.5 ns | 60.78 ns | 53.88 ns |
144+
// | RuntimeIsAscii | 500 | 6,215.5 ns | 96.43 ns | 90.20 ns |
145+
146+
95147
if (s.Length > 16) // Adjusted for the unrolled loop
96148
{
97-
Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
149+
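// Start the OR accumulator at zero instead of pre-loading the first 8 chars.
// NOTE(review): chars 0..7 are now only covered by the raw1 load inside the loop; since i still starts at 8, they appear to be skipped whenever the loop body never runs (e.g. 17 <= s.Length <= 24) — confirm.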
150+
Vector128<ushort> total = Vector128<ushort>.Zero;
98151
i += 8;
99152

100153
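// Loop advances 16 chars per iteration (originally a 2x unroll of two 8-char loads).
// NOTE(review): raw1 now reloads pStart and raw2 covers only chars i..i+7, so chars i+8..i+15 of each step appear to go unchecked — confirm this is intended.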
101-
for (; i + 15 < s.Length; i += 16)
154+
for (; i + 16 < s.Length; i += 16)
102155
{
103-
Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart + i);
104-
Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
156+
Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart);
157+
Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i);
105158

106159
total = Sse2.Or(total, raw1);
107160
total = Sse2.Or(total, raw2);
@@ -116,6 +169,7 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
116169
return false;
117170
}
118171
}
172+
119173
for (; i < s.Length; i++)
120174
{
121175
if (pStart[i] >= 128) return false;

0 commit comments

Comments
 (0)