Skip to content

Commit 5a9b6bf

Browse files
committed
More unrolling attempts
1 parent 1b2bab2 commit 5a9b6bf

File tree

3 files changed

+103
-2
lines changed

3 files changed

+103
-2
lines changed

.vscode/launch.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
// Use IntelliSense to find out which attributes exist for C# debugging
6+
// Use hover for the description of the existing attributes
7+
// For further information visit https://github.com/dotnet/vscode-csharp/blob/main/debugger-launchjson.md
8+
"name": ".NET Core Launch (console)",
9+
"type": "coreclr",
10+
"request": "launch",
11+
"preLaunchTask": "build",
12+
// If you have changed target frameworks, make sure to update the program path.
13+
"program": "${workspaceFolder}/benchmark/bin/Debug/net7.0/benchmark.dll",
14+
"args": [],
15+
"cwd": "${workspaceFolder}/benchmark",
16+
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
17+
"console": "internalConsole",
18+
"stopAtEntry": false
19+
},
20+
{
21+
"name": ".NET Core Attach",
22+
"type": "coreclr",
23+
"request": "attach"
24+
}
25+
]
26+
}

.vscode/tasks.json

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"version": "2.0.0",
3+
"tasks": [
4+
{
5+
"label": "build",
6+
"command": "dotnet",
7+
"type": "process",
8+
"args": [
9+
"build",
10+
"${workspaceFolder}/benchmark/benchmark.csproj",
11+
"/property:GenerateFullPaths=true",
12+
"/consoleloggerparameters:NoSummary;ForceNoAlign"
13+
],
14+
"problemMatcher": "$msCompile"
15+
},
16+
{
17+
"label": "publish",
18+
"command": "dotnet",
19+
"type": "process",
20+
"args": [
21+
"publish",
22+
"${workspaceFolder}/benchmark/benchmark.csproj",
23+
"/property:GenerateFullPaths=true",
24+
"/consoleloggerparameters:NoSummary;ForceNoAlign"
25+
],
26+
"problemMatcher": "$msCompile"
27+
},
28+
{
29+
"label": "watch",
30+
"command": "dotnet",
31+
"type": "process",
32+
"args": [
33+
"watch",
34+
"run",
35+
"--project",
36+
"${workspaceFolder}/benchmark/benchmark.csproj"
37+
],
38+
"problemMatcher": "$msCompile"
39+
}
40+
]
41+
}

src/Ascii.cs

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
119119
} */
120120

121121

122-
/*
122+
/* Loop unrolled 2x (two 128-bit loads per iteration) -- benchmark results:
123123
| Method | N | Mean | Error | StdDev |
124124
|----------------------- |---- |-----------:|---------:|---------:|
125125
| FastUnicodeIsAscii | 100 | 905.7 ns | 17.95 ns | 20.67 ns |
@@ -144,9 +144,43 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
144144
Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
145145

146146
total = Sse2.Or(total, raw1);
147-
total = Sse2.Or(total, raw2);
147+
total = Sse2.Or(total, raw2);
148148
}
149149

150+
// | Method | N | Mean | Error | StdDev |
151+
// |----------------------- |---- |-----------:|----------:|----------:|
152+
// | FastUnicodeIsAscii | 100 | 1,601.3 ns | 31.62 ns | 31.05 ns |
153+
// | StandardUnicodeIsAscii | 100 | 2,502.5 ns | 49.20 ns | 65.68 ns |
154+
// | RuntimeIsAscii | 100 | 2,478.5 ns | 30.08 ns | 26.66 ns |
155+
// | FastUnicodeIsAscii | 200 | 653.0 ns | 6.26 ns | 5.86 ns |
156+
// | StandardUnicodeIsAscii | 200 | 5,282.7 ns | 102.28 ns | 105.03 ns |
157+
// | RuntimeIsAscii | 200 | 5,366.1 ns | 65.50 ns | 61.27 ns |
158+
// | FastUnicodeIsAscii | 500 | 1,305.4 ns | 11.85 ns | 11.09 ns |
159+
// | StandardUnicodeIsAscii | 500 | 6,235.6 ns | 103.06 ns | 96.40 ns |
160+
// | RuntimeIsAscii | 500 | 6,389.6 ns | 103.20 ns | 96.53 ns |
161+
162+
163+
// if (s.Length > 32) // Adjusted for the 4x unrolled loop
164+
// {
165+
// Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
166+
// i += 8;
167+
168+
// // 4x loop unrolling
169+
// for (; i + 31 < s.Length; i += 32)
170+
// {
171+
// Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart + i);
172+
// Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
173+
// Vector128<ushort> raw3 = Sse41.LoadDquVector128((ushort*)pStart + i + 16);
174+
// Vector128<ushort> raw4 = Sse41.LoadDquVector128((ushort*)pStart + i + 24);
175+
176+
// total = Sse2.Or(total, raw1);
177+
// total = Sse2.Or(total, raw2);
178+
// total = Sse2.Or(total, raw3);
179+
// total = Sse2.Or(total, raw4);
180+
// }
181+
182+
183+
150184
Vector128<ushort> b127 = Vector128.Create((ushort)127);
151185
Vector128<ushort> b = Sse41.Max(b127, total);
152186
Vector128<ushort> b16 = Sse41.CompareEqual(b, b127);

0 commit comments

Comments
 (0)