@@ -119,7 +119,7 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
119119 } */
120120
121121
122- /*
122+ /* Unrolled twice:
123123| Method | N | Mean | Error | StdDev |
124124|----------------------- |---- |-----------:|---------:|---------:|
125125| FastUnicodeIsAscii | 100 | 905.7 ns | 17.95 ns | 20.67 ns |
@@ -144,9 +144,43 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
144144 Vector128 < ushort > raw2 = Sse41 . LoadDquVector128 ( ( ushort * ) pStart + i + 8 ) ;
145145
146146 total = Sse2 . Or ( total , raw1 ) ;
147- total = Sse2 . Or ( total , raw2 ) ;
147+ total = Sse2 . Or ( total , raw2 ) ;
148148 }
149149
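150+ // Benchmark results, presumably for the commented-out 4x-unrolled variant below (TODO: confirm which variant was measured):
150+ // | Method | N | Mean | Error | StdDev |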
151+ // |----------------------- |---- |-----------:|----------:|----------:|
152+ // | FastUnicodeIsAscii | 100 | 1,601.3 ns | 31.62 ns | 31.05 ns |
153+ // | StandardUnicodeIsAscii | 100 | 2,502.5 ns | 49.20 ns | 65.68 ns |
154+ // | RuntimeIsAscii | 100 | 2,478.5 ns | 30.08 ns | 26.66 ns |
155+ // | FastUnicodeIsAscii | 200 | 653.0 ns | 6.26 ns | 5.86 ns |
156+ // | StandardUnicodeIsAscii | 200 | 5,282.7 ns | 102.28 ns | 105.03 ns |
157+ // | RuntimeIsAscii | 200 | 5,366.1 ns | 65.50 ns | 61.27 ns |
158+ // | FastUnicodeIsAscii | 500 | 1,305.4 ns | 11.85 ns | 11.09 ns |
159+ // | StandardUnicodeIsAscii | 500 | 6,235.6 ns | 103.06 ns | 96.40 ns |
160+ // | RuntimeIsAscii | 500 | 6,389.6 ns | 103.20 ns | 96.53 ns |
161+
162+
163+ // if (s.Length > 32) // Adjusted for the 4x unrolled loop
164+ // {
165+ // Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
166+ // i += 8;
167+
168+ // // 4x loop unrolling
169+ // for (; i + 31 < s.Length; i += 32)
170+ // {
171+ // Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart + i);
172+ // Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
173+ // Vector128<ushort> raw3 = Sse41.LoadDquVector128((ushort*)pStart + i + 16);
174+ // Vector128<ushort> raw4 = Sse41.LoadDquVector128((ushort*)pStart + i + 24);
175+
176+ // total = Sse2.Or(total, raw1);
177+ // total = Sse2.Or(total, raw2);
178+ // total = Sse2.Or(total, raw3);
179+ // total = Sse2.Or(total, raw4);
180+ // }
181+
182+
183+
150184 Vector128 < ushort > b127 = Vector128 . Create ( ( ushort ) 127 ) ;
151185 Vector128 < ushort > b = Sse41 . Max ( b127 , total ) ;
152186 Vector128 < ushort > b16 = Sse41 . CompareEqual ( b , b127 ) ;
0 commit comments