@@ -92,69 +92,16 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
9292 fixed ( char * pStart = & MemoryMarshal . GetReference ( s ) )
9393 {
9494 int i = 0 ;
95-
96- /* PAR Unrolled twice:
97- | Method | N | Mean | Error | StdDev |
98- |----------------------- |---- |-----------:|---------:|---------:|
99- | FastUnicodeIsAscii | 100 | 905.7 ns | 17.95 ns | 20.67 ns |
100- | StandardUnicodeIsAscii | 100 | 2,502.4 ns | 49.67 ns | 66.31 ns |
101- | RuntimeIsAscii | 100 | 2,522.8 ns | 32.70 ns | 30.59 ns |
102- | FastUnicodeIsAscii | 200 | 649.3 ns | 10.24 ns | 9.57 ns |
103- | StandardUnicodeIsAscii | 200 | 5,299.7 ns | 64.91 ns | 57.54 ns |
104- | RuntimeIsAscii | 200 | 5,307.2 ns | 49.18 ns | 46.00 ns |
105- | FastUnicodeIsAscii | 500 | 1,382.2 ns | 9.40 ns | 8.79 ns |
106- | StandardUnicodeIsAscii | 500 | 6,127.7 ns | 57.69 ns | 48.18 ns |
107- | RuntimeIsAscii | 500 | 6,258.2 ns | 62.05 ns | 58.05 ns | */
108-
109- // if (s.Length > 16) // Adjusted for the unrolled loop
110- // {
111- // Vector128<ushort> total = Sse41.LoadDquVector128((ushort*)pStart);
112- // i += 8;
113-
114- // // Unrolling the loop by 2x
115- // for (; i + 15 < s.Length; i += 16)
116- // {
117- // Vector128<ushort> raw1 = Sse41.LoadDquVector128((ushort*)pStart + i);
118- // Vector128<ushort> raw2 = Sse41.LoadDquVector128((ushort*)pStart + i + 8);
119-
120- // total = Sse2.Or(total, raw1);
121- // total = Sse2.Or(total, raw2);
122- // }
123-
124- // Vector128<ushort> b127 = Vector128.Create((ushort)127);
125- // Vector128<ushort> b = Sse41.Max(b127, total);
126- // Vector128<ushort> b16 = Sse41.CompareEqual(b, b127);
127- // int movemask = Sse2.MoveMask(b16.AsByte());
128- // if (movemask != 0xffff)
129- // {
130- // return false;
131- // }
132- // }
133-
134- // | Method | N | Mean | Error | StdDev |
135- // |----------------------- |---- |-----------:|---------:|---------:|
136- // | FastUnicodeIsAscii | 100 | 904.0 ns | 9.22 ns | 8.17 ns |
137- // | StandardUnicodeIsAscii | 100 | 2,396.5 ns | 11.33 ns | 10.04 ns |
138- // | RuntimeIsAscii | 100 | 2,498.8 ns | 42.35 ns | 37.54 ns |
139- // | FastUnicodeIsAscii | 200 | 1,270.0 ns | 7.69 ns | 6.01 ns |
140- // | StandardUnicodeIsAscii | 200 | 5,173.0 ns | 57.82 ns | 54.08 ns |
141- // | RuntimeIsAscii | 200 | 5,197.5 ns | 15.40 ns | 13.65 ns |
142- // | FastUnicodeIsAscii | 500 | 1,412.0 ns | 24.22 ns | 21.47 ns |
143- // | StandardUnicodeIsAscii | 500 | 6,196.5 ns | 60.78 ns | 53.88 ns |
144- // | RuntimeIsAscii | 500 | 6,215.5 ns | 96.43 ns | 90.20 ns |
145-
146-
14795 if ( s . Length > 16 ) // Adjusted for the unrolled loop
14896 {
149- // Using zeroed vector as initialization
150- Vector128 < ushort > total = Vector128 < ushort > . Zero ;
97+ Vector128 < ushort > total = Sse41 . LoadDquVector128 ( ( ushort * ) pStart ) ;
15198 i += 8 ;
15299
153100 // Unrolling the loop by 2x
154- for ( ; i + 16 < s . Length ; i += 16 )
101+ for ( ; i + 15 < s . Length ; i += 16 )
155102 {
156- Vector128 < ushort > raw1 = Sse41 . LoadDquVector128 ( ( ushort * ) pStart ) ;
157- Vector128 < ushort > raw2 = Sse41 . LoadDquVector128 ( ( ushort * ) pStart + i ) ;
103+ Vector128 < ushort > raw1 = Sse41 . LoadDquVector128 ( ( ushort * ) pStart + i ) ;
104+ Vector128 < ushort > raw2 = Sse41 . LoadDquVector128 ( ( ushort * ) pStart + i + 8 ) ;
158105
159106 total = Sse2 . Or ( total , raw1 ) ;
160107 total = Sse2 . Or ( total , raw2 ) ;
@@ -169,7 +116,6 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
169116 return false ;
170117 }
171118 }
172-
173119 for ( ; i < s . Length ; i ++ )
174120 {
175121 if ( pStart [ i ] >= 128 ) return false ;
0 commit comments