@@ -15,13 +15,20 @@ namespace SimdUnicodeBenchmarks
1515 public class Checker
1616 {
1717 List < char [ ] > names ;
18- List < bool > results ;
19- public static bool RuntimeIsAsciiApproach ( ReadOnlySpan < char > s )
20- {
21- // The runtime as of NET 8.0 has a dedicated method for this, but
22- // it is not available prior to that, so let us branch.
18+ List < byte [ ] > AsciiBytes ;
19+ List < char [ ] > nonAsciichars ;
20+ public List < byte [ ] > nonAsciiBytes ; // Declare at the class level
21+
22+ List < bool > results ;
23+
24+ public static bool RuntimeIsAsciiApproach ( ReadOnlySpan < char > s )
25+ {
26+
27+ // The runtime as of NET 8.0 has a dedicated method for this, but
28+ // it is not available prior to that, so let us branch.
2329#if NET8_0_OR_GREATER
24- return Ascii . IsValid ( s ) ;
30+ return System . Text . Ascii . IsValid ( s ) ;
31+
2532#else
2633 foreach ( char c in s )
2734 {
@@ -34,6 +41,8 @@ public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
3441 return true ;
3542#endif
3643 }
44+
45+
3746 public static char [ ] GetRandomASCIIString ( uint n )
3847 {
3948 var allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89" ;
@@ -49,23 +58,49 @@ public static char[] GetRandomASCIIString(uint n)
4958 return chars ;
5059 }
5160
/// <summary>
/// Builds a pseudo-random character array of length <paramref name="n"/> whose characters
/// are drawn from an alphabet mixing ASCII letters/digits with accented Latin letters.
/// </summary>
/// <remarks>
/// Each position is drawn independently from the alphabet, so the result is very likely,
/// but not guaranteed, to contain at least one non-ASCII character.
/// Two calls with the same <paramref name="n"/> and <paramref name="seed"/> construct the
/// generator identically; pass distinct seeds when distinct inputs are wanted.
/// </remarks>
/// <param name="n">Number of characters to generate.</param>
/// <param name="seed">
/// Seed for the pseudo-random generator. Defaults to 12345, the value that used to be
/// hard-coded, so existing callers keep their behaviour.
/// </param>
/// <returns>A newly allocated array of <paramref name="n"/> characters.</returns>
public static char[] GetRandomNonASCIIString(uint n, int seed = 12345)
{
    // 'é' is from Latin-1 Supplement; the other accented letters are Latin Extended-A.
    var allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89šžŸũŭůűųŷŹźŻżŽ";

    var chars = new char[n];
    var rd = new Random(seed);

    // Unsigned index so the loop bound comparison does not mix int and uint.
    for (uint i = 0; i < n; i++)
    {
        chars[i] = allowedChars[rd.Next(0, allowedChars.Length)];
    }

    return chars;
}
76+
77+
78+
79+ [ Params ( 100 , 200 , 500 , 1000 , 2000 ) ]
5480 public uint N ;
5581
82+
/// <summary>
/// Builds the benchmark inputs for the current value of <c>N</c>: 100 character arrays from
/// <c>GetRandomASCIIString</c>, their ASCII-encoded bytes, 100 UTF-8 encoded arrays from
/// <c>GetRandomNonASCIIString</c>, and one <c>false</c> result slot per sample.
/// </summary>
[GlobalSetup]
public void Setup()
{
    const int sampleCount = 100;

    names = new List<char[]>();
    nonAsciiBytes = new List<byte[]>();
    results = new List<bool>();
    var asciiEncoded = new List<byte[]>();

    for (int sample = 0; sample < sampleCount; sample++)
    {
        char[] asciiName = GetRandomASCIIString(N);
        names.Add(asciiName);

        // UTF-8 bytes of the mixed ASCII / non-ASCII sample.
        byte[] utf8Sample = Encoding.UTF8.GetBytes(GetRandomNonASCIIString(N));
        nonAsciiBytes.Add(utf8Sample);

        results.Add(false);

        // Same order as names. The ASCII encoder substitutes '?' for any character it
        // cannot represent, so these buffers hold ASCII bytes only.
        asciiEncoded.Add(System.Text.Encoding.ASCII.GetBytes(asciiName));
    }

    AsciiBytes = asciiEncoded;
}
68102
103+
69104 [ Benchmark ]
70105 public void FastUnicodeIsAscii ( )
71106 {
@@ -98,7 +133,65 @@ public void RuntimeIsAscii()
98133 count += 1 ;
99134 }
100135 }
/// <summary>
/// Benchmark: calls <c>SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte</c> once for every
/// buffer in <c>nonAsciiBytes</c>.
/// </summary>
/// <returns>
/// The sum of the values returned for each buffer. It is returned so the measured calls
/// have an observable result and cannot be discarded as dead code.
/// </returns>
[Benchmark]
public nuint Error_GetIndexOfFirstNonAsciiByte()
{
    nuint total = 0;

    foreach (byte[] buffer in nonAsciiBytes)
    {
        unsafe
        {
            // Pin the managed array so the raw pointer stays valid during the call.
            fixed (byte* pBuffer = buffer)
            {
                total += SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(pBuffer, (nuint)buffer.Length);
            }
        }
    }

    return total;
}
150+
/// <summary>
/// Benchmark: calls <c>Competition.Ascii.GetIndexOfFirstNonAsciiByte</c> once for every
/// buffer in <c>nonAsciiBytes</c>, as the counterpart of the <c>SimdUnicode</c> benchmark
/// over the same data.
/// </summary>
/// <returns>
/// The sum of the values returned for each buffer. It is returned so the measured calls
/// have an observable result and cannot be discarded as dead code.
/// </returns>
[Benchmark]
public nuint Error_Runtime_GetIndexOfFirstNonAsciiByte()
{
    nuint total = 0;

    foreach (byte[] buffer in nonAsciiBytes)
    {
        unsafe
        {
            // Pin the managed array so the raw pointer stays valid during the call.
            fixed (byte* pBuffer = buffer)
            {
                total += Competition.Ascii.GetIndexOfFirstNonAsciiByte(pBuffer, (nuint)buffer.Length);
            }
        }
    }

    return total;
}
101165
/// <summary>
/// Benchmark: calls <c>SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte</c> once for every
/// buffer in <c>AsciiBytes</c>.
/// </summary>
/// <returns>
/// The sum of the values returned for each buffer. It is returned so the measured calls
/// have an observable result and cannot be discarded as dead code.
/// </returns>
[Benchmark]
public nuint allAscii_GetIndexOfFirstNonAsciiByte()
{
    nuint total = 0;

    // Iterates the ASCII-encoded buffers (AsciiBytes), not nonAsciiBytes.
    foreach (byte[] asciiBuffer in AsciiBytes)
    {
        unsafe
        {
            // Pin the managed array so the raw pointer stays valid during the call.
            fixed (byte* pAscii = asciiBuffer)
            {
                total += SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)asciiBuffer.Length);
            }
        }
    }

    return total;
}
180+
/// <summary>
/// Benchmark: calls <c>Competition.Ascii.GetIndexOfFirstNonAsciiByte</c> once for every
/// buffer in <c>AsciiBytes</c>, as the counterpart of the <c>SimdUnicode</c> benchmark over
/// the same data.
/// </summary>
/// <returns>
/// The sum of the values returned for each buffer. It is returned so the measured calls
/// have an observable result and cannot be discarded as dead code.
/// </returns>
[Benchmark]
public nuint allAscii_Runtime_GetIndexOfFirstNonAsciiByte()
{
    nuint total = 0;

    // Iterates the ASCII-encoded buffers (AsciiBytes), not nonAsciiBytes.
    foreach (byte[] asciiBuffer in AsciiBytes)
    {
        unsafe
        {
            // Pin the managed array so the raw pointer stays valid during the call.
            fixed (byte* pAscii = asciiBuffer)
            {
                total += Competition.Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)asciiBuffer.Length);
            }
        }
    }

    return total;
}
102195 }
103196
104197 public class Program
0 commit comments