@@ -15,6 +15,9 @@ namespace SimdUnicodeBenchmarks
1515 public class Checker
1616 {
1717 List < char [ ] > names ;
18+ List < char [ ] > nonAsciichars ;
19+ public List < byte [ ] > nonAsciiByteArrays ; // Declare at the class level
20+
1821 List < bool > results ;
1922
2023 public static bool RuntimeIsAsciiApproach ( ReadOnlySpan < char > s )
@@ -50,23 +53,45 @@ public static char[] GetRandomASCIIString(uint n)
5053 return chars ;
5154 }
5255
/// <summary>
/// Builds a pseudo-random character array of length <paramref name="n"/> drawn from a
/// mixed ASCII / non-ASCII alphabet, guaranteeing that a non-empty result contains at
/// least one non-ASCII character.
/// </summary>
/// <param name="n">Number of characters to generate.</param>
/// <returns>
/// A new array of <paramref name="n"/> characters. Because the generator is re-seeded with
/// the same constant on every call, two calls with the same <paramref name="n"/> return
/// identical content.
/// </returns>
public static char[] GetRandomNonASCIIString(uint n)
{
    // ASCII letters and digits plus a few Latin-1 Supplement (é) and Latin Extended-A characters.
    const string allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89šžŸũŭůűųŷŹźŻżŽ";
    // The non-ASCII subset of allowedChars, used only for the fallback below.
    const string nonAsciiOnly = "éšžŸũŭůűųŷŹźŻżŽ";

    var chars = new char[n];
    // Fixed seed keeps runs reproducible.
    // NOTE(review): re-seeding per call means every call with the same n yields the same
    // array; confirm that callers generating many samples actually want identical inputs.
    var rd = new Random(12345);
    bool sawNonAscii = false;

    for (var i = 0; i < n; i++)
    {
        char picked = allowedChars[rd.Next(0, allowedChars.Length)];
        chars[i] = picked;
        sawNonAscii |= picked > '\u007F';
    }

    // Most of the alphabet is ASCII, so a short string can come out entirely ASCII.
    // Only in that case, overwrite one position so the result matches the method name.
    // Outputs that already contain a non-ASCII character are left untouched.
    if (n > 0 && !sawNonAscii)
    {
        chars[rd.Next(0, chars.Length)] = nonAsciiOnly[rd.Next(0, nonAsciiOnly.Length)];
    }

    return chars;
}
71+
72+
5373
54- [ Params ( 100 , 200 , 500 ) ]
74+ [ Params ( 100 , 200 , 500 , 1000 , 2000 ) ]
5575 public uint N ;
5676
77+
/// <summary>
/// Prepares the benchmark inputs: 100 ASCII character arrays, 100 UTF-8 byte arrays
/// encoded from mixed ASCII / non-ASCII character arrays, and 100 result slots set to false.
/// Every generated array has <c>N</c> characters.
/// </summary>
[GlobalSetup]
public void Setup()
{
    const int sampleCount = 100;

    names = new List<char[]>();
    nonAsciiByteArrays = new List<byte[]>();
    results = new List<bool>();

    int produced = 0;
    while (produced < sampleCount)
    {
        names.Add(GetRandomASCIIString(N));

        // Encode once here so the benchmark body does not pay for the UTF-8 conversion.
        byte[] utf8 = Encoding.UTF8.GetBytes(GetRandomNonASCIIString(N));
        nonAsciiByteArrays.Add(utf8);

        results.Add(false);
        produced++;
    }
}
6993
94+
7095 [ Benchmark ]
7196 public void FastUnicodeIsAscii ( )
7297 {
@@ -99,33 +124,23 @@ public void RuntimeIsAscii()
99124 count += 1 ;
100125 }
101126 }
102-
103-
/// <summary>
/// Calls <c>Ascii.GetIndexOfFirstNonAsciiByte</c> once on each pre-encoded byte array
/// in <c>nonAsciiByteArrays</c>.
/// </summary>
[Benchmark]
public void TestErrorGetIndexOfFirstNonAsciiByteBenchmark()
{
    unsafe
    {
        foreach (byte[] buffer in nonAsciiByteArrays)
        {
            // Pin the managed array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                // NOTE(review): the returned index is discarded; confirm the benchmark
                // harness still measures the call when the result is unused.
                _ = Ascii.GetIndexOfFirstNonAsciiByte(start, (nuint)buffer.Length);
            }
        }
    }
}
127141
128142
143+
129144 }
130145
131146 public class Program
0 commit comments