@@ -10,6 +10,70 @@ namespace SimdUnicode
1010 public static class UTF8
1111 {
1212
/// <summary>
/// Debugging helper: dumps <paramref name="bytes"/> to the console twice, first as
/// two-digit uppercase hexadecimal and then as 8-digit binary, 16 bytes per row.
/// The byte at <paramref name="highlightIndex"/> is printed in red; every byte whose
/// index is a multiple of 32 (i.e. the first byte of each 256-bit block) is printed
/// in green. Red takes precedence when both apply.
/// </summary>
/// <param name="bytes">The bytes to print.</param>
/// <param name="highlightIndex">Index of the byte to print in red, or -1 (the default) for none.</param>
static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
{
    // Hexadecimal view.
    WriteHighlightedDump("Hex: ", bytes, highlightIndex, b => $"{b:X2} ");
    Console.WriteLine("\n "); // New line for readability and to separate hex from binary

    // Binary view.
    WriteHighlightedDump("Binary: ", bytes, highlightIndex, b => Convert.ToString(b, 2).PadLeft(8, '0') + " ");
    Console.WriteLine(); // New line for readability
}

/// <summary>
/// Writes <paramref name="label"/> followed by one rendered cell per byte, breaking the
/// line after every 16 bytes and applying the red/green highlighting described on
/// <see cref="PrintHexAndBinary"/>.
/// </summary>
/// <param name="label">Text written once, before the first byte.</param>
/// <param name="bytes">The bytes to print.</param>
/// <param name="highlightIndex">Index of the byte to print in red, or -1 for none.</param>
/// <param name="render">Turns a single byte into the text printed for it.</param>
private static void WriteHighlightedDump(string label, byte[] bytes, int highlightIndex, Func<byte, string> render)
{
    const int chunkSize = 16; // 128 bits = 16 bytes per row

    Console.Write(label);
    for (int i = 0; i < bytes.Length; i++)
    {
        if (i > 0 && i % chunkSize == 0)
        {
            Console.WriteLine(); // New line after every 16 bytes
        }

        bool isHighlighted = i == highlightIndex;
        // chunkSize * 2 == 32 bytes == 256 bits: marks the start of each 256-bit block.
        bool isBlockStart = !isHighlighted && i % (chunkSize * 2) == 0;

        if (isHighlighted)
        {
            Console.ForegroundColor = ConsoleColor.Red;
        }
        else if (isBlockStart)
        {
            Console.ForegroundColor = ConsoleColor.Green;
        }

        Console.Write(render(bytes[i]));

        if (isHighlighted || isBlockStart)
        {
            Console.ResetColor();
        }

        // Add a separator between bytes but not at the end of a row.
        if ((i + 1) % chunkSize != 0)
        {
            Console.Write(" ");
        }
    }
}
75+
76+
// Debugging aid: renders a byte as its base-2 digits, left-padded with '0' to 8 characters.
static Func<byte, string> byteToBinaryString = value =>
{
    string bits = Convert.ToString(value, 2);
    return bits.PadLeft(8, '0');
};
1478
1579 public unsafe static byte * RewindAndValidateWithErrors ( int howFarBack , byte * buf , int len , ref int utf16CodeUnitCountAdjustment , ref int scalarCountAdjustment )
@@ -188,38 +252,40 @@ public static class UTF8
/// <summary>
/// Walks backward from <paramref name="pInputBuffer"/> over at most four UTF-8
/// continuation bytes (10xxxxxx) to the first non-continuation byte, classifies that
/// byte, and reports how the caller's running counters should be corrected.
/// </summary>
/// <param name="pInputBuffer">
/// Pointer just past the bytes to inspect; the bytes at offsets -1, -2, ... are read.
/// </param>
/// <returns>
/// A tuple of:
/// <c>totalbyteadjustment</c> - the sequence length implied by the lead byte minus
/// <c>backedupByHowMuch</c> (how many bytes of the sequence are missing), 0 for ASCII;
/// <c>backedupByHowMuch</c> - the backward offset of the byte that stopped the scan;
/// <c>ascii</c> - -1 when that byte is ASCII, otherwise 0;
/// <c>contbyte</c> - minus the number of continuation bytes stepped over;
/// <c>n4</c> - -1 when that byte is treated as a 4-byte lead, otherwise 0.
/// </returns>
/// <remarks>
/// No bounds or validity checks are performed. If all four bytes before the pointer
/// are continuation bytes, the byte at offset -5 is read and classified. Any byte that
/// is not ASCII, a 2-byte lead or a 3-byte lead falls through to the 4-byte case.
/// </remarks>
public unsafe static (int totalbyteadjustment, int backedupByHowMuch, int ascii, int contbyte, int n4) adjustmentFactor(byte* pInputBuffer)
{
    // Find the first non-continuation byte, working backward.
    int i = 1;
    int contbyteadjust = 0;
    while (i <= 4 && (pInputBuffer[-i] & 0b11000000) == 0b10000000)
    {
        contbyteadjust -= 1; // one more continuation byte that will be re-counted later
        i++;
    }

    byte lead = pInputBuffer[-i];

    if ((lead & 0b10000000) == 0)
    {
        // ASCII byte. We must have that i == 1.
        return (0, i, -1, contbyteadjust, 0);
    }
    if ((lead & 0b11100000) == 0b11000000)
    {
        // 2-byte lead: i == 1 or i == 2; if i == 1, we are missing one byte.
        return (2 - i, i, 0, contbyteadjust, 0);
    }
    if ((lead & 0b11110000) == 0b11100000)
    {
        // 3-byte lead: i is 1, 2 or 3; if i == 1 we are missing two bytes, if i == 2 one byte.
        return (3 - i, i, 0, contbyteadjust, 0);
    }
    // We must have that (lead & 0b11111000) == 0b11110000.
    // 4-byte lead: i is 1..4; if i == 1 we are missing three bytes, if i == 2 two bytes, if i == 3 one byte.
    return (4 - i, i, 0, contbyteadjust, -1);
}
210277
/// <summary>
/// Recovers the number of 2-byte and 3-byte UTF-8 sequences from the counters kept by
/// the SIMD loop and turns them into the final adjustments.
/// </summary>
/// <remarks>
/// With n2, n3, n4 the counts of 2-, 3- and 4-byte sequences, the formulas below are
/// the solution of
/// <c>totalbyte = asciibytes + 2*n2 + 3*n3 + 4*n4</c> and
/// <c>contbytes = n2 + 2*n3 + 3*n4</c>
/// for n2 and n3. Debug traces of the inputs and outputs are written to the console.
/// </remarks>
/// <param name="asciibytes">Number of ASCII bytes counted.</param>
/// <param name="n4">Number of 4-byte sequences counted.</param>
/// <param name="contbytes">Number of continuation bytes counted.</param>
/// <param name="totalbyte">Total number of bytes covered by the counters.</param>
/// <returns>
/// <c>utfadjust</c> = -2*n4 - 2*n3 - n2 and <c>scalaradjust</c> = -n4.
/// </returns>
public static (int utfadjust, int scalaradjust) CalculateN2N3FinalSIMDAdjustments(int asciibytes, int n4, int contbytes, int totalbyte)
{
    Console.WriteLine("---------"); //debug
    Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte); //debug

    int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte;
    int n2 = -2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte;

    // Per sequence: a 2-byte one contributes -1, a 3-byte or 4-byte one contributes -2.
    int utfadjust = -2 * n4 - 2 * n3 - n2;
    int scalaradjust = -n4;

    Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust); //debug

    return (utfadjust, scalaradjust);
}
225291
@@ -395,7 +461,10 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
395461
396462 public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
397463 {
398- Console . WriteLine ( "-------------------------------------" ) ;
464+ Console . ForegroundColor = ConsoleColor . Blue ; //debug
465+ Console . WriteLine ( "-------------------------------------" ) ; //debug
466+ Console . ResetColor ( ) ; //debug
467+
399468 int processedLength = 0 ;
400469 int TempUtf16CodeUnitCountAdjustment = 0 ;
401470 int TempScalarCountAdjustment = 0 ;
@@ -568,10 +637,17 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
568637 Vector256 < byte > prev1 = Avx2 . AlignRight ( prevInputBlock , shuffled , ( byte ) ( 16 - 1 ) ) ;
569638 // Vector256.Shuffle vs Avx2.Shuffle
570639 // https://github.com/dotnet/runtime/blob/1400c1e7a888ea1e710e5c08d55c800e0b04bf8a/docs/coding-guidelines/vectorization-guidelines.md#vector256shuffle-vs-avx2shuffle
571- Vector256 < byte > byte_1_high = Avx2 . Shuffle ( shuf1 , Avx2 . ShiftRightLogical ( prev1 . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
572- Vector256 < byte > byte_1_low = Avx2 . Shuffle ( shuf2 , ( prev1 & v0f ) ) ;
573- Vector256 < byte > byte_2_high = Avx2 . Shuffle ( shuf3 , Avx2 . ShiftRightLogical ( currentBlock . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
640+ Vector256 < byte > byte_1_high = Avx2 . Shuffle ( shuf1 , Avx2 . ShiftRightLogical ( prev1 . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ; // takes the XXXX 0000 part of the previous byte
641+ Vector256 < byte > byte_1_low = Avx2 . Shuffle ( shuf2 , ( prev1 & v0f ) ) ; // takes the 0000 XXXX part of the previous part
642+ Vector256 < byte > byte_2_high = Avx2 . Shuffle ( shuf3 , Avx2 . ShiftRightLogical ( currentBlock . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ; // takes the XXXX 0000 part of the current byte
574643 Vector256 < byte > sc = Avx2 . And ( Avx2 . And ( byte_1_high , byte_1_low ) , byte_2_high ) ;
644+
645+ // Create a span from the Vector256<byte>
646+ // Console.WriteLine("");
647+ // Span<byte> byteSpan = MemoryMarshal.Cast<Vector256<byte>, byte>(MemoryMarshal.CreateSpan(ref sc, 1));
648+ // byte[] scbytes = byteSpan.ToArray();
649+ // PrintHexAndBinary(scbytes);
650+
575651 Vector256 < byte > prev2 = Avx2 . AlignRight ( prevInputBlock , shuffled , ( byte ) ( 16 - 2 ) ) ;
576652 Vector256 < byte > prev3 = Avx2 . AlignRight ( prevInputBlock , shuffled , ( byte ) ( 16 - 3 ) ) ;
577653 Vector256 < byte > isThirdByte = Avx2 . SubtractSaturate ( prev2 , thirdByte ) ;
@@ -581,7 +657,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
581657 Vector256 < byte > error = Avx2 . Xor ( must23As80 , sc ) ;
582658 if ( ! Avx2 . TestZ ( error , error ) )
583659 {
584- Console . WriteLine ( "--Error!" ) ;
660+ Console . WriteLine ( $ "--Error! @ { processedLength } bytes ") ; //debug
585661 int totalbyteasciierror = processedLength - start_point ;
586662 var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes ) ;
587663
@@ -608,25 +684,53 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
608684 var ( totalbyteadjustment , i , tempascii , tempcont , tempn4 ) = adjustmentFactor ( pInputBuffer + processedLength + 32 ) ;
609685
610686 processedLength -= i ;
611- n4 += tempn4 ;
687+ n4 += tempn4 ; // this is + because the adjustment function returns something negative already
612688 contbytes += tempcont ;
613- Console . WriteLine ( $ "Unterminated! Backing up by { i } ") ;
614-
689+ Console . WriteLine ( $ "Unterminated! @ { processedLength } Backing up by { i } ") ; //debug
615690 }
616691
692+
693+
694+
695+
696+ // Vector256<byte> contbyto = Vector256.Create((byte)(0b11000000u - 0x80));
697+ // Vector256<byte> isStartOf4ByteSequence = Avx2.SubtractSaturate(currentBlock, fourthByte);
698+ // Vector256<byte> isStartOf3OrMoreByteSequence = Avx2.SubtractSaturate(currentBlock, thirdByte);
699+ // Vector256<byte> isStartOf2OrMoreByteSequence = Avx2.SubtractSaturate(currentBlock, secondByte);
700+
701+ // uint twoBytePlusCount = Popcnt.PopCount((uint)Avx2.MoveMask(isStartOf2OrMoreByteSequence));
702+ // uint threeBytePlusCount = Popcnt.PopCount((uint)Avx2.MoveMask(isStartOf3OrMoreByteSequence));
703+ // uint fourByteCount = Popcnt.PopCount((uint)Avx2.MoveMask(isStartOf4ByteSequence));
704+
705+
617706 // No errors! Updating the variables we keep track of
618707 // We use one instruction (MoveMask) to update ncon, plus one arithmetic operation.
619- contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( sc ) ) ;
708+
709+ // contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc)); // this actually counts the number of 2 consecutive continuous bytes
710+ // Placeholder until another way to count continuation bytes is found
711+
712+ Vector256 < byte > top2bits = Vector256 . Create ( ( byte ) 0b11000000 ) ; // Mask to isolate the two most significant bits
713+ Vector256 < byte > contbytemask = Vector256 . Create ( ( byte ) 0b10000000 ) ; // The expected pattern for continuation bytes: 10xxxxxx
620714
715+ // Apply the mask and compare
716+ Vector256 < byte > maskedData = Avx2 . And ( currentBlock , top2bits ) ;
717+ Vector256 < byte > compareResult = Avx2 . CompareEqual ( maskedData , contbytemask ) ;
718+ // Move mask to get integer representation
719+ contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( compareResult ) ) ;
720+
721+
722+
621723 // We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
622724 n4 += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ;
623725 }
726+
727+ // important: we just update asciibytes if there was no error.
728+ // We count the number of ascii bytes in the block using just some simple arithmetic
729+ // and no expensive operation:
624730 asciibytes += ( int ) ( 32 - Popcnt . PopCount ( ( uint ) mask ) ) ;
625731 }
626732
627- // important: we just update asciibytes if there was no error.
628- // We count the number of ascii bytes in the block using just some simple arithmetic
629- // and no expensive operation:
733+
630734
631735
632736 // There are 2 possible scenarios here : either
0 commit comments