@@ -10,7 +10,7 @@ namespace SimdUnicode
1010 public static class UTF8
1111 {
1212
13- //debug helper function for debugging: it prints a green byte every 32 bytes and a red byte at a given index
13+ // //debug helper function for debugging: it prints a green byte every 32 bytes and a red byte at a given index
1414static void PrintHexAndBinary ( byte [ ] bytes , int highlightIndex = - 1 )
1515{
1616 int chunkSize = 16 ; // 128 bits = 16 bytes
@@ -78,20 +78,20 @@ static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
7878
7979 public unsafe static byte * RewindAndValidateWithErrors ( int howFarBack , byte * buf , int len , ref int utf16CodeUnitCountAdjustment , ref int scalarCountAdjustment )
8080 {
81- // Console.WriteLine("CALLING REWIND");
81+ // // Console.WriteLine("CALLING REWIND");//debug
8282 int extraLen = 0 ;
8383 bool foundLeadingBytes = false ;
8484
8585 for ( int i = 0 ; i <= howFarBack ; i ++ )
8686 {
8787 byte candidateByte = buf [ 0 - i ] ;
8888 foundLeadingBytes = ( candidateByte & 0b11000000 ) != 0b10000000 ;
89- Console . WriteLine ( $ "Rewinding byte to offset { - i } : { candidateByte : X2} ") ;
90- Console . WriteLine ( foundLeadingBytes ) ;
89+ // Console.WriteLine($"Rewinding byte to offset {-i}: {candidateByte:X2}");//debug
90+ // Console.WriteLine(foundLeadingBytes);//debug
9191
9292 if ( foundLeadingBytes )
9393 {
94- Console . WriteLine ( "Found leading byte" ) ;
94+ // Console.WriteLine("Found leading byte");//debug
9595 buf -= i ;
9696 break ;
9797 }
@@ -257,8 +257,8 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
257257 {
258258 if ( ( pInputBuffer [ - i ] & 0b11000000 ) != 0b10000000 )
259259 {
260- string binaryString = Convert . ToString ( pInputBuffer [ - i ] , 2 ) . PadLeft ( 8 , '0' ) ; //debug
261- Console . WriteLine ( $ "Stopping at byte { binaryString } ") ; //debug
260+ // string binaryString = Convert.ToString(pInputBuffer[-i], 2).PadLeft(8, '0');//debug
261+ // Console.WriteLine($"Stopping at byte {binaryString}"); //debug
262262 break ;
263263 }
264264 contbyteadjust -= 1 ;
@@ -278,14 +278,14 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
278278
// CalculateN2N3FinalSIMDAdjustments
// Purpose: from four integer tallies, derive the intermediate values n3 and n2 and return the
// pair (utfadjust, scalaradjust) computed from them.
//
// Parameters (meanings inferred from the names and the algebra below -- TODO confirm with callers):
//   asciibytes - presumably the number of single-byte (ASCII) bytes counted
//   n4         - presumably the number of 4-byte sequences (their leading bytes)
//   contbytes  - presumably the number of continuation bytes (10xxxxxx)
//   totalbyte  - presumably the total number of bytes counted
//
// Returns: a tuple (utfadjust, scalaradjust) where
//   utfadjust    = -(2*n4 + 2*n3 + n2)
//   scalaradjust = -n4
//
// NOTE(review): the n3/n2 formulas are exactly what you get by solving the two identities
//   totalbyte = asciibytes + contbytes + n2 + n3 + n4
//   contbytes = n2 + 2*n3 + 3*n4
// for n3 and n2, which suggests n2/n3 are the counts of 2-byte and 3-byte sequences.
// No input validation is performed; inconsistent tallies simply yield whatever the
// arithmetic produces (possibly negative n2/n3).
279279 public static ( int utfadjust , int scalaradjust ) CalculateN2N3FinalSIMDAdjustments ( int asciibytes , int n4 , int contbytes , int totalbyte )
280280 {
281- Console . WriteLine ( "---------" ) ; //debug
282- Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte ) ; //debug
281+ // Console.WriteLine("---------"); //debug
282+ // Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);//debug
// n3 = asciibytes + 2*contbytes - 2*n4 - totalbyte
283283 int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte ;
// n2 = 2*totalbyte - 2*asciibytes - 3*contbytes + n4
284284 int n2 = - 2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte ;
// Weighted sum: n4 and n3 each contribute -2, n2 contributes -1.
// Presumably the difference between byte length and UTF-16 code-unit count -- TODO confirm.
285285 int utfadjust = - 2 * n4 - 2 * n3 - n2 ;
// Only n4 contributes here; presumably because a 4-byte sequence maps to two UTF-16
// code units but a single scalar value -- TODO confirm.
286286 int scalaradjust = - n4 ;
287287
288- Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust ) ; //debug
288+ // Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust);//debug
289289
290290 return ( utfadjust , scalaradjust ) ;
291291 }
@@ -494,9 +494,9 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
494494
495495 public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
496496 {
497- Console . ForegroundColor = ConsoleColor . Blue ; //debug
498- Console . WriteLine ( "-------------------------------------" ) ; //debug
499- Console . ResetColor ( ) ; //debug
497+ // Console.ForegroundColor = ConsoleColor.Blue; //debug
498+ // Console.WriteLine("-------------------------------------");//debug
499+ // Console.ResetColor();//debug
500500
501501 int processedLength = 0 ;
502502 int TempUtf16CodeUnitCountAdjustment = 0 ;
@@ -690,7 +690,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
690690 Vector256 < byte > error = Avx2 . Xor ( must23As80 , sc ) ;
691691 // if (!Avx2.TestZ(error, error))
692692 // {
693- // Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
693+ // Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
694694
695695 // int off = processedLength >= 32 ? processedLength - 32 : processedLength;
696696 // byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment,ref TailScalarCodeUnitCountAdjustment);
@@ -733,16 +733,19 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
733733
734734 if ( ! Avx2 . TestZ ( error , error ) )
735735 {
736- Console . WriteLine ( $ "--Error! @ { processedLength } bytes") ; //debug
736+ // Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
737737
738- int off = processedLength >= 32 ? processedLength - 32 : processedLength ;
738+ int off = processedLength > 32 ? processedLength - 32 : processedLength ; // this does not backup ff processedlength = 32
739+ // int off = processedLength >= 32 ? processedLength - 32 : processedLength; original/main algorithm working
740+
741+ // Console.WriteLine($"Offset backup by: {off}");//debug
739742 byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
740743 bool TooLongErroronEdge = false ;
741744
742745 utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment ;
743746 scalarCountAdjustment = TailScalarCodeUnitCountAdjustment ;
744747
745- Console . WriteLine ( $ "RewindScalarValidation's function utf16adjust:{ TailUtf16CodeUnitCountAdjustment } , scalaradjust:{ TailScalarCodeUnitCountAdjustment } ") ;
748+ // Console.WriteLine($"RewindScalarValidation's function utf16adjust:{TailUtf16CodeUnitCountAdjustment}, scalaradjust:{TailScalarCodeUnitCountAdjustment}");//debug
746749
747750 // We need to take care of eg
748751 // 11011110 10101101 11110000 10101101 10101111 10011111 11010111 10101000 11001101 10111001 11010100 10000111 11101111 10010000 10000000 11110011
@@ -772,7 +775,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
772775 // if (isContinuationByte && isAtBoundary && isOneByteAfterProcessedLength)// this alone creates false positives
773776 if ( isContinuationByte && isOneByteAfterProcessedLength )
774777 {
775- Console . WriteLine ( "Triggering TooLongErrorOnEdge adjustment" ) ;
778+ // Console.WriteLine("Triggering TooLongErrorOnEdge adjustment");//debug
776779 TooLongErroronEdge = true ;
777780 }
778781
@@ -796,7 +799,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
796799 processedLength -= i ;
797800 n4 += tempn4 ; // this is + because the adjustment function returns something negative already
798801 contbytes += tempcont ;
799- Console . WriteLine ( $ "Unterminated! @ { processedLength } Backing up by { i } ") ; //debug
802+ // Console.WriteLine($"Unterminated! @ {processedLength} Backing up by {i}"); //debug
800803 }
801804
802805
0 commit comments