@@ -14,8 +14,6 @@ public static class UTF8
1414
1515 public unsafe static byte * RewindAndValidateWithErrors ( int howFarBack , byte * buf , int len , ref int utf16CodeUnitCountAdjustment , ref int scalarCountAdjustment )
1616 {
17- Console . WriteLine ( "-Rewind Validate with Errors" ) ;
18- Console . WriteLine ( "current Byte:" + Convert . ToString ( buf [ 0 ] , 2 ) . PadLeft ( 8 , '0' ) ) ;
1917
2018 int TempUtf16CodeUnitCountAdjustment = 0 ;
2119 int TempScalarCountAdjustment = 0 ;
@@ -31,11 +29,6 @@ public static class UTF8
3129 if ( foundLeadingBytes )
3230 {
3331 buf -= i ;
34- // extraLen = i; // a measure of how far we've backed up, only useful for debugging
35- // Console.WriteLine(howFarBack);
36- Console . WriteLine ( "Found leading byte at:" + i + ",Byte:" + Convert . ToString ( candidateByte , 2 ) . PadLeft ( 8 , '0' ) ) ;
37-
38- // Console.WriteLine("Backed up " + extraLen + 1 + " bytes");
3932 break ;
4033 }
4134 }
@@ -218,32 +211,16 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
218211
219212 public static ( int utfadjust , int scalaradjust ) CalculateN2N3FinalSIMDAdjustments ( int asciibytes , int n4 , int contbytes , int totalbyte )
220213 {
221-
222- Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte ) ;
223- // Calculate n3 based on the provided formula
214+ // Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);
224215 int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte ;
225-
226- // Calculate n2 based on the provided formula
227216 int n2 = - 2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte ;
228-
229- // Calculate utfadjust by adding them all up
230217 int utfadjust = - 2 * n4 - 2 * n3 - n2 ;
231-
232- // Calculate scalaradjust based on n4
233218 int scalaradjust = - n4 ;
234219
235-
236-
237-
238- // Return the calculated utfadjust and scalaradjust
239220 return ( utfadjust , scalaradjust ) ;
240221 }
241222
242-
243-
244-
245-
246- public unsafe static ( int utfadjust , int scalaradjust ) calculateErrorPathadjust ( int start_point , int processedLength , byte * pInputBuffer , int asciibytes , int n4 , int n2 , int contbytes )
223+ public unsafe static ( int utfadjust , int scalaradjust ) calculateErrorPathadjust ( int start_point , int processedLength , byte * pInputBuffer , int asciibytes , int n4 , int contbytes )
247224 {
248225 // Calculate the total bytes from start_point to processedLength
249226 int totalbyte = processedLength - start_point ;
@@ -257,7 +234,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
257234
258235 var ( utfadjust , scalaradjust ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes + adjustascii , n4 + adjustn4 , contbytes + adjustcont , totalbyte + adjusttotalbyte ) ;
259236
260- // Return the calculated n2 and n3
261237 return ( utfadjust , scalaradjust ) ;
262238 }
263239
@@ -339,7 +315,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
339315 Vector128 < byte > fourthByte = Vector128 . Create ( ( byte ) ( 0b11110000u - 0x80 ) ) ;
340316 Vector128 < byte > v0f = Vector128 . Create ( ( byte ) 0x0F ) ;
341317 Vector128 < byte > v80 = Vector128 . Create ( ( byte ) 0x80 ) ;
342-
343318 for ( ; processedLength + 16 <= inputLength ; processedLength += 16 )
344319 {
345320
@@ -417,8 +392,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
417392
418393 public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
419394 {
420- Console . WriteLine ( "--------------------------Calling function----------------------------------" ) ;
421- // Console.WriteLine("Length: " + inputLength);
422395 int processedLength = 0 ;
423396 int TempUtf16CodeUnitCountAdjustment = 0 ;
424397 int TempScalarCountAdjustment = 0 ;
@@ -570,11 +543,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
570543 // The block goes from processedLength to processedLength/16*16.
571544 int asciibytes = 0 ; // number of ascii bytes in the block (could also be called n1)
572545 int contbytes = 0 ; // number of continuation bytes in the block
573- int n4 = 0 ; // number of 4-byte sequences that start in this block
574- // int totalbyte = 0, n3 = 0, n2 = 0;
575-
576-
577-
546+ int n4 = 0 ; // number of 4-byte sequences that start in this block
578547
579548 for ( ; processedLength + 32 <= inputLength ; processedLength += 32 )
580549 {
@@ -586,12 +555,10 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
586555 {
587556 // We have an ASCII block, no need to process it, but
588557 // we need to check if the previous block was incomplete.
558+ //
589559 if ( ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
590560 {
591- // TODO? : this path is not explicitly tested
592- Console . WriteLine ( "---------All ascii need rewind" ) ;
593-
594-
561+ // TODO? : this path is not explicitly tested, write tests
595562 int totalbyteasciierror = processedLength - start_point ;
596563 var ( utfadjustasciierror , scalaradjustasciierror ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , totalbyteasciierror ) ;
597564
@@ -605,7 +572,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
605572 }
606573 else // Contains non-ASCII characters, we need to do non-trivial processing
607574 {
608- Console . WriteLine ( "--Found non-ascii:triggering SIMD routine at " + processedLength + "bytes" ) ; //debug
609575 // Use SubtractSaturate to effectively compare if bytes in block are greater than markers.
610576 Vector256 < byte > shuffled = Avx2 . Permute2x128 ( prevInputBlock , currentBlock , 0x21 ) ;
611577 prevInputBlock = currentBlock ;
@@ -625,13 +591,8 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
625591 Vector256 < byte > error = Avx2 . Xor ( must23As80 , sc ) ;
626592 if ( ! Avx2 . TestZ ( error , error ) )
627593 {
628- Console . WriteLine ( "-----Error path!!" ) ;
629-
630594 int totalbyteasciierror = processedLength - start_point ;
631- var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes , contbytes ) ;
632-
633- Console . WriteLine ( "calculateErrorPathadjust utf16 adjustment:" + utfadjustasciierror ) ;
634- Console . WriteLine ( "calculateErrorPathadjust scalar adjustment:" + scalaradjustasciierror ) ;
595+ var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes ) ;
635596
636597 utf16CodeUnitCountAdjustment = utfadjustasciierror ;
637598 scalarCountAdjustment = scalaradjustasciierror ;
@@ -645,52 +606,30 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
645606 utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment ;
646607 scalarCountAdjustment += TailScalarCodeUnitCountAdjustment ;
647608
648- // Console.WriteLine("--------"); //debug
649- Console . WriteLine ( "TempUTF16 after error rewind:" + utf16CodeUnitCountAdjustment ) ;
650- Console . WriteLine ( "TempScalar '' '' '':" + scalarCountAdjustment ) ;
651-
652609 return invalidBytePointer ;
653610 }
654611
655- // Console.WriteLine("Doublecount(Temp) after SIMD processing:" + TempUtf16CodeUnitCountAdjustment); debug
656- // Console.WriteLine("Scalarcount after SIMD processing:" + TempScalarCountAdjustment);
657612 prevIncomplete = Avx2 . SubtractSaturate ( currentBlock , maxValue ) ;
658613
659614 if ( ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
660615 {
661616 // We have an unterminated sequence.
662- Console . WriteLine ( "---Unterminated seq--- at " + processedLength + "bytes" ) ;
663-
664-
665617 var ( totalbyteadjustment , i , tempascii , tempcont , tempn4 ) = adjustmentFactor ( pInputBuffer + processedLength + 32 ) ;
666618
667- Console . WriteLine ( "this is n4 adjusted by the adjustmentfactor function :" + tempn4 + " contbyte: " + contbytes ) ;
668- 6
669619 processedLength -= i ;
670620 n4 += tempn4 ;
671621 contbytes += tempcont ;
672622
673- lastSIMDisIncomplete = true ;
674-
675- // // Console.WriteLine("TempUTF16:"+ TempUtf16CodeUnitCountAdjustment);
676- // // Console.WriteLine("TempScalar:"+ TempScalarCountAdjustment);
677-
678623 }
679624
680625 // No errors! Updating the variables we keep track of
681626 // We use one instruction (MoveMask) to update contbytes, plus one arithmetic operation.
682627 contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( sc ) ) ;
683628
684-
685-
686629 // We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
687630 n4 += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ;
688- Console . WriteLine ( "No error has been detected! Adding contbytes: " + ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( sc ) ) + "Adding n4: " + ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ) ;
689- Console . WriteLine ( " this is the accumulated contbytes" + contbytes + " and n4:" + n4 ) ; // debug
690631 }
691- asciibytes += ( int ) ( 32 - Popcnt . PopCount ( ( uint ) mask ) ) ; // TODO(Nick Nuon): simplify this expression
692-
693-
632+ asciibytes += ( int ) ( 32 - Popcnt . PopCount ( ( uint ) mask ) ) ;
694633 }
695634
696635 // important: we just update asciibytes if there was no error.
@@ -712,7 +651,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
712651
713652
714653 }
715- // Console.WriteLine("-Done with SIMD part!"); //debug
716654 // We have processed all the blocks using SIMD, we need to process the remaining bytes.
717655 // Process the remaining bytes with the scalar function
718656
@@ -721,11 +659,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
721659 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
722660 if ( processedLength < inputLength )
723661 {
724- Console . WriteLine ( "----Process remaining Scalar @ " + processedLength + "bytes" ) ;
725- // int overlapCount = 0;
726- // Console.WriteLine("processed length after backtrack:" + processedLength);
727- // Console.WriteLine("TempUTF16 before tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
728- // Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
729662 byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( 32 , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
730663 if ( invalidBytePointer != pInputBuffer + inputLength )
731664 {
@@ -734,8 +667,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
734667 // An invalid byte was found by the scalar function
735668 return invalidBytePointer ;
736669 }
737- // Console.WriteLine("TempUTF16 after tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
738- // Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
739670 }
740671 utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
741672 scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
@@ -746,7 +677,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
746677 public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength )
747678 {
748679 int processedLength = 0 ;
749-
750680 int TempUtf16CodeUnitCountAdjustment = 0 ;
751681 int TempScalarCountAdjustment = 0 ;
752682
0 commit comments