@@ -715,16 +715,19 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
715715 return pInputBuffer + inputLength ;
716716 }
717717
718- public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength )
718+ public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
719719 {
720720 int processedLength = 0 ;
721721 int TempUtf16CodeUnitCountAdjustment = 0 ;
722722 int TempScalarCountAdjustment = 0 ;
723723
724- int utf16CodeUnitCountAdjustment = 0 , scalarCountAdjustment = 0 ;
724+ int TailScalarCodeUnitCountAdjustment = 0 ;
725+ int TailUtf16CodeUnitCountAdjustment = 0 ;
725726
726727 if ( pInputBuffer == null || inputLength <= 0 )
727728 {
729+ utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
730+ scalarCountAdjustment = TempScalarCountAdjustment ;
728731 return pInputBuffer ;
729732 }
730733 if ( inputLength > 128 )
@@ -793,18 +796,32 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
793796 Vector128 < byte > v0f = Vector128 . Create ( ( byte ) 0x0F ) ;
794797 Vector128 < byte > v80 = Vector128 . Create ( ( byte ) 0x80 ) ;
795798 // Performance note: we could process 64 bytes at a time for better speed in some cases.
799+ int start_point = processedLength ;
800+
801+ // The block goes from processedLength to processedLength/16*16.
802+ int asciibytes = 0 ; // number of ascii bytes in the block (could also be called n1)
803+ int contbytes = 0 ; // number of continuation bytes in the block
804+ int n4 = 0 ; // number of 4-byte sequences that start in this block
805+
796806 for ( ; processedLength + 16 <= inputLength ; processedLength += 16 )
797807 {
798808
799809 Vector128 < byte > currentBlock = AdvSimd . LoadVector128 ( pInputBuffer + processedLength ) ;
800810
801- if ( AdvSimd . Arm64 . MaxAcross ( currentBlock ) . ToScalar ( ) > 127 )
811+ if ( AdvSimd . Arm64 . MaxAcross ( currentBlock ) . ToScalar ( ) <= 127 )
802812 {
803813 // We have an ASCII block, no need to process it, but
804814 // we need to check if the previous block was incomplete.
805815 if ( AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
806816 {
807- return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( processedLength , pInputBuffer + processedLength , inputLength - processedLength , ref utf16CodeUnitCountAdjustment , ref scalarCountAdjustment ) ;
817+ int totalbyteasciierror = processedLength - start_point ;
818+ var ( utfadjustasciierror , scalaradjustasciierror ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , totalbyteasciierror ) ;
819+
820+ utf16CodeUnitCountAdjustment = utfadjustasciierror ;
821+ scalarCountAdjustment = scalaradjustasciierror ;
822+
823+ int off = processedLength >= 3 ? processedLength - 3 : processedLength ;
824+ return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + off , inputLength - off , ref utf16CodeUnitCountAdjustment , ref scalarCountAdjustment ) ;
808825 }
809826 prevIncomplete = Vector128 < byte > . Zero ;
810827 }
@@ -829,52 +846,76 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
829846 // hardware:
830847 if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( error ) ) . ToScalar ( ) != 0 )
831848 {
832- return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( processedLength , pInputBuffer + processedLength , inputLength - processedLength , ref utf16CodeUnitCountAdjustment , ref scalarCountAdjustment ) ;
849+ int off = processedLength > 32 ? processedLength - 32 : processedLength ; // note: this does not back up when processedLength <= 32
850+ byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
851+ utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment ;
852+ scalarCountAdjustment = TailScalarCodeUnitCountAdjustment ;
853+
854+ int totalbyteasciierror = processedLength - start_point ;
855+ var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes ) ;
856+
857+ utf16CodeUnitCountAdjustment += utfadjustasciierror ;
858+ scalarCountAdjustment += scalaradjustasciierror ;
859+
860+ return invalidBytePointer ;
833861 }
834862 prevIncomplete = AdvSimd . SubtractSaturate ( currentBlock , maxValue ) ;
863+ if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( prevIncomplete ) ) . ToScalar ( ) != 0 )
864+ {
865+ // We have an unterminated sequence.
866+ var ( totalbyteadjustment , i , tempascii , tempcont , tempn4 ) = adjustmentFactor ( pInputBuffer + processedLength + 32 ) ;
867+ processedLength -= i ;
868+ n4 += tempn4 ;
869+ contbytes += tempcont ;
870+ }
871+ Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
872+ contbytes += 16 - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThan ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
873+ Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
874+ n4 += ( int ) ( AdvSimd . Arm64 . AddAcross ( AdvSimd . SubtractSaturate ( currentBlock , fourthByteMinusOne ) ) . ToScalar ( ) ) ;
835875 }
876+
877+ asciibytes -= ( int ) AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThanOrEqual ( currentBlock , v80 ) ) . ToScalar ( ) ;
878+
836879 }
880+
881+ int totalbyte = processedLength - start_point ;
882+ var ( utf16adjust , scalaradjust ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , totalbyte ) ;
883+
884+ TempUtf16CodeUnitCountAdjustment = utf16adjust ;
885+ TempScalarCountAdjustment = scalaradjust ;
886+
837887 }
838888 }
839889 // We have processed all the blocks using SIMD, we need to process the remaining bytes.
840-
841890 // Process the remaining bytes with the scalar function
891+
892+ // worst possible case is 4 bytes, where we need to backtrack 3 bytes
893+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
842894 if ( processedLength < inputLength )
843895 {
844- // We need to possibly backtrack to the start of the last code point
845- // worst possible case is 4 bytes, where we need to backtrack 3 bytes
846- // 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
847- if ( processedLength > 0 && ( sbyte ) pInputBuffer [ processedLength ] <= - 65 )
848- {
849- processedLength -= 1 ;
850- if ( processedLength > 0 && ( sbyte ) pInputBuffer [ processedLength ] <= - 65 )
851- {
852- processedLength -= 1 ;
853- if ( processedLength > 0 && ( sbyte ) pInputBuffer [ processedLength ] <= - 65 )
854- {
855- processedLength -= 1 ;
856- }
857- }
858- }
859- int TailScalarCodeUnitCountAdjustment = 0 ;
860- int TailUtf16CodeUnitCountAdjustment = 0 ;
861- byte * invalidBytePointer = SimdUnicode . UTF8 . GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out TailUtf16CodeUnitCountAdjustment , out TailScalarCodeUnitCountAdjustment ) ;
896+
897+ byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( 32 , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
862898 if ( invalidBytePointer != pInputBuffer + inputLength )
863899 {
900+ utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
901+ scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
902+
864903 // An invalid byte was found by the scalar function
865904 return invalidBytePointer ;
866905 }
867906 }
907+ utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
908+ scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
868909
869910 return pInputBuffer + inputLength ;
870911 }
871912 public unsafe static byte * GetPointerToFirstInvalidByte ( byte * pInputBuffer , int inputLength , out int Utf16CodeUnitCountAdjustment , out int ScalarCodeUnitCountAdjustment )
872913 {
873914
874- // if (AdvSimd.Arm64.IsSupported)
875- // {
876- // return GetPointerToFirstInvalidByteArm64(pInputBuffer, inputLength);
877- // }
915+ if ( AdvSimd . Arm64 . IsSupported )
916+ {
917+ return GetPointerToFirstInvalidByteArm64 ( pInputBuffer , inputLength , out Utf16CodeUnitCountAdjustment , out ScalarCodeUnitCountAdjustment ) ;
918+ }
878919 if ( Avx2 . IsSupported )
879920 {
880921 return GetPointerToFirstInvalidByteAvx2 ( pInputBuffer , inputLength , out Utf16CodeUnitCountAdjustment , out ScalarCodeUnitCountAdjustment ) ;
0 commit comments