@@ -881,7 +881,12 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
881881 }
882882 return GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
883883 }
884-
/// <summary>
/// Debug helper: writes the bytes of <paramref name="v"/> to standard output
/// as a single line of hexadecimal digits.
/// </summary>
/// <param name="v">The 128-bit vector whose byte lanes are printed.</param>
public static void ToString(Vector128<byte> v)
{
    // One stack slot per byte lane; no heap allocation for the scratch buffer.
    Span<byte> lanes = stackalloc byte[Vector128<byte>.Count];
    v.CopyTo(lanes);

    string hex = Convert.ToHexString(lanes);
    Console.WriteLine(hex);
}
885890 public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
886891 {
887892 int processedLength = 0 ;
@@ -974,6 +979,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
974979 // we need to check if the previous block was incomplete.
975980 if ( AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
976981 {
982+ // Console.WriteLine("ASCII block, but previous block was incomplete");
977983 int off = processedLength >= 3 ? processedLength - 3 : processedLength ;
978984 byte * invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 16 - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
979985 // So the code is correct up to invalidBytePointer
@@ -1013,6 +1019,19 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10131019 // hardware:
10141020 if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( error ) ) . ToScalar ( ) != 0 )
10151021 {
1022+ // Console.WriteLine("Error block detected");
1023+ int slown4 = 0 ;
1024+ int slowcontbytes = 0 ;
1025+ int slowasciibytes = 0 ;
1026+ addCounters ( pInputBuffer , pInputBuffer + processedLength , ref slowasciibytes , ref slown4 , ref slowcontbytes ) ;
1027+ if ( slowasciibytes != asciibytes || slown4 != n4 || slowcontbytes != contbytes )
1028+ {
1029+ Console . WriteLine ( "Error in counting" ) ;
1030+ Console . WriteLine ( $ "asciibytes: { asciibytes } { slowasciibytes } ") ;
1031+ Console . WriteLine ( $ "n4: { n4 } { slown4 } ") ;
1032+ Console . WriteLine ( $ "contbytes: { contbytes } { slowcontbytes } ") ;
1033+ }
1034+
10161035 byte * invalidBytePointer ;
10171036 if ( processedLength == 0 )
10181037 {
@@ -1038,13 +1057,38 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10381057 Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
10391058 contbytes += - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareLessThanOrEqual ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
10401059 Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
1041- n4 += ( int ) ( AdvSimd . Arm64 . AddAcross ( AdvSimd . SubtractSaturate ( currentBlock , fourthByteMinusOne ) ) . ToScalar ( ) ) ;
1060+
1061+ int reallyslown4 = 0 ;
1062+ for ( int i = 0 ; i < 16 ; i ++ )
1063+ {
1064+ if ( pInputBuffer [ processedLength + i ] >= 0xF0 )
1065+ {
1066+ reallyslown4 ++ ;
1067+ }
1068+ }
1069+ var largerthan0f = AdvSimd . CompareGreaterThan ( currentBlock , fourthByteMinusOne ) ;
1070+ var largerthan0fones = AdvSimd . And ( largerthan0f , Vector128 . Create ( ( byte ) 1 ) ) ;
1071+ var largerthan0fonescount = AdvSimd . Arm64 . AddAcross ( largerthan0fones ) . ToScalar ( ) ;
1072+ if ( largerthan0fonescount != reallyslown4 )
1073+ {
1074+ Console . WriteLine ( "***********Error in counting 4-byte sequences" ) ;
1075+ ToString ( currentBlock ) ;
1076+ ToString ( fourthByteMinusOne ) ;
1077+ ToString ( AdvSimd . SubtractSaturate ( currentBlock , fourthByteMinusOne ) ) ;
1078+ ToString ( AdvSimd . CompareGreaterThan ( currentBlock , fourthByteMinusOne ) ) ;
1079+ Console . WriteLine ( ( ( AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThan ( currentBlock , fourthByteMinusOne ) ) . ToScalar ( ) ^ 0xff ) + 1 ) & 0xff ) ;
1080+
1081+ Console . WriteLine ( reallyslown4 ) ;
1082+ }
1083+ n4 += largerthan0fonescount ;
1084+
1085+ //n4 += (int)(((AdvSimd.Arm64.AddAcross(AdvSimd.CompareGreaterThan(currentBlock, fourthByteMinusOne)).ToScalar()^0xff)+1)&0xff);
10421086 }
10431087 asciibytes -= ( sbyte ) AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareLessThan ( currentBlock , v80 ) ) . ToScalar ( ) ;
10441088 }
10451089
10461090 // We may still have an error.
1047- if ( processedLength < inputLength || ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
1091+ if ( processedLength < inputLength || AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
10481092 {
10491093 byte * invalidBytePointer ;
10501094 if ( processedLength == 0 )
@@ -1054,10 +1098,11 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10541098 else
10551099 {
10561100 invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
1057-
10581101 }
10591102 if ( invalidBytePointer != pInputBuffer + inputLength )
10601103 {
1104+ // Console.WriteLine("trailing Error block detected");
1105+
10611106 if ( invalidBytePointer < pInputBuffer + processedLength )
10621107 {
10631108 removeCounters ( invalidBytePointer , pInputBuffer + processedLength , ref asciibytes , ref n4 , ref contbytes ) ;
@@ -1075,11 +1120,15 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10751120 addCounters ( pInputBuffer + processedLength , invalidBytePointer , ref asciibytes , ref n4 , ref contbytes ) ;
10761121 }
10771122 }
1123+ //Console.WriteLine("trailing simd method");
1124+
10781125 int final_total_bytes_processed = inputLength - start_point ;
10791126 ( utf16CodeUnitCountAdjustment , scalarCountAdjustment ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , final_total_bytes_processed ) ;
10801127 return pInputBuffer + inputLength ;
10811128 }
10821129 }
1130+ //Console.WriteLine("trailing scalar method");
1131+
10831132 return GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
10841133 }
10851134
0 commit comments