11using System ;
22using System . Collections . Generic ;
33using System . Diagnostics . CodeAnalysis ;
4+ using System . Runtime . CompilerServices ;
5+
46
57
68#if ! NETSTANDARD2_0
@@ -172,6 +174,7 @@ private unsafe void IncrementStd(T value)
172174 }
173175
174176 // Applies another round of hashing for additional randomization
177+ //[MethodImpl(MethodImplOptions.AggressiveInlining)]
175178 private static int Rehash ( int x )
176179 {
177180 x = ( int ) ( x * 0x31848bab ) ;
@@ -180,6 +183,7 @@ private static int Rehash(int x)
180183 }
181184
182185 // Applies a supplemental hash function to defend against poor-quality hash codes.
186+ //[MethodImpl(MethodImplOptions.AggressiveInlining)]
183187 private static int Spread ( int x )
184188 {
185189 x ^= ( int ) ( ( uint ) x >> 17 ) ;
@@ -231,40 +235,28 @@ private void Reset()
231235 }
232236
233237#if ! NETSTANDARD2_0
238+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
239+ //[MethodImpl((MethodImplOptions)512)]
234240 private unsafe int EstimateFrequencyAvx ( T value )
235241 {
236242 int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
237243 int counterHash = Rehash ( blockHash ) ;
238244 int block = ( blockHash & blockMask ) << 3 ;
239245
240- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
241- h = Avx2 . ShiftRightLogicalVariable ( h . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
246+ Vector128 < int > h = Avx2 . ShiftRightLogicalVariable ( Vector128 . Create ( counterHash ) . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
247+ Vector128 < int > index = Avx2 . ShiftLeftLogical ( Avx2 . And ( Avx2 . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 15 ) ) , 2 ) ;
248+ Vector128 < int > blockOffset = Avx2 . Add ( Avx2 . Add ( Vector128 . Create ( block ) , Avx2 . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
242249
243- var index = Avx2 . ShiftRightLogical ( h , 1 ) ;
244- index = Avx2 . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
245- Vector128 < int > offset = Avx2 . And ( h , Vector128 . Create ( 1 ) ) ;
246- Vector128 < int > blockOffset = Avx2 . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
247- blockOffset = Avx2 . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
250+ Vector256 < ulong > indexLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
248251
249252 fixed ( long * tablePtr = table )
250253 {
251- Vector256 < long > tableVector = Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) ;
252- index = Avx2 . ShiftLeftLogical ( index , 2 ) ;
253-
254- // convert index from int to long via permute
255- Vector256 < long > indexLong = Vector256 . Create ( index , Vector128 < int > . Zero ) . AsInt64 ( ) ;
256- Vector256 < int > permuteMask2 = Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ;
257- indexLong = Avx2 . PermuteVar8x32 ( indexLong . AsInt32 ( ) , permuteMask2 ) . AsInt64 ( ) ;
258- tableVector = Avx2 . ShiftRightLogicalVariable ( tableVector , indexLong . AsUInt64 ( ) ) ;
259- tableVector = Avx2 . And ( tableVector , Vector256 . Create ( 0xfL ) ) ;
260-
261- Vector256 < int > permuteMask = Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) ;
262- Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( tableVector . AsInt32 ( ) , permuteMask )
254+ Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( Avx2 . And ( Avx2 . ShiftRightLogicalVariable ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , indexLong ) , Vector256 . Create ( 0xfL ) ) . AsInt32 ( ) , Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) )
263255 . GetLower ( )
264256 . AsUInt16 ( ) ;
265257
266258 // set the zeroed high parts of the long value to ushort.Max
267- #if NET6_0
259+ #if NET6_0_OR_GREATER
268260 count = Avx2 . Blend ( count , Vector128 < ushort > . AllBitsSet , 0b10101010 ) ;
269261#else
270262 count = Avx2 . Blend ( count , Vector128 . Create ( ushort . MaxValue ) , 0b10101010 ) ;
@@ -274,48 +266,30 @@ private unsafe int EstimateFrequencyAvx(T value)
274266 }
275267 }
276268
269+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
270+ //[MethodImpl((MethodImplOptions)512)]
277271 private unsafe void IncrementAvx ( T value )
278272 {
279273 int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
280274 int counterHash = Rehash ( blockHash ) ;
281275 int block = ( blockHash & blockMask ) << 3 ;
282276
283- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
284- h = Avx2 . ShiftRightLogicalVariable ( h . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
277+ Vector128 < int > h = Avx2 . ShiftRightLogicalVariable ( Vector128 . Create ( counterHash ) . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
278+ Vector128 < int > index = Avx2 . ShiftLeftLogical ( Avx2 . And ( Avx2 . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 15 ) ) , 2 ) ;
279+ Vector128 < int > blockOffset = Avx2 . Add ( Avx2 . Add ( Vector128 . Create ( block ) , Avx2 . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
285280
286- Vector128 < int > index = Avx2 . ShiftRightLogical ( h , 1 ) ;
287- index = Avx2 . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
288- Vector128 < int > offset = Avx2 . And ( h , Vector128 . Create ( 1 ) ) ;
289- Vector128 < int > blockOffset = Avx2 . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
290- blockOffset = Avx2 . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
281+ Vector256 < ulong > offsetLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
282+ Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 0xfL ) , offsetLong ) ;
291283
292284 fixed ( long * tablePtr = table )
293285 {
294- Vector256 < long > tableVector = Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) ;
295-
296- // j == index
297- index = Avx2 . ShiftLeftLogical ( index , 2 ) ;
298- Vector256 < long > offsetLong = Vector256 . Create ( index , Vector128 < int > . Zero ) . AsInt64 ( ) ;
299-
300- Vector256 < int > permuteMask = Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ;
301- offsetLong = Avx2 . PermuteVar8x32 ( offsetLong . AsInt32 ( ) , permuteMask ) . AsInt64 ( ) ;
302-
303- // mask = (0xfL << offset)
304- Vector256 < long > fifteen = Vector256 . Create ( 0xfL ) ;
305- Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( fifteen , offsetLong . AsUInt64 ( ) ) ;
306-
307- // (table[i] & mask) != mask)
308286 // Note masked is 'equal' - therefore use AndNot below
309- Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( tableVector , mask ) , mask ) ;
310-
311- // 1L << offset
312- Vector256 < long > inc = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong . AsUInt64 ( ) ) ;
287+ Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , mask ) , mask ) ;
313288
314289 // Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
315- inc = Avx2 . AndNot ( masked , inc ) ;
290+ Vector256 < long > inc = Avx2 . AndNot ( masked , Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong ) ) ;
316291
317- Vector256 < byte > result = Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) ;
318- bool wasInc = Avx2 . MoveMask ( result . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
292+ bool wasInc = Avx2 . MoveMask ( Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
319293
320294 tablePtr [ blockOffset . GetElement ( 0 ) ] += inc . GetElement ( 0 ) ;
321295 tablePtr [ blockOffset . GetElement ( 1 ) ] += inc . GetElement ( 1 ) ;
0 commit comments