88using System . Runtime . Intrinsics . X86 ;
99#endif
1010
11+ #if NET6_0_OR_GREATER
12+ using System . Runtime . Intrinsics . Arm ;
13+ #endif
14+
1115namespace BitFaster . Caching . Lfu
1216{
1317 /// <summary>
@@ -76,6 +80,12 @@ public int EstimateFrequency(T value)
7680 {
7781 return EstimateFrequencyAvx ( value ) ;
7882 }
83+ #if NET6_0_OR_GREATER
84+ else if ( isa . IsArm64Supported )
85+ {
86+ return EstimateFrequencyArm ( value ) ;
87+ }
88+ #endif
7989 else
8090 {
8191 return EstimateFrequencyStd ( value ) ;
@@ -99,6 +109,12 @@ public void Increment(T value)
99109 {
100110 IncrementAvx ( value ) ;
101111 }
112+ #if NET6_0_OR_GREATER
113+ else if ( isa . IsArm64Supported )
114+ {
115+ IncrementArm ( value ) ;
116+ }
117+ #endif
102118 else
103119 {
104120 IncrementStd ( value ) ;
@@ -329,5 +345,142 @@ private unsafe void IncrementAvx(T value)
329345 }
330346 }
331347#endif
348+
349+ #if NET6_0_OR_GREATER
/// <summary>
/// Increments the four 4-bit counters selected by the hash of <paramref name="value"/>
/// using Arm AdvSimd intrinsics. A counter that already holds 15 is left unchanged.
/// When at least one counter was incremented, <c>size</c> is advanced and <c>Reset</c>
/// is invoked once it equals <c>sampleSize</c>.
/// </summary>
/// <param name="value">The value whose counters are incremented.</param>
private unsafe void IncrementArm(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);
    int block = (blockHash & blockMask) << 3;

    // Negative shift counts shift right: lane k holds counterHash >> (8 * k).
    Vector128<int> h = Vector128.Create(counterHash);
    h = AdvSimd.ShiftArithmetic(h, Vector128.Create(0, -8, -16, -24));

    Vector128<int> index = AdvSimd.ShiftRightLogical(h, 1);
    index = AdvSimd.And(index, Vector128.Create(15)); // j - counter index
    Vector128<int> offset = AdvSimd.And(h, Vector128.Create(1));
    Vector128<int> blockOffset = AdvSimd.Add(Vector128.Create(block), offset); // i - table index
    blockOffset = AdvSimd.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)

    fixed (long* tablePtr = table)
    {
        int t0 = AdvSimd.Extract(blockOffset, 0);
        int t1 = AdvSimd.Extract(blockOffset, 1);
        int t2 = AdvSimd.Extract(blockOffset, 2);
        int t3 = AdvSimd.Extract(blockOffset, 3);

        // TODO: VectorTableLookup
        Vector128<long> tableVectorA = Vector128.Create(
            AdvSimd.LoadVector64(tablePtr + t0),
            AdvSimd.LoadVector64(tablePtr + t1));
        Vector128<long> tableVectorB = Vector128.Create(
            AdvSimd.LoadVector64(tablePtr + t2),
            AdvSimd.LoadVector64(tablePtr + t3));

        // j == index; multiply by 4 to get the bit offset of the counter inside its long.
        index = AdvSimd.ShiftLeftLogicalSaturate(index, 2);

        // Spread the 32-bit bit offsets into the low half of each 64-bit lane (int lanes 0 and 2).
        Vector128<int> longOffA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0);
        longOffA = AdvSimd.Arm64.InsertSelectedScalar(longOffA, 2, index, 1);

        Vector128<int> longOffB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2);
        longOffB = AdvSimd.Arm64.InsertSelectedScalar(longOffB, 2, index, 3);

        Vector128<long> fifteen = Vector128.Create(0xfL);
        Vector128<long> maskA = AdvSimd.ShiftArithmetic(fifteen, longOffA.AsInt64());
        Vector128<long> maskB = AdvSimd.ShiftArithmetic(fifteen, longOffB.AsInt64());

        // All ones in a lane whose counter is already 15 (saturated).
        Vector128<long> maskedA = AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorA, maskA), maskA);
        Vector128<long> maskedB = AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorB, maskB), maskB);

        Vector128<long> one = Vector128.Create(1L);
        Vector128<long> incA = AdvSimd.ShiftArithmetic(one, longOffA.AsInt64());
        Vector128<long> incB = AdvSimd.ShiftArithmetic(one, longOffB.AsInt64());

        // Invert so that saturated lanes become zero and cancel their increment.
        maskedA = AdvSimd.Not(maskedA);
        maskedB = AdvSimd.Not(maskedB);

        // Each half must be gated by its own saturation mask; gating B with A's mask
        // would let a saturated counter in B overflow into the adjacent counter.
        incA = AdvSimd.And(maskedA, incA);
        incB = AdvSimd.And(maskedB, incB);

        tablePtr[t0] += AdvSimd.Extract(incA, 0);
        tablePtr[t1] += AdvSimd.Extract(incA, 1);
        tablePtr[t2] += AdvSimd.Extract(incB, 0);
        tablePtr[t3] += AdvSimd.Extract(incB, 1);

        // Non-zero when any of the four increments survived the saturation gate.
        var maxA = AdvSimd.Arm64.MaxAcross(incA.AsInt32());
        var maxB = AdvSimd.Arm64.MaxAcross(incB.AsInt32());
        maxA = AdvSimd.Arm64.InsertSelectedScalar(maxA, 1, maxB, 0);
        var max = AdvSimd.Arm64.MaxAcross(maxA.AsInt16());

        if (max.ToScalar() != 0 && (++size == sampleSize))
        {
            Reset();
        }
    }
}
430+
/// <summary>
/// Estimates the frequency of <paramref name="value"/> using Arm AdvSimd intrinsics:
/// reads the four 4-bit counters selected by the hash of the value and returns the
/// smallest of them.
/// </summary>
/// <param name="value">The value whose frequency is estimated.</param>
/// <returns>The minimum of the four selected counters, in the range 0 to 15.</returns>
private unsafe int EstimateFrequencyArm(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);
    int block = (blockHash & blockMask) << 3;

    // Negative shift counts shift right: lane k holds counterHash >> (8 * k).
    Vector128<int> h = Vector128.Create(counterHash);
    h = AdvSimd.ShiftArithmetic(h, Vector128.Create(0, -8, -16, -24));

    Vector128<int> index = AdvSimd.ShiftRightLogical(h, 1);
    index = AdvSimd.And(index, Vector128.Create(0xf)); // j - counter index
    Vector128<int> offset = AdvSimd.And(h, Vector128.Create(1));
    Vector128<int> blockOffset = AdvSimd.Add(Vector128.Create(block), offset); // i - table index
    blockOffset = AdvSimd.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)

    fixed (long* tablePtr = table)
    {
        // TODO: VectorTableLookup
        Vector128<long> tableVectorA = Vector128.Create(
            tablePtr[AdvSimd.Extract(blockOffset, 0)],
            tablePtr[AdvSimd.Extract(blockOffset, 1)]);
        Vector128<long> tableVectorB = Vector128.Create(
            tablePtr[AdvSimd.Extract(blockOffset, 2)],
            tablePtr[AdvSimd.Extract(blockOffset, 3)]);

        // j == index; multiply by 4 to get the bit offset of the counter inside its long.
        index = AdvSimd.ShiftLeftLogicalSaturate(index, 2);

        // Spread the 32-bit bit offsets into the low half of each 64-bit lane (int lanes 0 and 2).
        Vector128<int> indexA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0);
        indexA = AdvSimd.Arm64.InsertSelectedScalar(indexA, 2, index, 1);

        Vector128<int> indexB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2);
        indexB = AdvSimd.Arm64.InsertSelectedScalar(indexB, 2, index, 3);

        // Negate so the vector shift moves the selected counter right, down to bit 0.
        indexA = AdvSimd.Negate(indexA);
        indexB = AdvSimd.Negate(indexB);

        Vector128<long> a = AdvSimd.ShiftArithmetic(tableVectorA, indexA.AsInt64());
        Vector128<long> b = AdvSimd.ShiftArithmetic(tableVectorB, indexB.AsInt64());

        Vector128<long> fifteen = Vector128.Create(0xfL);
        a = AdvSimd.And(a, fifteen);
        b = AdvSimd.And(b, fifteen);

        // Every 64-bit lane now holds a count in 0..15, so its upper 32 bits are zero.
        // Reducing over all 32-bit lanes would therefore always produce 0. Gather only
        // the even (low-half) 32-bit lanes of both vectors and take the minimum of those.
        Vector128<int> counts = AdvSimd.Arm64.UnzipEven(a.AsInt32(), b.AsInt32());

        return AdvSimd.Arm64.MinAcross(counts).ToScalar();
    }
}
484+ #endif
332485 }
333486}
0 commit comments