From 6e9efe729c75b5f5c4dd9c12d7f442842bd00ddf Mon Sep 17 00:00:00 2001 From: Jorge Vieyra Date: Mon, 11 Nov 2024 17:25:46 +0100 Subject: [PATCH 1/5] Static version of has_feature() --- Project.toml | 2 +- src/HostCPUFeatures.jl | 42 ++++++------ src/cpu_info.jl | 47 +++++++------- src/cpu_info_x86.jl | 141 ++++++++++++++++++++--------------------- src/static_features.jl | 28 ++++++++ 5 files changed, 143 insertions(+), 117 deletions(-) create mode 100644 src/static_features.jl diff --git a/Project.toml b/Project.toml index 0fc7612..401e321 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "HostCPUFeatures" uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" authors = ["Chris Elrod and contributors"] -version = "0.1.17" +version = "0.1.18" [deps] BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b" diff --git a/src/HostCPUFeatures.jl b/src/HostCPUFeatures.jl index 3e8b5ba..ed6b2ae 100644 --- a/src/HostCPUFeatures.jl +++ b/src/HostCPUFeatures.jl @@ -1,6 +1,6 @@ module HostCPUFeatures if isdefined(Base, :Experimental) && - isdefined(Base.Experimental, Symbol("@max_methods")) + isdefined(Base.Experimental, Symbol("@max_methods")) @eval Base.Experimental.@max_methods 1 end @@ -11,14 +11,14 @@ using IfElse: ifelse using BitTwiddlingConvenienceFunctions: prevpow2, nextpow2, intlog2 export has_feature, fma_fast, pick_vector_width, pick_vector_width_shift, register_count, - register_size, simd_integer_register_size + register_size, simd_integer_register_size function get_cpu_name()::String - if isdefined(Sys, :CPU_NAME) - Sys.CPU_NAME - else - ccall(:jl_get_cpu_name, Ref{String}, ()) - end + if isdefined(Sys, :CPU_NAME) + Sys.CPU_NAME + else + ccall(:jl_get_cpu_name, Ref{String}, ()) + end end include("cpu_info.jl") if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686) @@ -29,28 +29,28 @@ else include("cpu_info_generic.jl") end include("pick_vector_width.jl") +include("static_features.jl") unwrap(::Val{S}) where {S} = S unwrap(::StaticInt{S}) where {S} = S unwrap(::StaticFloat64{S}) where {S} = S unwrap(::StaticSymbol{S}) where {S} = S - @noinline function redefine() - @debug "Defining CPU name." - define_cpu_name() + @debug "Defining CPU name." + define_cpu_name() - reset_features!() - reset_extra_features!() + reset_features!() + reset_extra_features!() end const BASELINE_CPU_NAME = get_cpu_name() -function __init__() - ccall(:jl_generating_output, Cint, ()) == 1 && return - if Sys.ARCH === :x86_64 || Sys.ARCH === :i686 - target = Base.unsafe_string(Base.JLOptions().cpu_target) - occursin("native", target) || return make_generic(target) - end - BASELINE_CPU_NAME == Sys.CPU_NAME::String || redefine() - return nothing -end +# function __init__() +# ccall(:jl_generating_output, Cint, ()) == 1 && return +# if Sys.ARCH === :x86_64 || Sys.ARCH === :i686 +# target = Base.unsafe_string(Base.JLOptions().cpu_target) +# occursin("native", target) || return make_generic(target) +# end +# BASELINE_CPU_NAME == Sys.CPU_NAME::String || redefine() +# return nothing +# end end diff --git a/src/cpu_info.jl b/src/cpu_info.jl index 52b49fe..96f0cd2 100644 --- a/src/cpu_info.jl +++ b/src/cpu_info.jl @@ -1,5 +1,5 @@ function feature_string() - llvmlib_path = VERSION ≥ v"1.6.0-DEV.1429" ? Base.libllvm_path() : only(filter(lib->occursin(r"LLVM\b", basename(lib)), Libdl.dllist())) + llvmlib_path = VERSION ≥ v"1.6.0-DEV.1429" ? Base.libllvm_path() : only(filter(lib -> occursin(r"LLVM\b", basename(lib)), Libdl.dllist())) libllvm = Libdl.dlopen(llvmlib_path) gethostcpufeatures = Libdl.dlsym(libllvm, :LLVMGetHostCPUFeatures) features_cstring = ccall(gethostcpufeatures, Cstring, ()) @@ -16,36 +16,35 @@ archstr() = Sys.ARCH === :i686 ? "x86_64_" : string(Sys.ARCH) * '_' feature_name(ext) = archstr() * ext[2:end] process_feature(ext) = (feature_name(ext), first(ext) == '+') -has_feature(_) = False() -@noinline function set_feature(feature::String, has::Bool) - featqn = QuoteNode(Symbol(feature)) - if has - @eval has_feature(::Val{$featqn}) = True() - else - @eval has_feature(::Val{$featqn}) = False() - end -end +# has_feature(_) = False() +# @noinline function set_feature(feature::String, has::Bool) +# featqn = QuoteNode(Symbol(feature)) +# if has +# @eval has_feature(::Val{$featqn}) = True() +# else +# @eval has_feature(::Val{$featqn}) = False() +# end +# end function set_features!() - features, features_cstring = feature_string() - znver3 = get_cpu_name() === "znver3" - for ext ∈ features - feature, has = process_feature(ext) - if znver3 && occursin("512", feature) - has = false + features, features_cstring = feature_string() + znver3 = get_cpu_name() === "znver3" + for ext in features + feature, has = process_feature(ext) + if znver3 && occursin("512", feature) + has = false + end + has && push!(FEATURE_SET, feature) + set_feature(feature, has) end - has && push!(FEATURE_SET, feature) - set_feature(feature, has) - end - Libc.free(features_cstring) + Libc.free(features_cstring) end -set_features!() - +# set_features!() function reset_features!() features, features_cstring = feature_string() - for ext ∈ features + for ext in features feature, has = process_feature(ext) if _has_feature(feature) ≠ has @debug "Defining $(has ? "presence" : "absense") of feature $feature." @@ -56,7 +55,7 @@ function reset_features!() end register_size(::Type{T}) where {T} = register_size() -register_size(::Type{T}) where {T<:Union{Signed,Unsigned}} = simd_integer_register_size() +register_size(::Type{T}) where {T <: Union{Signed, Unsigned}} = simd_integer_register_size() function define_cpu_name() cpu = QuoteNode(Symbol(get_cpu_name())) diff --git a/src/cpu_info_x86.jl b/src/cpu_info_x86.jl index b0348c0..cabd071 100644 --- a/src/cpu_info_x86.jl +++ b/src/cpu_info_x86.jl @@ -1,4 +1,3 @@ - fma_fast() = has_feature(Val(:x86_64_fma)) | has_feature(Val(:x86_64_fma4)) register_size() = ifelse( has_feature(Val(:x86_64_avx512f)), @@ -33,77 +32,77 @@ fast_int64_to_double() = has_feature(Val(:x86_64_avx512dq)) fast_half() = False() @noinline function setfeaturefalse(s) - if has_feature(Val(s)) === True() - @eval has_feature(::Val{$(QuoteNode(s))}) = False() - end + if has_feature(Val(s)) === True() + @eval has_feature(::Val{$(QuoteNode(s))}) = False() + end end @noinline function setfeaturetrue(s) - if has_feature(Val(s)) === False() - @eval has_feature(::Val{$(QuoteNode(s))}) = True() - end -end - -function make_generic(target) - if occursin("tigerlake", target) || occursin("znver4", target) || occursin("sapphirerapids", target) - # most feature-complete architectures we use - setfeaturetrue(:x86_64_avx512ifma) - setfeaturetrue(:x86_64_avx512vl) - setfeaturetrue(:x86_64_avx512bw) - setfeaturetrue(:x86_64_avx512dq) - setfeaturetrue(:x86_64_avx512f) - setfeaturetrue(:x86_64_avx2) - setfeaturetrue(:x86_64_bmi2) - setfeaturetrue(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - elseif occursin("icelake", target) || occursin("skylake-avx512", target) || occursin("rocketlake", target) || occursin("cascadelake", target) - # no ifma, but avx512f and avx512dq - setfeaturefalse(:x86_64_avx512ifma) - setfeaturetrue(:x86_64_avx512vl) - setfeaturetrue(:x86_64_avx512bw) - setfeaturetrue(:x86_64_avx512dq) - setfeaturetrue(:x86_64_avx512f) - setfeaturetrue(:x86_64_avx2) - setfeaturetrue(:x86_64_bmi2) - setfeaturetrue(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - elseif occursin("znver", target) || occursin("lake", target) || occursin("well", target) - # no avx512, but avx2, fma, and bmi2 - # znver tries to capture all zen < 4 - # lake tries to capture lakes we didn't single out above as having avx512 - # - setfeaturefalse(:x86_64_avx512ifma) - setfeaturefalse(:x86_64_avx512vl) - setfeaturefalse(:x86_64_avx512bw) - setfeaturefalse(:x86_64_avx512dq) - setfeaturefalse(:x86_64_avx512f) - setfeaturetrue(:x86_64_avx2) - setfeaturetrue(:x86_64_bmi2) - setfeaturetrue(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - elseif occursin("ivybridge", target) || occursin("sandybridge", target) - # has avx, and that is about it we care about - setfeaturefalse(:x86_64_avx512ifma) - setfeaturefalse(:x86_64_avx512vl) - setfeaturefalse(:x86_64_avx512bw) - setfeaturefalse(:x86_64_avx512dq) - setfeaturefalse(:x86_64_avx512f) - setfeaturefalse(:x86_64_avx2) - setfeaturefalse(:x86_64_bmi2) - setfeaturefalse(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - else - # hopefully we didn't miss something - # TODO: sapphire rapids - setfeaturefalse(:x86_64_avx512ifma) - setfeaturefalse(:x86_64_avx512vl) - setfeaturefalse(:x86_64_avx512bw) - setfeaturefalse(:x86_64_avx512dq) - setfeaturefalse(:x86_64_avx512f) - setfeaturefalse(:x86_64_avx2) - setfeaturefalse(:x86_64_bmi2) - setfeaturefalse(:x86_64_fma) - setfeaturefalse(:x86_64_avx) - end - return nothing + if has_feature(Val(s)) === False() + @eval has_feature(::Val{$(QuoteNode(s))}) = True() + end end +# function make_generic(target) +# if occursin("tigerlake", target) || occursin("znver4", target) || occursin("sapphirerapids", target) +# # most feature-complete architectures we use +# setfeaturetrue(:x86_64_avx512ifma) +# setfeaturetrue(:x86_64_avx512vl) +# setfeaturetrue(:x86_64_avx512bw) +# setfeaturetrue(:x86_64_avx512dq) +# setfeaturetrue(:x86_64_avx512f) +# setfeaturetrue(:x86_64_avx2) +# setfeaturetrue(:x86_64_bmi2) +# setfeaturetrue(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# elseif occursin("icelake", target) || occursin("skylake-avx512", target) || occursin("rocketlake", target) || occursin("cascadelake", target) +# # no ifma, but avx512f and avx512dq +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturetrue(:x86_64_avx512vl) +# setfeaturetrue(:x86_64_avx512bw) +# setfeaturetrue(:x86_64_avx512dq) +# setfeaturetrue(:x86_64_avx512f) +# setfeaturetrue(:x86_64_avx2) +# setfeaturetrue(:x86_64_bmi2) +# setfeaturetrue(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# elseif occursin("znver", target) || occursin("lake", target) || occursin("well", target) +# # no avx512, but avx2, fma, and bmi2 +# # znver tries to capture all zen < 4 +# # lake tries to capture lakes we didn't single out above as having avx512 +# # +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturefalse(:x86_64_avx512vl) +# setfeaturefalse(:x86_64_avx512bw) +# setfeaturefalse(:x86_64_avx512dq) +# setfeaturefalse(:x86_64_avx512f) +# setfeaturetrue(:x86_64_avx2) +# setfeaturetrue(:x86_64_bmi2) +# setfeaturetrue(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# elseif occursin("ivybridge", target) || occursin("sandybridge", target) +# # has avx, and that is about it we care about +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturefalse(:x86_64_avx512vl) +# setfeaturefalse(:x86_64_avx512bw) +# setfeaturefalse(:x86_64_avx512dq) +# setfeaturefalse(:x86_64_avx512f) +# setfeaturefalse(:x86_64_avx2) +# setfeaturefalse(:x86_64_bmi2) +# setfeaturefalse(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# else +# # hopefully we didn't miss something +# # TODO: sapphire rapids +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturefalse(:x86_64_avx512vl) +# setfeaturefalse(:x86_64_avx512bw) +# setfeaturefalse(:x86_64_avx512dq) +# setfeaturefalse(:x86_64_avx512f) +# setfeaturefalse(:x86_64_avx2) +# setfeaturefalse(:x86_64_bmi2) +# setfeaturefalse(:x86_64_fma) +# setfeaturefalse(:x86_64_avx) +# end +# return nothing +# end +# diff --git a/src/static_features.jl b/src/static_features.jl new file mode 100644 index 0000000..a88f518 --- /dev/null +++ b/src/static_features.jl @@ -0,0 +1,28 @@ +has_feature(::Val{S}) where {S} = has_feature(S) + +@generated function has_feature(my_feature::Symbol) + + features, features_cstring = feature_string() + + matches = map(features) do feature + fname, has = process_feature(feature) + val = has ? True() : False() + sname = Symbol(fname) + + :( + if my_feature == $(Meta.quot(sname)) + return $val + end + ) + end + + push!(matches, :(return False())) + + Libc.free(features_cstring) + + return quote + begin + $(matches...) + end + end +end From 851add107cdb117258d61382fa8419798adf18ca Mon Sep 17 00:00:00 2001 From: el-oso Date: Sat, 26 Jul 2025 17:32:11 +0200 Subject: [PATCH 2/5] Marked inline by Cody's suggestion --- src/static_features.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/static_features.jl b/src/static_features.jl index a88f518..c7649ad 100644 --- a/src/static_features.jl +++ b/src/static_features.jl @@ -1,6 +1,6 @@ has_feature(::Val{S}) where {S} = has_feature(S) -@generated function has_feature(my_feature::Symbol) +@generated @inline function has_feature(my_feature::Symbol) features, features_cstring = feature_string() From 6bef812c38442ec516a0c900738c72b7e60c24ae Mon Sep 17 00:00:00 2001 From: el-oso Date: Sat, 26 Jul 2025 17:42:46 +0200 Subject: [PATCH 3/5] Revert "Marked inline by Cody's suggestion" This reverts commit 851add107cdb117258d61382fa8419798adf18ca. --- src/static_features.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/static_features.jl b/src/static_features.jl index c7649ad..a88f518 100644 --- a/src/static_features.jl +++ b/src/static_features.jl @@ -1,6 +1,6 @@ has_feature(::Val{S}) where {S} = has_feature(S) -@generated @inline function has_feature(my_feature::Symbol) +@generated function has_feature(my_feature::Symbol) features, features_cstring = feature_string() From b413382f3346e2beb752f220f57b7b60d32b2452 Mon Sep 17 00:00:00 2001 From: el-oso Date: Sat, 26 Jul 2025 17:48:54 +0200 Subject: [PATCH 4/5] Readded @inline in the correct spot --- src/static_features.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/static_features.jl b/src/static_features.jl index a88f518..01499a6 100644 --- a/src/static_features.jl +++ b/src/static_features.jl @@ -1,6 +1,6 @@ has_feature(::Val{S}) where {S} = has_feature(S) -@generated function has_feature(my_feature::Symbol) +@inline @generated function has_feature(my_feature::Symbol) features, features_cstring = feature_string() From 14796851712dcfccdab5d468c8c533390c683e1d Mon Sep 17 00:00:00 2001 From: el-oso Date: Sun, 27 Jul 2025 01:23:59 +0200 Subject: [PATCH 5/5] Formatted code with JuliaFormatter --- src/HostCPUFeatures.jl | 38 ++++++++++++++------------ src/cpu_info.jl | 59 ++++++++++++++++++++++------------------ src/cpu_info_aarch64.jl | 41 ++++++++++++++-------------- src/cpu_info_generic.jl | 1 - src/cpu_info_x86.jl | 27 ++++++++---------- src/pick_vector_width.jl | 46 ++++++++++++++++++++++++------- src/static_features.jl | 34 +++++++++++------------ 7 files changed, 139 insertions(+), 107 deletions(-) diff --git a/src/HostCPUFeatures.jl b/src/HostCPUFeatures.jl index ed6b2ae..d339c16 100644 --- a/src/HostCPUFeatures.jl +++ b/src/HostCPUFeatures.jl @@ -1,7 +1,6 @@ module HostCPUFeatures -if isdefined(Base, :Experimental) && - isdefined(Base.Experimental, Symbol("@max_methods")) - @eval Base.Experimental.@max_methods 1 +if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_methods")) + @eval Base.Experimental.@max_methods 1 end using Libdl, Static @@ -10,23 +9,28 @@ using IfElse: ifelse using BitTwiddlingConvenienceFunctions: prevpow2, nextpow2, intlog2 -export has_feature, fma_fast, pick_vector_width, pick_vector_width_shift, register_count, - register_size, simd_integer_register_size +export has_feature, + fma_fast, + pick_vector_width, + pick_vector_width_shift, + register_count, + register_size, + simd_integer_register_size function get_cpu_name()::String - if isdefined(Sys, :CPU_NAME) - Sys.CPU_NAME - else - ccall(:jl_get_cpu_name, Ref{String}, ()) - end + if isdefined(Sys, :CPU_NAME) + Sys.CPU_NAME + else + ccall(:jl_get_cpu_name, Ref{String}, ()) + end end include("cpu_info.jl") if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686) - include("cpu_info_x86.jl") + include("cpu_info_x86.jl") elseif Sys.ARCH === :aarch64 - include("cpu_info_aarch64.jl") + include("cpu_info_aarch64.jl") else - include("cpu_info_generic.jl") + include("cpu_info_generic.jl") end include("pick_vector_width.jl") include("static_features.jl") @@ -36,11 +40,11 @@ unwrap(::StaticInt{S}) where {S} = S unwrap(::StaticFloat64{S}) where {S} = S unwrap(::StaticSymbol{S}) where {S} = S @noinline function redefine() - @debug "Defining CPU name." - define_cpu_name() + @debug "Defining CPU name." + define_cpu_name() - reset_features!() - reset_extra_features!() + reset_features!() + reset_extra_features!() end const BASELINE_CPU_NAME = get_cpu_name() # function __init__() diff --git a/src/cpu_info.jl b/src/cpu_info.jl index 96f0cd2..f571c8f 100644 --- a/src/cpu_info.jl +++ b/src/cpu_info.jl @@ -1,10 +1,15 @@ function feature_string() - llvmlib_path = VERSION ≥ v"1.6.0-DEV.1429" ? Base.libllvm_path() : only(filter(lib -> occursin(r"LLVM\b", basename(lib)), Libdl.dllist())) - libllvm = Libdl.dlopen(llvmlib_path) - gethostcpufeatures = Libdl.dlsym(libllvm, :LLVMGetHostCPUFeatures) - features_cstring = ccall(gethostcpufeatures, Cstring, ()) - features = filter(ext -> (ext ≠ "" && (m = match(r"\d", ext); isnothing(m) ? true : m.offset != 2)), split(unsafe_string(features_cstring), ',')) - features, features_cstring + llvmlib_path = + VERSION ≥ v"1.6.0-DEV.1429" ? Base.libllvm_path() : + only(filter(lib -> occursin(r"LLVM\b", basename(lib)), Libdl.dllist())) + libllvm = Libdl.dlopen(llvmlib_path) + gethostcpufeatures = Libdl.dlsym(libllvm, :LLVMGetHostCPUFeatures) + features_cstring = ccall(gethostcpufeatures, Cstring, ()) + features = filter( + ext -> (ext ≠ "" && (m = match(r"\d", ext); isnothing(m) ? true : m.offset != 2)), + split(unsafe_string(features_cstring), ','), + ) + features, features_cstring end const FEATURE_SET = Set{String}() @@ -27,38 +32,38 @@ process_feature(ext) = (feature_name(ext), first(ext) == '+') # end function set_features!() - features, features_cstring = feature_string() - znver3 = get_cpu_name() === "znver3" - for ext in features - feature, has = process_feature(ext) - if znver3 && occursin("512", feature) - has = false - end - has && push!(FEATURE_SET, feature) - set_feature(feature, has) + features, features_cstring = feature_string() + znver3 = get_cpu_name() === "znver3" + for ext in features + feature, has = process_feature(ext) + if znver3 && occursin("512", feature) + has = false end - Libc.free(features_cstring) + has && push!(FEATURE_SET, feature) + set_feature(feature, has) + end + Libc.free(features_cstring) end # set_features!() function reset_features!() - features, features_cstring = feature_string() - for ext in features - feature, has = process_feature(ext) - if _has_feature(feature) ≠ has - @debug "Defining $(has ? "presence" : "absense") of feature $feature." - set_feature(feature, has) - end + features, features_cstring = feature_string() + for ext in features + feature, has = process_feature(ext) + if _has_feature(feature) ≠ has + @debug "Defining $(has ? "presence" : "absense") of feature $feature." + set_feature(feature, has) end - Libc.free(features_cstring) + end + Libc.free(features_cstring) end register_size(::Type{T}) where {T} = register_size() -register_size(::Type{T}) where {T <: Union{Signed, Unsigned}} = simd_integer_register_size() +register_size(::Type{T}) where {T<:Union{Signed,Unsigned}} = simd_integer_register_size() function define_cpu_name() - cpu = QuoteNode(Symbol(get_cpu_name())) - @eval cpu_name() = Val{$cpu}() + cpu = QuoteNode(Symbol(get_cpu_name())) + @eval cpu_name() = Val{$cpu}() end define_cpu_name() diff --git a/src/cpu_info_aarch64.jl b/src/cpu_info_aarch64.jl index 605d54d..b3f5d41 100644 --- a/src/cpu_info_aarch64.jl +++ b/src/cpu_info_aarch64.jl @@ -1,10 +1,12 @@ -_has_aarch64_sve() = (Base.libllvm_version ≥ v"11") && (Base.BinaryPlatforms.CPUID.test_cpu_feature(Base.BinaryPlatforms.CPUID.JL_AArch64_sve)) +_has_aarch64_sve() = + (Base.libllvm_version ≥ v"11") && + (Base.BinaryPlatforms.CPUID.test_cpu_feature(Base.BinaryPlatforms.CPUID.JL_AArch64_sve)) if Int === Int64 - @noinline vscale() = ccall("llvm.vscale.i64", llvmcall, Int64, ()) + @noinline vscale() = ccall("llvm.vscale.i64", llvmcall, Int64, ()) else - @noinline vscale() = ccall("llvm.vscale.i32", llvmcall, Int32, ()) + @noinline vscale() = ccall("llvm.vscale.i32", llvmcall, Int32, ()) end # TODO: find actually support SVE @@ -20,30 +22,30 @@ function _dynamic_register_size() end function _set_sve_vector_width!(bytes = _dynamic_register_size()) - @eval begin - register_size() = StaticInt{$bytes}() - simd_integer_register_size() = StaticInt{$bytes}() - end - nothing + @eval begin + register_size() = StaticInt{$bytes}() + simd_integer_register_size() = StaticInt{$bytes}() + end + nothing end if _has_aarch64_sve()# && !(Bool(has_feature(Val(:aarch64_sve)))) - has_feature(::Val{:aarch64_sve_cpuid}) = True() - _set_sve_vector_width!() + has_feature(::Val{:aarch64_sve_cpuid}) = True() + _set_sve_vector_width!() else - # has_feature(::Val{:aarch64_svejl}) = False() - register_size() = StaticInt{16}() - simd_integer_register_size() = StaticInt{16}() + # has_feature(::Val{:aarch64_svejl}) = False() + register_size() = StaticInt{16}() + simd_integer_register_size() = StaticInt{16}() end function reset_extra_features!() - drs = _dynamic_register_size() - register_size() ≠ drs && _set_sve_vector_width!(drs) - hassve = _has_aarch64_sve() - if hassve ≠ has_feature(Val(:aarch64_sve_cpuid)) - @eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False)) - end + drs = _dynamic_register_size() + register_size() ≠ drs && _set_sve_vector_width!(drs) + hassve = _has_aarch64_sve() + if hassve ≠ has_feature(Val(:aarch64_sve_cpuid)) + @eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False)) + end end fma_fast() = True() @@ -53,4 +55,3 @@ has_opmask_registers() = has_feature(Val(:aarch64_sve_cpuid)) fast_int64_to_double() = True() fast_half() = False() - diff --git a/src/cpu_info_generic.jl b/src/cpu_info_generic.jl index c0758f3..2c668a5 100644 --- a/src/cpu_info_generic.jl +++ b/src/cpu_info_generic.jl @@ -9,4 +9,3 @@ reset_extra_features!() = nothing fast_int64_to_double() = True() fast_half() = False() - diff --git a/src/cpu_info_x86.jl b/src/cpu_info_x86.jl index cabd071..db71014 100644 --- a/src/cpu_info_x86.jl +++ b/src/cpu_info_x86.jl @@ -1,12 +1,8 @@ fma_fast() = has_feature(Val(:x86_64_fma)) | has_feature(Val(:x86_64_fma4)) register_size() = ifelse( - has_feature(Val(:x86_64_avx512f)), - StaticInt{64}(), - ifelse( - has_feature(Val(:x86_64_avx)), - StaticInt{32}(), - StaticInt{16}() - ) + has_feature(Val(:x86_64_avx512f)), + StaticInt{64}(), + ifelse(has_feature(Val(:x86_64_avx)), StaticInt{32}(), StaticInt{16}()), ) const simd_integer_register_size = register_size # simd_integer_register_size() = ifelse( @@ -19,9 +15,10 @@ const simd_integer_register_size = register_size # ) # ) if Sys.ARCH === :i686 - register_count() = StaticInt{8}() + register_count() = StaticInt{8}() elseif Sys.ARCH === :x86_64 - register_count() = ifelse(has_feature(Val(:x86_64_avx512f)), StaticInt{32}(), StaticInt{16}()) + register_count() = + ifelse(has_feature(Val(:x86_64_avx512f)), StaticInt{32}(), StaticInt{16}()) end has_opmask_registers() = has_feature(Val(:x86_64_avx512f)) @@ -32,14 +29,14 @@ fast_int64_to_double() = has_feature(Val(:x86_64_avx512dq)) fast_half() = False() @noinline function setfeaturefalse(s) - if has_feature(Val(s)) === True() - @eval has_feature(::Val{$(QuoteNode(s))}) = False() - end + if has_feature(Val(s)) === True() + @eval has_feature(::Val{$(QuoteNode(s))}) = False() + end end @noinline function setfeaturetrue(s) - if has_feature(Val(s)) === False() - @eval has_feature(::Val{$(QuoteNode(s))}) = True() - end + if has_feature(Val(s)) === False() + @eval has_feature(::Val{$(QuoteNode(s))}) = True() + end end # function make_generic(target) diff --git a/src/pick_vector_width.jl b/src/pick_vector_width.jl index 0ebcbc9..5f52053 100644 --- a/src/pick_vector_width.jl +++ b/src/pick_vector_width.jl @@ -1,7 +1,9 @@ @static if isdefined(Base, Symbol("@constprop")) using Base: @constprop else - macro constprop(_, ex); esc(ex); end + macro constprop(_, ex) + esc(ex) + end end @generated function static_sizeof(::Type{T}) where {T} @@ -12,29 +14,53 @@ end smax(a::StaticInt, b::StaticInt) = ifelse(gt(a, b), a, b) smin(a::StaticInt, b::StaticInt) = ifelse(lt(a, b), a, b) -_pick_vector_width_float16(::StaticInt{RS}, ::True) where {RS} = StaticInt{RS}() ÷ StaticInt{2}() -_pick_vector_width_float16(::StaticInt{RS}, ::False) where {RS} = StaticInt{RS}() ÷ StaticInt{4}() -pick_vector_width(::Type{Float16}) = _pick_vector_width_float16(register_size(Float32), fast_half()) +_pick_vector_width_float16(::StaticInt{RS}, ::True) where {RS} = + StaticInt{RS}() ÷ StaticInt{2}() +_pick_vector_width_float16(::StaticInt{RS}, ::False) where {RS} = + StaticInt{RS}() ÷ StaticInt{4}() +pick_vector_width(::Type{Float16}) = + _pick_vector_width_float16(register_size(Float32), fast_half()) pick_vector_width(::Type{T}) where {T} = register_size(T) ÷ static_sizeof(T) -@inline @constprop :aggressive function _pick_vector_width(min_W, max_W, ::Type{T}, ::Type{S}, args::Vararg{Any,K}) where {K,S,T} +@inline @constprop :aggressive function _pick_vector_width( + min_W, + max_W, + ::Type{T}, + ::Type{S}, + args::Vararg{Any,K}, +) where {K,S,T} _max_W = smin(max_W, pick_vector_width(T)) _pick_vector_width(min_W, _max_W, S, args...) end -@inline @constprop :aggressive function _pick_vector_width(min_W, max_W, ::Type{T}) where {T} +@inline @constprop :aggressive function _pick_vector_width( + min_W, + max_W, + ::Type{T}, +) where {T} _max_W = smin(max_W, pick_vector_width(T)) smax(min_W, _max_W) end -@inline @constprop :aggressive function pick_vector_width(::Type{T}, ::Type{S}, args::Vararg{Any,K}) where {T,S,K} +@inline @constprop :aggressive function pick_vector_width( + ::Type{T}, + ::Type{S}, + args::Vararg{Any,K}, +) where {T,S,K} _pick_vector_width(One(), register_size(), T, S, args...) end -@inline @constprop :aggressive function pick_vector_width(::Union{Val{P},StaticInt{P}}, ::Type{T}, ::Type{S}, args::Vararg{Any,K}) where {P,T,S,K} +@inline @constprop :aggressive function pick_vector_width( + ::Union{Val{P},StaticInt{P}}, + ::Type{T}, + ::Type{S}, + args::Vararg{Any,K}, +) where {P,T,S,K} _pick_vector_width(One(), smin(register_size(), nextpow2(StaticInt{P}())), T, S, args...) end -@inline @constprop :aggressive function pick_vector_width(::Union{Val{P},StaticInt{P}}, ::Type{T}) where {P,T} +@inline @constprop :aggressive function pick_vector_width( + ::Union{Val{P},StaticInt{P}}, + ::Type{T}, +) where {P,T} _pick_vector_width(One(), smin(register_size(), nextpow2(StaticInt{P}())), T) end @inline function pick_vector_width_shift(args::Vararg{Any,K}) where {K} W = pick_vector_width(args...) W, intlog2(W) end - diff --git a/src/static_features.jl b/src/static_features.jl index 01499a6..c6caac9 100644 --- a/src/static_features.jl +++ b/src/static_features.jl @@ -2,27 +2,27 @@ has_feature(::Val{S}) where {S} = has_feature(S) @inline @generated function has_feature(my_feature::Symbol) - features, features_cstring = feature_string() + features, features_cstring = feature_string() - matches = map(features) do feature - fname, has = process_feature(feature) - val = has ? True() : False() - sname = Symbol(fname) + matches = map(features) do feature + fname, has = process_feature(feature) + val = has ? True() : False() + sname = Symbol(fname) - :( - if my_feature == $(Meta.quot(sname)) - return $val - end - ) - end + :( + if my_feature == $(Meta.quot(sname)) + return $val + end + ) + end - push!(matches, :(return False())) + push!(matches, :(return False())) - Libc.free(features_cstring) + Libc.free(features_cstring) - return quote - begin - $(matches...) - end + return quote + begin + $(matches...) end + end end