diff --git a/Project.toml b/Project.toml index 0fc7612..401e321 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "HostCPUFeatures" uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" authors = ["Chris Elrod and contributors"] -version = "0.1.17" +version = "0.1.18" [deps] BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b" diff --git a/src/HostCPUFeatures.jl b/src/HostCPUFeatures.jl index 3e8b5ba..d339c16 100644 --- a/src/HostCPUFeatures.jl +++ b/src/HostCPUFeatures.jl @@ -1,7 +1,6 @@ module HostCPUFeatures -if isdefined(Base, :Experimental) && - isdefined(Base.Experimental, Symbol("@max_methods")) - @eval Base.Experimental.@max_methods 1 +if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_methods")) + @eval Base.Experimental.@max_methods 1 end using Libdl, Static @@ -10,8 +9,13 @@ using IfElse: ifelse using BitTwiddlingConvenienceFunctions: prevpow2, nextpow2, intlog2 -export has_feature, fma_fast, pick_vector_width, pick_vector_width_shift, register_count, - register_size, simd_integer_register_size +export has_feature, + fma_fast, + pick_vector_width, + pick_vector_width_shift, + register_count, + register_size, + simd_integer_register_size function get_cpu_name()::String if isdefined(Sys, :CPU_NAME) @@ -22,19 +26,19 @@ function get_cpu_name()::String end include("cpu_info.jl") if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686) - include("cpu_info_x86.jl") + include("cpu_info_x86.jl") elseif Sys.ARCH === :aarch64 - include("cpu_info_aarch64.jl") + include("cpu_info_aarch64.jl") else - include("cpu_info_generic.jl") + include("cpu_info_generic.jl") end include("pick_vector_width.jl") +include("static_features.jl") unwrap(::Val{S}) where {S} = S unwrap(::StaticInt{S}) where {S} = S unwrap(::StaticFloat64{S}) where {S} = S unwrap(::StaticSymbol{S}) where {S} = S - @noinline function redefine() @debug "Defining CPU name." define_cpu_name() @@ -43,14 +47,14 @@ unwrap(::StaticSymbol{S}) where {S} = S reset_extra_features!() end const BASELINE_CPU_NAME = get_cpu_name() -function __init__() - ccall(:jl_generating_output, Cint, ()) == 1 && return - if Sys.ARCH === :x86_64 || Sys.ARCH === :i686 - target = Base.unsafe_string(Base.JLOptions().cpu_target) - occursin("native", target) || return make_generic(target) - end - BASELINE_CPU_NAME == Sys.CPU_NAME::String || redefine() - return nothing -end +# function __init__() +# ccall(:jl_generating_output, Cint, ()) == 1 && return +# if Sys.ARCH === :x86_64 || Sys.ARCH === :i686 +# target = Base.unsafe_string(Base.JLOptions().cpu_target) +# occursin("native", target) || return make_generic(target) +# end +# BASELINE_CPU_NAME == Sys.CPU_NAME::String || redefine() +# return nothing +# end end diff --git a/src/cpu_info.jl b/src/cpu_info.jl index 52b49fe..f571c8f 100644 --- a/src/cpu_info.jl +++ b/src/cpu_info.jl @@ -1,10 +1,15 @@ function feature_string() - llvmlib_path = VERSION ≥ v"1.6.0-DEV.1429" ? Base.libllvm_path() : only(filter(lib->occursin(r"LLVM\b", basename(lib)), Libdl.dllist())) - libllvm = Libdl.dlopen(llvmlib_path) - gethostcpufeatures = Libdl.dlsym(libllvm, :LLVMGetHostCPUFeatures) - features_cstring = ccall(gethostcpufeatures, Cstring, ()) - features = filter(ext -> (ext ≠ "" && (m = match(r"\d", ext); isnothing(m) ? true : m.offset != 2)), split(unsafe_string(features_cstring), ',')) - features, features_cstring + llvmlib_path = + VERSION ≥ v"1.6.0-DEV.1429" ? Base.libllvm_path() : + only(filter(lib -> occursin(r"LLVM\b", basename(lib)), Libdl.dllist())) + libllvm = Libdl.dlopen(llvmlib_path) + gethostcpufeatures = Libdl.dlsym(libllvm, :LLVMGetHostCPUFeatures) + features_cstring = ccall(gethostcpufeatures, Cstring, ()) + features = filter( + ext -> (ext ≠ "" && (m = match(r"\d", ext); isnothing(m) ? true : m.offset != 2)), + split(unsafe_string(features_cstring), ','), + ) + features, features_cstring end const FEATURE_SET = Set{String}() @@ -16,20 +21,20 @@ archstr() = Sys.ARCH === :i686 ? "x86_64_" : string(Sys.ARCH) * '_' feature_name(ext) = archstr() * ext[2:end] process_feature(ext) = (feature_name(ext), first(ext) == '+') -has_feature(_) = False() -@noinline function set_feature(feature::String, has::Bool) - featqn = QuoteNode(Symbol(feature)) - if has - @eval has_feature(::Val{$featqn}) = True() - else - @eval has_feature(::Val{$featqn}) = False() - end -end +# has_feature(_) = False() +# @noinline function set_feature(feature::String, has::Bool) +# featqn = QuoteNode(Symbol(feature)) +# if has +# @eval has_feature(::Val{$featqn}) = True() +# else +# @eval has_feature(::Val{$featqn}) = False() +# end +# end function set_features!() features, features_cstring = feature_string() znver3 = get_cpu_name() === "znver3" - for ext ∈ features + for ext in features feature, has = process_feature(ext) if znver3 && occursin("512", feature) has = false @@ -39,27 +44,26 @@ function set_features!() end Libc.free(features_cstring) end -set_features!() - +# set_features!() function reset_features!() - features, features_cstring = feature_string() - for ext ∈ features - feature, has = process_feature(ext) - if _has_feature(feature) ≠ has - @debug "Defining $(has ? "presence" : "absense") of feature $feature." - set_feature(feature, has) - end + features, features_cstring = feature_string() + for ext in features + feature, has = process_feature(ext) + if _has_feature(feature) ≠ has + @debug "Defining $(has ? "presence" : "absense") of feature $feature." + set_feature(feature, has) end - Libc.free(features_cstring) + end + Libc.free(features_cstring) end register_size(::Type{T}) where {T} = register_size() register_size(::Type{T}) where {T<:Union{Signed,Unsigned}} = simd_integer_register_size() function define_cpu_name() - cpu = QuoteNode(Symbol(get_cpu_name())) - @eval cpu_name() = Val{$cpu}() + cpu = QuoteNode(Symbol(get_cpu_name())) + @eval cpu_name() = Val{$cpu}() end define_cpu_name() diff --git a/src/cpu_info_aarch64.jl b/src/cpu_info_aarch64.jl index 605d54d..b3f5d41 100644 --- a/src/cpu_info_aarch64.jl +++ b/src/cpu_info_aarch64.jl @@ -1,10 +1,12 @@ -_has_aarch64_sve() = (Base.libllvm_version ≥ v"11") && (Base.BinaryPlatforms.CPUID.test_cpu_feature(Base.BinaryPlatforms.CPUID.JL_AArch64_sve)) +_has_aarch64_sve() = + (Base.libllvm_version ≥ v"11") && + (Base.BinaryPlatforms.CPUID.test_cpu_feature(Base.BinaryPlatforms.CPUID.JL_AArch64_sve)) if Int === Int64 - @noinline vscale() = ccall("llvm.vscale.i64", llvmcall, Int64, ()) + @noinline vscale() = ccall("llvm.vscale.i64", llvmcall, Int64, ()) else - @noinline vscale() = ccall("llvm.vscale.i32", llvmcall, Int32, ()) + @noinline vscale() = ccall("llvm.vscale.i32", llvmcall, Int32, ()) end # TODO: find actually support SVE @@ -20,30 +22,30 @@ function _dynamic_register_size() end function _set_sve_vector_width!(bytes = _dynamic_register_size()) - @eval begin - register_size() = StaticInt{$bytes}() - simd_integer_register_size() = StaticInt{$bytes}() - end - nothing + @eval begin + register_size() = StaticInt{$bytes}() + simd_integer_register_size() = StaticInt{$bytes}() + end + nothing end if _has_aarch64_sve()# && !(Bool(has_feature(Val(:aarch64_sve)))) - has_feature(::Val{:aarch64_sve_cpuid}) = True() - _set_sve_vector_width!() + has_feature(::Val{:aarch64_sve_cpuid}) = True() + _set_sve_vector_width!() else - # has_feature(::Val{:aarch64_svejl}) = False() - register_size() = StaticInt{16}() - simd_integer_register_size() = StaticInt{16}() + # has_feature(::Val{:aarch64_svejl}) = False() + register_size() = StaticInt{16}() + simd_integer_register_size() = StaticInt{16}() end function reset_extra_features!() - drs = _dynamic_register_size() - register_size() ≠ drs && _set_sve_vector_width!(drs) - hassve = _has_aarch64_sve() - if hassve ≠ has_feature(Val(:aarch64_sve_cpuid)) - @eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False)) - end + drs = _dynamic_register_size() + register_size() ≠ drs && _set_sve_vector_width!(drs) + hassve = _has_aarch64_sve() + if hassve ≠ has_feature(Val(:aarch64_sve_cpuid)) + @eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False)) + end end fma_fast() = True() @@ -53,4 +55,3 @@ has_opmask_registers() = has_feature(Val(:aarch64_sve_cpuid)) fast_int64_to_double() = True() fast_half() = False() - diff --git a/src/cpu_info_generic.jl b/src/cpu_info_generic.jl index c0758f3..2c668a5 100644 --- a/src/cpu_info_generic.jl +++ b/src/cpu_info_generic.jl @@ -9,4 +9,3 @@ reset_extra_features!() = nothing fast_int64_to_double() = True() fast_half() = False() - diff --git a/src/cpu_info_x86.jl b/src/cpu_info_x86.jl index b0348c0..db71014 100644 --- a/src/cpu_info_x86.jl +++ b/src/cpu_info_x86.jl @@ -1,13 +1,8 @@ - fma_fast() = has_feature(Val(:x86_64_fma)) | has_feature(Val(:x86_64_fma4)) register_size() = ifelse( - has_feature(Val(:x86_64_avx512f)), - StaticInt{64}(), - ifelse( - has_feature(Val(:x86_64_avx)), - StaticInt{32}(), - StaticInt{16}() - ) + has_feature(Val(:x86_64_avx512f)), + StaticInt{64}(), + ifelse(has_feature(Val(:x86_64_avx)), StaticInt{32}(), StaticInt{16}()), ) const simd_integer_register_size = register_size # simd_integer_register_size() = ifelse( @@ -20,9 +15,10 @@ const simd_integer_register_size = register_size # ) # ) if Sys.ARCH === :i686 - register_count() = StaticInt{8}() + register_count() = StaticInt{8}() elseif Sys.ARCH === :x86_64 - register_count() = ifelse(has_feature(Val(:x86_64_avx512f)), StaticInt{32}(), StaticInt{16}()) + register_count() = + ifelse(has_feature(Val(:x86_64_avx512f)), StaticInt{32}(), StaticInt{16}()) end has_opmask_registers() = has_feature(Val(:x86_64_avx512f)) @@ -43,67 +39,67 @@ end end end -function make_generic(target) - if occursin("tigerlake", target) || occursin("znver4", target) || occursin("sapphirerapids", target) - # most feature-complete architectures we use - setfeaturetrue(:x86_64_avx512ifma) - setfeaturetrue(:x86_64_avx512vl) - setfeaturetrue(:x86_64_avx512bw) - setfeaturetrue(:x86_64_avx512dq) - setfeaturetrue(:x86_64_avx512f) - setfeaturetrue(:x86_64_avx2) - setfeaturetrue(:x86_64_bmi2) - setfeaturetrue(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - elseif occursin("icelake", target) || occursin("skylake-avx512", target) || occursin("rocketlake", target) || occursin("cascadelake", target) - # no ifma, but avx512f and avx512dq - setfeaturefalse(:x86_64_avx512ifma) - setfeaturetrue(:x86_64_avx512vl) - setfeaturetrue(:x86_64_avx512bw) - setfeaturetrue(:x86_64_avx512dq) - setfeaturetrue(:x86_64_avx512f) - setfeaturetrue(:x86_64_avx2) - setfeaturetrue(:x86_64_bmi2) - setfeaturetrue(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - elseif occursin("znver", target) || occursin("lake", target) || occursin("well", target) - # no avx512, but avx2, fma, and bmi2 - # znver tries to capture all zen < 4 - # lake tries to capture lakes we didn't single out above as having avx512 - # - setfeaturefalse(:x86_64_avx512ifma) - setfeaturefalse(:x86_64_avx512vl) - setfeaturefalse(:x86_64_avx512bw) - setfeaturefalse(:x86_64_avx512dq) - setfeaturefalse(:x86_64_avx512f) - setfeaturetrue(:x86_64_avx2) - setfeaturetrue(:x86_64_bmi2) - setfeaturetrue(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - elseif occursin("ivybridge", target) || occursin("sandybridge", target) - # has avx, and that is about it we care about - setfeaturefalse(:x86_64_avx512ifma) - setfeaturefalse(:x86_64_avx512vl) - setfeaturefalse(:x86_64_avx512bw) - setfeaturefalse(:x86_64_avx512dq) - setfeaturefalse(:x86_64_avx512f) - setfeaturefalse(:x86_64_avx2) - setfeaturefalse(:x86_64_bmi2) - setfeaturefalse(:x86_64_fma) - setfeaturetrue(:x86_64_avx) - else - # hopefully we didn't miss something - # TODO: sapphire rapids - setfeaturefalse(:x86_64_avx512ifma) - setfeaturefalse(:x86_64_avx512vl) - setfeaturefalse(:x86_64_avx512bw) - setfeaturefalse(:x86_64_avx512dq) - setfeaturefalse(:x86_64_avx512f) - setfeaturefalse(:x86_64_avx2) - setfeaturefalse(:x86_64_bmi2) - setfeaturefalse(:x86_64_fma) - setfeaturefalse(:x86_64_avx) - end - return nothing -end - +# function make_generic(target) +# if occursin("tigerlake", target) || occursin("znver4", target) || occursin("sapphirerapids", target) +# # most feature-complete architectures we use +# setfeaturetrue(:x86_64_avx512ifma) +# setfeaturetrue(:x86_64_avx512vl) +# setfeaturetrue(:x86_64_avx512bw) +# setfeaturetrue(:x86_64_avx512dq) +# setfeaturetrue(:x86_64_avx512f) +# setfeaturetrue(:x86_64_avx2) +# setfeaturetrue(:x86_64_bmi2) +# setfeaturetrue(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# elseif occursin("icelake", target) || occursin("skylake-avx512", target) || occursin("rocketlake", target) || occursin("cascadelake", target) +# # no ifma, but avx512f and avx512dq +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturetrue(:x86_64_avx512vl) +# setfeaturetrue(:x86_64_avx512bw) +# setfeaturetrue(:x86_64_avx512dq) +# setfeaturetrue(:x86_64_avx512f) +# setfeaturetrue(:x86_64_avx2) +# setfeaturetrue(:x86_64_bmi2) +# setfeaturetrue(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# elseif occursin("znver", target) || occursin("lake", target) || occursin("well", target) +# # no avx512, but avx2, fma, and bmi2 +# # znver tries to capture all zen < 4 +# # lake tries to capture lakes we didn't single out above as having avx512 +# # +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturefalse(:x86_64_avx512vl) +# setfeaturefalse(:x86_64_avx512bw) +# setfeaturefalse(:x86_64_avx512dq) +# setfeaturefalse(:x86_64_avx512f) +# setfeaturetrue(:x86_64_avx2) +# setfeaturetrue(:x86_64_bmi2) +# setfeaturetrue(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# elseif occursin("ivybridge", target) || occursin("sandybridge", target) +# # has avx, and that is about it we care about +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturefalse(:x86_64_avx512vl) +# setfeaturefalse(:x86_64_avx512bw) +# setfeaturefalse(:x86_64_avx512dq) +# setfeaturefalse(:x86_64_avx512f) +# setfeaturefalse(:x86_64_avx2) +# setfeaturefalse(:x86_64_bmi2) +# setfeaturefalse(:x86_64_fma) +# setfeaturetrue(:x86_64_avx) +# else +# # hopefully we didn't miss something +# # TODO: sapphire rapids +# setfeaturefalse(:x86_64_avx512ifma) +# setfeaturefalse(:x86_64_avx512vl) +# setfeaturefalse(:x86_64_avx512bw) +# setfeaturefalse(:x86_64_avx512dq) +# setfeaturefalse(:x86_64_avx512f) +# setfeaturefalse(:x86_64_avx2) +# setfeaturefalse(:x86_64_bmi2) +# setfeaturefalse(:x86_64_fma) +# setfeaturefalse(:x86_64_avx) +# end +# return nothing +# end +# diff --git a/src/pick_vector_width.jl b/src/pick_vector_width.jl index 0ebcbc9..5f52053 100644 --- a/src/pick_vector_width.jl +++ b/src/pick_vector_width.jl @@ -1,7 +1,9 @@ @static if isdefined(Base, Symbol("@constprop")) using Base: @constprop else - macro constprop(_, ex); esc(ex); end + macro constprop(_, ex) + esc(ex) + end end @generated function static_sizeof(::Type{T}) where {T} @@ -12,29 +14,53 @@ end smax(a::StaticInt, b::StaticInt) = ifelse(gt(a, b), a, b) smin(a::StaticInt, b::StaticInt) = ifelse(lt(a, b), a, b) -_pick_vector_width_float16(::StaticInt{RS}, ::True) where {RS} = StaticInt{RS}() ÷ StaticInt{2}() -_pick_vector_width_float16(::StaticInt{RS}, ::False) where {RS} = StaticInt{RS}() ÷ StaticInt{4}() -pick_vector_width(::Type{Float16}) = _pick_vector_width_float16(register_size(Float32), fast_half()) +_pick_vector_width_float16(::StaticInt{RS}, ::True) where {RS} = + StaticInt{RS}() ÷ StaticInt{2}() +_pick_vector_width_float16(::StaticInt{RS}, ::False) where {RS} = + StaticInt{RS}() ÷ StaticInt{4}() +pick_vector_width(::Type{Float16}) = + _pick_vector_width_float16(register_size(Float32), fast_half()) pick_vector_width(::Type{T}) where {T} = register_size(T) ÷ static_sizeof(T) -@inline @constprop :aggressive function _pick_vector_width(min_W, max_W, ::Type{T}, ::Type{S}, args::Vararg{Any,K}) where {K,S,T} +@inline @constprop :aggressive function _pick_vector_width( + min_W, + max_W, + ::Type{T}, + ::Type{S}, + args::Vararg{Any,K}, +) where {K,S,T} _max_W = smin(max_W, pick_vector_width(T)) _pick_vector_width(min_W, _max_W, S, args...) end -@inline @constprop :aggressive function _pick_vector_width(min_W, max_W, ::Type{T}) where {T} +@inline @constprop :aggressive function _pick_vector_width( + min_W, + max_W, + ::Type{T}, +) where {T} _max_W = smin(max_W, pick_vector_width(T)) smax(min_W, _max_W) end -@inline @constprop :aggressive function pick_vector_width(::Type{T}, ::Type{S}, args::Vararg{Any,K}) where {T,S,K} +@inline @constprop :aggressive function pick_vector_width( + ::Type{T}, + ::Type{S}, + args::Vararg{Any,K}, +) where {T,S,K} _pick_vector_width(One(), register_size(), T, S, args...) end -@inline @constprop :aggressive function pick_vector_width(::Union{Val{P},StaticInt{P}}, ::Type{T}, ::Type{S}, args::Vararg{Any,K}) where {P,T,S,K} +@inline @constprop :aggressive function pick_vector_width( + ::Union{Val{P},StaticInt{P}}, + ::Type{T}, + ::Type{S}, + args::Vararg{Any,K}, +) where {P,T,S,K} _pick_vector_width(One(), smin(register_size(), nextpow2(StaticInt{P}())), T, S, args...) end -@inline @constprop :aggressive function pick_vector_width(::Union{Val{P},StaticInt{P}}, ::Type{T}) where {P,T} +@inline @constprop :aggressive function pick_vector_width( + ::Union{Val{P},StaticInt{P}}, + ::Type{T}, +) where {P,T} _pick_vector_width(One(), smin(register_size(), nextpow2(StaticInt{P}())), T) end @inline function pick_vector_width_shift(args::Vararg{Any,K}) where {K} W = pick_vector_width(args...) W, intlog2(W) end - diff --git a/src/static_features.jl b/src/static_features.jl new file mode 100644 index 0000000..c6caac9 --- /dev/null +++ b/src/static_features.jl @@ -0,0 +1,28 @@ +has_feature(::Val{S}) where {S} = has_feature(S) + +@inline @generated function has_feature(my_feature::Symbol) + + features, features_cstring = feature_string() + + matches = map(features) do feature + fname, has = process_feature(feature) + val = has ? True() : False() + sname = Symbol(fname) + + :( + if my_feature == $(Meta.quot(sname)) + return $val + end + ) + end + + push!(matches, :(return False())) + + Libc.free(features_cstring) + + return quote + begin + $(matches...) + end + end +end