From 2773ec110c78dfc04dd4692f3e57df1a954b10af Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 16:39:47 +0100 Subject: [PATCH 1/7] Enable users to map from GV to Julia value --- src/driver.jl | 4 ++-- src/irgen.jl | 6 ++++-- src/jlgen.jl | 26 +++++++++++++++++++------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index 950ea272..3d966f8a 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -197,7 +197,7 @@ const __llvm_initialized = Ref(false) end @tracepoint "IR generation" begin - ir, compiled = irgen(job) + ir, compiled, gv_to_value = irgen(job) if job.config.entry_abi === :specfunc entry_fn = compiled[job.source].specfunc else @@ -422,7 +422,7 @@ const __llvm_initialized = Ref(false) @tracepoint "verification" verify(ir) end - return ir, (; entry, compiled) + return ir, (; entry, compiled, gv_to_value) end @locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module, diff --git a/src/irgen.jl b/src/irgen.jl index a7c36a60..50475668 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -1,7 +1,7 @@ # LLVM IR generation function irgen(@nospecialize(job::CompilerJob)) - mod, compiled = @tracepoint "emission" compile_method_instance(job) + mod, compiled, gv_to_value = @tracepoint "emission" compile_method_instance(job) if job.config.entry_abi === :specfunc entry_fn = compiled[job.source].specfunc else @@ -120,7 +120,9 @@ function irgen(@nospecialize(job::CompilerJob)) can_throw(job) || lower_throw!(mod) end - return mod, compiled + # TODO: should we filter out non-preserved_gvs? 
+ + return mod, compiled, gv_to_value end diff --git a/src/jlgen.jl b/src/jlgen.jl index 330cf7f8..a54e67f2 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -786,15 +786,17 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) cache_gbl = nothing end + gv_to_value = Dict{String, Any}() + num_gvars = Ref{Csize_t}(0) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + C_NULL::Ptr{Cvoid})::Nothing + gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[]) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + gvs::Ptr{LLVM.API.LLVMOpaqueValue})::Nothing + if VERSION >= v"1.13.0-DEV.623" # Since Julia 1.13, the caller is responsible for initializing global variables that # point to global values or bindings with their address in memory. - num_gvars = Ref{Csize_t}(0) - @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, - C_NULL::Ptr{Cvoid})::Nothing - gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[]) - @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, - gvs::Ptr{LLVM.API.LLVMOpaqueValue})::Nothing inits = Vector{Ptr{Cvoid}}(undef, num_gvars[]) @ccall jl_get_llvm_gv_inits(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, inits::Ptr{Cvoid})::Nothing @@ -803,6 +805,16 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) gv = GlobalVariable(gv_ref) val = const_inttoptr(ConstantInt(Int64(init)), LLVM.PointerType()) initializer!(gv, val) + + # TODO: jl_binding_t? + gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(val) + end + else + for gv_ref in gvs + gv = GlobalVariable(gv_ref) + val = reinterpret(Ptr{Cvoid}, initializer(gv)) + # TODO: jl_binding_t? 
+ gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(val) end end @@ -874,7 +886,7 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) # ensure that the requested method instance was compiled @assert haskey(compiled, job.source) - return llvm_mod, compiled + return llvm_mod, compiled, gv_to_value end # partially revert JuliaLangjulia#49391 From 1c22f00aa820cf1d7062442240354fe9b76ed05d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 18:05:59 +0100 Subject: [PATCH 2/7] reconstruct map the hardway before 1.13 --- src/jlgen.jl | 80 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/src/jlgen.jl b/src/jlgen.jl index a54e67f2..b359a037 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -672,6 +672,28 @@ end CompilationPolicyExtern = 1 end +const AL_N_INLINE = 29 +# mirrows arraylist_t +mutable struct ArrayList + len::Csize_t + max::Csize_t + items::Ptr{Ptr{Cvoid}} + _space::NTuple{AL_N_INLINE, Ptr{Cvoid}} + + function ArrayList() + list = new(0, AL_N_INLINE, Ptr{Ptr{Cvoid}}(C_NULL), ntuple(_->Ptr{Cvoid}(C_NULL), AL_N_INLINE)) + list.items = Base.pointer_from_objref(list) + fieldoffset(typeof(list), 4) + + finalizer(list) do list + if list.items != Base.pointer_from_objref(list) + fieldoffset(typeof(list), 4) + Libc.free(list.items) + end + end + return list + end +end + + """ precompile(job::CompilerJob) @@ -787,16 +809,17 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) end gv_to_value = Dict{String, Any}() - num_gvars = Ref{Csize_t}(0) - @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, - C_NULL::Ptr{Cvoid})::Nothing - gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[]) - @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, - gvs::Ptr{LLVM.API.LLVMOpaqueValue})::Nothing if VERSION >= v"1.13.0-DEV.623" # Since Julia 1.13, the caller is responsible for initializing global variables that # 
point to global values or bindings with their address in memory. + num_gvars = Ref{Csize_t}(0) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + C_NULL::Ptr{Cvoid})::Nothing + gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[]) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + gvs::Ptr{LLVM.API.LLVMOpaqueValue})::Nothing + inits = Vector{Ptr{Cvoid}}(undef, num_gvars[]) @ccall jl_get_llvm_gv_inits(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, inits::Ptr{Cvoid})::Nothing @@ -807,15 +830,50 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) initializer!(gv, val) # TODO: jl_binding_t? + @show LLVM.name(gv), val gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(val) end else - for gv_ref in gvs - gv = GlobalVariable(gv_ref) - val = reinterpret(Ptr{Cvoid}, initializer(gv)) - # TODO: jl_binding_t? - gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(val) + # Prior to this version of Julia we only had access to the values that the global variables + # were initialized with, so we have to match them up manually. + # get the global values + if VERSION >= v"1.12.0-DEV.1703" + num_gvars = Ref{Csize_t}(0) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + C_NULL::Ptr{Cvoid})::Nothing + gvalues = Vector{Ptr{Cvoid}}(undef, num_gvars[]) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + gvalues::Ptr{Cvoid})::Nothing + else + # Sigh on older version of Julia we have to use `arraylist_t` which doesn't have a Julia API. 
+ gvars = ArrayList() + GC.@preserve gvars begin + p_gvars = Base.pointer_from_objref(gvars) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, p_gvars::Ptr{Cvoid})::Nothing + gvalues = Vector{Ptr{Cvoid}}(undef, gvars.len) + for i in 1:gvars.len + gvalues[i] = unsafe_load(gvars.items, i) + end + end + end + gvalues = Set(gvalues) + for gv in globals(llvm_mod) + init = LLVM.initializer(gv) + if init === nothing + continue + end + if init isa LLVM.ConstantExpr && opcode(init) == LLVM.API.LLVMIntToPtr + init = operands(init)[1] + end + if !(init isa LLVM.ConstantInt) + continue + end + ptr = reinterpret(Ptr{Cvoid}, convert(UInt, init)) + if ptr in gvalues + gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(ptr) + end end + @assert length(gv_to_value) == length(gvalues) end if VERSION >= v"1.13.0-DEV.1120" From 213a066d84df8b60dee9ebd1f15756ea275edebb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 18:17:44 +0100 Subject: [PATCH 3/7] WIP: Cache --- src/execution.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/execution.jl b/src/execution.jl index 9b4940a7..40485a9d 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -253,6 +253,13 @@ end if !ondisk_hit && path !== nothing && disk_cache_enabled() @debug "Writing out on-disk cache" job path mkpath(dirname(path)) + if haskey(asm.meta, :gv_to_value) + # TODO: Serializer cannot handle Core.IntrinsicFunction + # We kinda want Julia to serialize the values in `gv_to_value` in the pkgimg and us just having to store an index + # for now we just empty them out + # We would need to remove the initializers from LLVM IR as well to be correct, and then link these in at runtime + empty!(asm.meta.gv_to_value) + end entry = DiskCacheEntry(src.specTypes, cfg, asm) # atomic write to disk From 91012beeb9f24635962886314eb13d27b5d02155 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 18:19:50 +0100 Subject: [PATCH 4/7] fixup! 
WIP: Cache --- src/execution.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/execution.jl b/src/execution.jl index 40485a9d..94c4ebcb 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -253,12 +253,12 @@ end if !ondisk_hit && path !== nothing && disk_cache_enabled() @debug "Writing out on-disk cache" job path mkpath(dirname(path)) - if haskey(asm.meta, :gv_to_value) + if haskey(asm[2].meta, :gv_to_value) # TODO: Serializer cannot handle Core.IntrinsicFunction # We kinda want Julia to serialize the values in `gv_to_value` in the pkgimg and us just having to store an index # for now we just empty them out # We would need to remove the initializers from LLVM IR as well to be correct, and then link these in at runtime - empty!(asm.meta.gv_to_value) + empty!(asm[2].meta.gv_to_value) end entry = DiskCacheEntry(src.specTypes, cfg, asm) From acd969c52c71280a6984ff1ca69a5946911e91b3 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 20:06:33 +0100 Subject: [PATCH 5/7] fixup! fixup! 
WIP: Cache --- src/execution.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/execution.jl b/src/execution.jl index 94c4ebcb..78d05736 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -253,12 +253,12 @@ end if !ondisk_hit && path !== nothing && disk_cache_enabled() @debug "Writing out on-disk cache" job path mkpath(dirname(path)) - if haskey(asm[2].meta, :gv_to_value) + if haskey(asm[2], :gv_to_value) # TODO: Serializer cannot handle Core.IntrinsicFunction # We kinda want Julia to serialize the values in `gv_to_value` in the pkgimg and us just having to store an index # for now we just empty them out # We would need to remove the initializers from LLVM IR as well to be correct, and then link these in at runtime - empty!(asm[2].meta.gv_to_value) + empty!(asm[2].gv_to_value) end entry = DiskCacheEntry(src.specTypes, cfg, asm) From bc3e9d1be54f9cf1a81a595b328780fe7b5e092c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 20:34:52 +0100 Subject: [PATCH 6/7] don't initialize non-relocatible globals --- src/driver.jl | 12 ++++++++++++ src/irgen.jl | 7 +++++-- src/jlgen.jl | 20 ++++++++++---------- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index 3d966f8a..e21409e0 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -256,6 +256,7 @@ const __llvm_initialized = Ref(false) dyn_ir, dyn_meta = codegen(:llvm, CompilerJob(dyn_job; config)) dyn_entry_fn = LLVM.name(dyn_meta.entry) merge!(compiled, dyn_meta.compiled) + merge!(gv_to_value, dyn_meta.gv_to_value) @assert context(dyn_ir) == context(ir) link!(ir, dyn_ir) changed = true @@ -319,6 +320,17 @@ const __llvm_initialized = Ref(false) end end + # TODO: Move this to somewhere else? 
+ @tracepoint "Resolve relocations eagerly" for gv in globals(ir) + name = LLVM.name(gv) + init = get(gv_to_value, name, nothing) + if init !== nothing + @assert initializer(gv) === nothing + val = const_inttoptr(ConstantInt(reinterpret(UInt, init)), LLVM.PointerType()) + initializer!(gv, val) + end + end + @tracepoint "IR post-processing" begin # mark the kernel entry-point functions (optimization may need it) if job.config.kernel diff --git a/src/irgen.jl b/src/irgen.jl index 50475668..5149e9f0 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -55,6 +55,11 @@ function irgen(@nospecialize(job::CompilerJob)) new_name = safe_name(old_name) if old_name != new_name LLVM.name!(val, new_name) + val = get(gv_to_value, old_name, nothing) + if val !== nothing + delete!(gv_to_value, old_name) + gv_to_value[new_name] = val + end end end @@ -120,8 +125,6 @@ function irgen(@nospecialize(job::CompilerJob)) can_throw(job) || lower_throw!(mod) end - # TODO: should we filter out non-preserved_gvs? - return mod, compiled, gv_to_value end diff --git a/src/jlgen.jl b/src/jlgen.jl index b359a037..87786ee4 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -808,11 +808,12 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) cache_gbl = nothing end - gv_to_value = Dict{String, Any}() + gv_to_value = Dict{String, Ptr{Cvoid}}() + # The caller is responsible for initializing global variables that + # point to global values or bindings with their address in memory. + # For Julia < v"1.13" to enable relocation we strip out the initializers here. if VERSION >= v"1.13.0-DEV.623" - # Since Julia 1.13, the caller is responsible for initializing global variables that - # point to global values or bindings with their address in memory. 
num_gvars = Ref{Csize_t}(0) @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, C_NULL::Ptr{Cvoid})::Nothing @@ -826,12 +827,7 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) for (gv_ref, init) in zip(gvs, inits) gv = GlobalVariable(gv_ref) - val = const_inttoptr(ConstantInt(Int64(init)), LLVM.PointerType()) - initializer!(gv, val) - - # TODO: jl_binding_t? - @show LLVM.name(gv), val - gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(val) + gv_to_value[LLVM.name(gv)] = init end else # Prior to this version of Julia we only had access to the values that the global variables @@ -870,11 +866,15 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) end ptr = reinterpret(Ptr{Cvoid}, convert(UInt, init)) if ptr in gvalues - gv_to_value[LLVM.name(gv)] = Base.unsafe_pointer_to_objref(ptr) + gv_to_value[LLVM.name(gv)] = ptr end + LLVM.initializer!(gv, nothing) end @assert length(gv_to_value) == length(gvalues) end + # It's valid to call Base.unsafe_pointer_to_objref on values(gv_to_value), + # but we may not be able to "easily" obtain the pointer back later. + # (Types, etc, disallow Base.pointer_from_objref on them.) if VERSION >= v"1.13.0-DEV.1120" # on sufficiently recent versions of Julia, we can query the CIs compiled. From 9973a9ecd2c324c6573e93598b3553816e57d8e3 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 26 Nov 2025 20:36:52 +0100 Subject: [PATCH 7/7] fixup! 
don't initialize non-relocatible globals --- src/driver.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/driver.jl b/src/driver.jl index e21409e0..6bc9717e 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -325,7 +325,10 @@ const __llvm_initialized = Ref(false) name = LLVM.name(gv) init = get(gv_to_value, name, nothing) if init !== nothing - @assert initializer(gv) === nothing + if initializer(gv) !== nothing + # TODO: How is this happening? We should have stripped initializers earlier. + @show string(initializer(gv)), init + end val = const_inttoptr(ConstantInt(reinterpret(UInt, init)), LLVM.PointerType()) initializer!(gv, val) end