Skip to content

Commit 722e041

Browse files
Format .jl files
1 parent d5ee502 commit 722e041

File tree

8 files changed

+389
-209
lines changed

8 files changed

+389
-209
lines changed

docs/make.jl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,22 @@ using OMOPCommonDataModel
66
using FeatureTransforms
77
using DuckDB
88

9-
DocMeta.setdocmeta!(HealthBase, :DocTestSetup, :(using HealthBase, Tables); recursive = true)
9+
DocMeta.setdocmeta!(
10+
HealthBase,
11+
:DocTestSetup,
12+
:(using HealthBase, Tables);
13+
recursive = true,
14+
)
1015

1116
makedocs(;
12-
modules = [
13-
HealthBase,
14-
isdefined(Base, :get_extension) ?
15-
Base.get_extension(HealthBase, :HealthBaseOMOPCDMExt) : HealthBase.HealthBaseOMOPCDMExt
17+
modules = [
18+
HealthBase,
19+
isdefined(Base, :get_extension) ?
20+
Base.get_extension(HealthBase, :HealthBaseOMOPCDMExt) :
21+
HealthBase.HealthBaseOMOPCDMExt,
1622
],
1723
checkdocs = :none,
18-
authors = "Jacob S. Zelko, Dilum Aluthge and contributors",
24+
authors = "Jacob S. Zelko, Dilum Aluthge and contributors",
1925
repo = "https://github.com/JuliaHealth/HealthBase.jl/blob/{commit}{path}#{line}",
2026
sitename = "HealthBase.jl",
2127
format = Documenter.HTML(;
@@ -26,12 +32,10 @@ modules = [
2632
pages = [
2733
"Home" => "index.md",
2834
"Quickstart" => "quickstart.md",
29-
3035
"Workflow Guides" => [
3136
"Observational Template Workflow" => "observational_template_workflow.md",
3237
"OMOP CDM Workflow" => "OMOPCDMWorkflow.md",
3338
],
34-
3539
"HealthTable System" => [
3640
"HealthTable: General Tables.jl Interface" => "HealthTableGeneral.md",
3741
"HealthTable: OMOP CDM Support" => "HealthTableOMOPCDM.md",

ext/HealthBaseDrWatsonExt.jl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,12 @@ The study environment remains activated for you to immediately add packages.
193193
julia> initialize_study("Cardiooncology", "Jacob S. Zelko, Jakub Mitura"; github_name = "TheCedarPrince", template=:observational)
194194
```
195195
"""
196-
function HealthBase.initialize_study(path, authors = nothing; github_name = "PutYourGitHubNameHere", template::Symbol = :default)
196+
function HealthBase.initialize_study(
197+
path,
198+
authors = nothing;
199+
github_name = "PutYourGitHubNameHere",
200+
template::Symbol = :default,
201+
)
197202
tpl = study_template(template).template
198203
ftg = study_template(template).folders_to_gitignore
199204

@@ -222,7 +227,7 @@ function HealthBase.initialize_study(path, authors = nothing; github_name = "Put
222227
folders_to_gitignore = ftg,
223228
force = true,
224229
add_docs = true,
225-
github_name = github_name
230+
github_name = github_name,
226231
)
227232
cd(path)
228233
end

ext/HealthBaseOMOPCDMExt.jl

Lines changed: 92 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,35 @@ using OMOPCommonDataModel
66
using Serialization
77
using InlineStrings
88
using Dates
9-
import FeatureTransforms:
10-
OneHotEncoding, apply_append
11-
using DuckDB
9+
import FeatureTransforms: OneHotEncoding, apply_append
10+
using DuckDB
1211
using DBInterface: execute
1312

1413
# NOTE: In the future, replace this with OMOP CDM version info directly from OMOPCommonDataModel.jl dependencies.
1514
const OMOPCDM_VERSIONS = deserialize(joinpath(@__DIR__, "..", "assets", "version_info"))
1615

1716
# Mapping OMOP CDM datatypes to Julia types
1817
const DATATYPE_MAP = Dict(
19-
"integer" => Int64, "Integer" => Int64, "bigint" => Int64,
18+
"integer" => Int64,
19+
"Integer" => Int64,
20+
"bigint" => Int64,
2021
"float" => Float64,
21-
"date" => Date, "datetime" => DateTime,
22-
"varchar(1)" => String, "varchar(2)" => String, "varchar(3)" => String,
23-
"varchar(9)" => String, "varchar(10)" => String, "varchar(20)" => String,
24-
"varchar(25)" => String, "varchar(50)" => String, "varchar(80)" => String,
25-
"varchar(250)" => String, "varchar(255)" => String, "varchar(1000)" => String,
26-
"varchar(2000)" => String, "varchar(MAX)" => String
22+
"date" => Date,
23+
"datetime" => DateTime,
24+
"varchar(1)" => String,
25+
"varchar(2)" => String,
26+
"varchar(3)" => String,
27+
"varchar(9)" => String,
28+
"varchar(10)" => String,
29+
"varchar(20)" => String,
30+
"varchar(25)" => String,
31+
"varchar(50)" => String,
32+
"varchar(80)" => String,
33+
"varchar(250)" => String,
34+
"varchar(255)" => String,
35+
"varchar(1000)" => String,
36+
"varchar(2000)" => String,
37+
"varchar(MAX)" => String,
2738
)
2839

2940
function __init__()
@@ -104,23 +115,28 @@ ht = HealthTable(df; disable_type_enforcement = true)
104115
Use disable_type_enforcement=true if you're exploring or cleaning data but for modeling or analysis, validated types are strongly recommended.
105116
"""
106117
function HealthBase.HealthTable(
107-
df::DataFrame;
108-
omop_cdm_version::String="v5.4.0",
109-
disable_type_enforcement=false,
110-
collect_errors=true
118+
df::DataFrame;
119+
omop_cdm_version::String = "v5.4.0",
120+
disable_type_enforcement = false,
121+
collect_errors = true,
111122
)
112123
if !haskey(OMOPCDM_VERSIONS, omop_cdm_version)
113-
throw(ArgumentError("OMOP CDM version '$(omop_cdm_version)' is not supported. Available versions: $(keys(OMOPCDM_VERSIONS))"))
124+
throw(
125+
ArgumentError(
126+
"OMOP CDM version '$(omop_cdm_version)' is not supported. Available versions: $(keys(OMOPCDM_VERSIONS))",
127+
),
128+
)
114129
end
115130

116131
omop_fields = OMOPCDM_VERSIONS[omop_cdm_version][:fields]
117132
@assert !isempty(omop_fields) "OMOP CDM version $(omop_cdm_version) has no registered fields."
118-
failed_columns = Vector{NamedTuple{(:colname, :type, :expected), Tuple{String, Any, Any}}}()
133+
failed_columns =
134+
Vector{NamedTuple{(:colname, :type, :expected),Tuple{String,Any,Any}}}()
119135
extra_columns = String[]
120136

121137
for col in names(df)
122138
col_symbol = Symbol(col)
123-
139+
124140
if !haskey(omop_fields, col_symbol)
125141
push!(extra_columns, col)
126142
continue
@@ -131,22 +147,43 @@ function HealthBase.HealthTable(
131147

132148
if !haskey(fieldinfo, :cdmDatatype)
133149
if !collect_errors
134-
throw(ArgumentError("Column '$(col)' is missing :cdmDatatype information in the schema."))
150+
throw(
151+
ArgumentError(
152+
"Column '$(col)' is missing :cdmDatatype information in the schema.",
153+
),
154+
)
135155
end
136-
push!(failed_columns, (colname=col, type=actual_type, expected="<missing from schema>"))
156+
push!(
157+
failed_columns,
158+
(colname = col, type = actual_type, expected = "<missing from schema>"),
159+
)
137160
else
138161
expected_string = fieldinfo[:cdmDatatype]
139162

140163
if !haskey(DATATYPE_MAP, expected_string)
141-
push!(failed_columns, (colname=col, type=actual_type, expected="Unrecognized OMOP datatype: $(expected_string)"))
164+
push!(
165+
failed_columns,
166+
(
167+
colname = col,
168+
type = actual_type,
169+
expected = "Unrecognized OMOP datatype: $(expected_string)",
170+
),
171+
)
142172
else
143173
expected_type = DATATYPE_MAP[expected_string]
144174

145-
if !(actual_type <: Union{expected_type, Missing})
175+
if !(actual_type <: Union{expected_type,Missing})
146176
if !collect_errors
147-
throw(ArgumentError("Column '$(col)' has type $(actual_type), but expected a subtype of $(expected_type)."))
177+
throw(
178+
ArgumentError(
179+
"Column '$(col)' has type $(actual_type), but expected a subtype of $(expected_type).",
180+
),
181+
)
148182
end
149-
push!(failed_columns, (colname=col, type=actual_type, expected=expected_type))
183+
push!(
184+
failed_columns,
185+
(colname = col, type = actual_type, expected = expected_type),
186+
)
150187
end
151188
end
152189

@@ -157,18 +194,28 @@ function HealthBase.HealthTable(
157194
end
158195
end
159196
end
160-
197+
161198
validation_msgs = String[]
162199

163200
if !isempty(failed_columns)
164-
error_details = join(["Column '$(err.colname)': has type $(err.type), expected $(err.expected)" for err in failed_columns], "\n")
165-
push!(validation_msgs, "OMOP CDM type validation failed for the following columns:\n" * error_details)
201+
error_details = join(
202+
[
203+
"Column '$(err.colname)': has type $(err.type), expected $(err.expected)"
204+
for err in failed_columns
205+
],
206+
"\n",
207+
)
208+
push!(
209+
validation_msgs,
210+
"OMOP CDM type validation failed for the following columns:\n" * error_details,
211+
)
166212
end
167213

168214
if !isempty(validation_msgs)
169215
full_message = join(validation_msgs, "\n\n") * "\n"
170216
if disable_type_enforcement
171-
@warn full_message * "\nType enforcement is disabled. Unexpected behavior may occur."
217+
@warn full_message *
218+
"\nType enforcement is disabled. Unexpected behavior may occur."
172219
else
173220
throw(ArgumentError(full_message))
174221
end
@@ -212,7 +259,7 @@ function HealthBase.one_hot_encode(
212259
ht::HealthTable;
213260
cols::Vector{Symbol},
214261
drop_original::Bool = true,
215-
return_features_only::Bool = false
262+
return_features_only::Bool = false,
216263
)
217264
df = copy(ht.source)
218265
missing = setdiff(cols, Symbol.(names(df)))
@@ -227,7 +274,7 @@ function HealthBase.one_hot_encode(
227274
cats = unique(skipmissing(df[!, col]))
228275
enc = OneHotEncoding(cats)
229276
header = Symbol.(string(col, "_", c) for c in cats)
230-
df = apply_append(df, enc; cols=[col], header=header)
277+
df = apply_append(df, enc; cols = [col], header = header)
231278
end
232279

233280
drop_original && select!(df, Not(cols))
@@ -266,13 +313,13 @@ ht_mapped = map_concepts(ht, :gender_concept_id, "gender_name", conn; schema = "
266313
"""
267314
function HealthBase.map_concepts(
268315
ht::HealthTable,
269-
cols::Union{Symbol, Vector{Symbol}},
316+
cols::Union{Symbol,Vector{Symbol}},
270317
conn::DuckDB.DB;
271-
new_cols::Union{Nothing, String, Vector{String}} = nothing,
318+
new_cols::Union{Nothing,String,Vector{String}} = nothing,
272319
drop_original::Bool = false,
273320
suffix::String = "_mapped",
274321
concept_table::String = "concept",
275-
schema::String = "main"
322+
schema::String = "main",
276323
)
277324
df = copy(ht.source)
278325
_map_concepts!(df, cols, conn; new_cols, drop_original, suffix, concept_table, schema)
@@ -309,13 +356,13 @@ map_concepts!(ht, :gender_concept_id, conn; new_cols="gender_name", schema="dbt_
309356
"""
310357
function HealthBase.map_concepts!(
311358
ht::HealthTable,
312-
cols::Union{Symbol, Vector{Symbol}},
359+
cols::Union{Symbol,Vector{Symbol}},
313360
conn::DuckDB.DB;
314-
new_cols::Union{Nothing, String, Vector{String}} = nothing,
361+
new_cols::Union{Nothing,String,Vector{String}} = nothing,
315362
drop_original::Bool = false,
316363
suffix::String = "_mapped",
317364
concept_table::String = "concept",
318-
schema::String = "main"
365+
schema::String = "main",
319366
)
320367
_map_concepts!(
321368
ht.source,
@@ -325,7 +372,7 @@ function HealthBase.map_concepts!(
325372
drop_original = drop_original,
326373
suffix = suffix,
327374
concept_table = concept_table,
328-
schema = schema
375+
schema = schema,
329376
)
330377
return ht
331378
end
@@ -351,13 +398,13 @@ Low-level internal helper to map concept IDs to names directly on a `DataFrame`.
351398
"""
352399
function _map_concepts!(
353400
df::DataFrame,
354-
cols::Union{Symbol, Vector{Symbol}},
401+
cols::Union{Symbol,Vector{Symbol}},
355402
conn::DuckDB.DB;
356-
new_cols::Union{Nothing, String, Vector{String}} = nothing,
403+
new_cols::Union{Nothing,String,Vector{String}} = nothing,
357404
drop_original::Bool = false,
358405
suffix::String = "_mapped",
359406
concept_table::String = "concept",
360-
schema::String = "main"
407+
schema::String = "main",
361408
)
362409
cols = isa(cols, Symbol) ? [cols] : cols
363410

@@ -391,7 +438,10 @@ function _map_concepts!(
391438
continue
392439
end
393440

394-
mapping = Dict((cid => cname) for (cid, cname) in zip(result_df.concept_id, result_df.concept_name))
441+
mapping = Dict(
442+
(cid => cname) for
443+
(cid, cname) in zip(result_df.concept_id, result_df.concept_name)
444+
)
395445
df[!, new_col] = map(x -> get(mapping, x, missing), df[!, col])
396446

397447
if drop_original
@@ -437,7 +487,8 @@ function HealthBase.apply_vocabulary_compression(
437487
counts = combine(groupby(df, col), nrow => :freq)
438488
to_compress = counts[counts.freq .< min_freq, col]
439489
if !isempty(to_compress)
440-
df[!, dest_col] = map(x -> in(x, to_compress) ? other_label : string(x), df[!, col])
490+
df[!, dest_col] =
491+
map(x -> in(x, to_compress) ? other_label : string(x), df[!, col])
441492
end
442493
end
443494

@@ -449,4 +500,3 @@ function HealthBase.apply_vocabulary_compression(
449500
end
450501

451502
end
452-

src/healthtable_interface.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,4 +119,4 @@ This function is part of the Tables.jl interface. It allows other packages to co
119119
"""
120120
Tables.materializer(::Type{<:HealthTable}) = DataFrame
121121

122-
export HealthTable
122+
export HealthTable

test/drwatsonext.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ cd("..")
99
rm("test_study", recursive = true, force = true)
1010

1111
mktemp() do fname, f
12-
write(f, "X")
13-
seek(f, 0)
14-
redirect_stdin(f) do
12+
write(f, "X")
13+
seek(f, 0)
14+
redirect_stdin(f) do
1515
@test initialize_study(path; template = :llm) == nothing
16-
end
16+
end
1717
end
1818

1919
cd("..")
@@ -46,7 +46,11 @@ quickactivate(path)
4646
cd("..")
4747
rm("test_study", recursive = true, force = true)
4848

49-
@test_throws ErrorException initialize_study(path; github_name = github_name, template = :foobar)
49+
@test_throws ErrorException initialize_study(
50+
path;
51+
github_name = github_name,
52+
template = :foobar,
53+
)
5054

5155
STUDY_TEMPLATES = Base.get_extension(HealthBase, :HealthBaseDrWatsonExt).STUDY_TEMPLATES
5256

0 commit comments

Comments
 (0)