Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions R/measure_importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ measure_min_depth <- function(min_depth_frame, mean_sample){

# Calculate the number of nodes split on each variable for a data frame with the whole forest
measure_no_of_nodes <- function(forest_table){
`split var` <- NULL
frame <- dplyr::group_by(forest_table, variable = variable) %>%
dplyr::summarize(no_of_nodes = dplyr::n())
frame <- as.data.frame(frame[!is.na(frame$variable),])
Expand Down Expand Up @@ -60,7 +59,6 @@ measure_vimp_ranger <- function(forest){

# Calculate the number of trees using each variable for splitting
measure_no_of_trees <- function(min_depth_frame){
variable <- NULL
frame <- dplyr::group_by(min_depth_frame, variable) %>%
dplyr::summarize(no_of_trees = n()) %>%
as.data.frame()
Expand All @@ -69,7 +67,6 @@ measure_no_of_trees <- function(min_depth_frame){

# Calculate the number of times each variable is split on the root node
measure_times_a_root <- function(min_depth_frame){
variable <- NULL
frame <- min_depth_frame[min_depth_frame$minimal_depth == 0, ] %>%
dplyr::group_by(variable) %>%
dplyr::summarize(times_a_root = n()) %>%
Expand Down Expand Up @@ -113,7 +110,6 @@ measure_importance <- function(forest, mean_sample = "top_trees", measures = NUL
#' @importFrom data.table rbindlist
#' @export
measure_importance.randomForest <- function(forest, mean_sample = "top_trees", measures = NULL){
tree <- NULL; `split var` <- NULL; depth <- NULL
if(is.null(measures)){
if(forest$type %in% c("classification", "unsupervised")){
measures <- c("mean_min_depth", "no_of_nodes", "accuracy_decrease",
Expand Down Expand Up @@ -177,7 +173,6 @@ measure_importance.randomForest <- function(forest, mean_sample = "top_trees", m
#' @importFrom data.table rbindlist
#' @export
measure_importance.ranger <- function(forest, mean_sample = "top_trees", measures = NULL){
tree <- NULL; splitvarName <- NULL; depth <- NULL
if(is.null(measures)){
measures <- c("mean_min_depth", "no_of_nodes", forest$importance.mode, "no_of_trees", "times_a_root", "p_value")
}
Expand Down Expand Up @@ -296,7 +291,6 @@ plot_multi_way_importance <- function(importance_frame, x_measure = "mean_min_de
y_measure = "times_a_root", size_measure = NULL,
min_no_of_trees = 0, no_of_labels = 10,
main = "Multi-way importance plot"){
variable <- NULL
if(any(c("randomForest", "ranger") %in% class(importance_frame))){
importance_frame <- measure_importance(importance_frame)
}
Expand Down
4 changes: 0 additions & 4 deletions R/min_depth_distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#' @import dplyr
#' @importFrom data.table rbindlist
min_depth_distribution <- function(forest){
tree <- NULL; `split var` <- NULL; depth <- NULL
forest_table <- forest2df(forest)
min_depth_frame <- dplyr::group_by(forest_table, tree, variable) %>%
dplyr::summarize(minimal_depth = min(depth), .groups = "drop")
Expand All @@ -24,7 +23,6 @@ min_depth_distribution <- function(forest){

# Count the trees in which each variable had a given minimal depth
min_depth_count <- function(min_depth_frame){
tree <- NULL; minimal_depth <- NULL; variable <- NULL
mean_tree_depth <- dplyr::group_by(min_depth_frame, tree) %>%
dplyr::summarize(depth = max(minimal_depth) + 1) %>%
as.data.frame()
Expand All @@ -45,7 +43,6 @@ min_depth_count <- function(min_depth_frame){

# Get a data frame with means of minimal depth; mean_sample is one of "all_trees", "top_trees" or "relevant_trees"
get_min_depth_means <- function(min_depth_frame, min_depth_count_list, mean_sample){
.SD <- NULL; variable <- NULL
if(mean_sample == "all_trees"){
min_depth_count_list[[1]][is.na(min_depth_count_list[[1]]$minimal_depth), "minimal_depth"] <- min_depth_count_list[[3]]
min_depth_means <-
Expand Down Expand Up @@ -89,7 +86,6 @@ get_min_depth_means <- function(min_depth_frame, min_depth_count_list, mean_samp
plot_min_depth_distribution <- function(min_depth_frame, k = 10, min_no_of_trees = 0,
mean_sample = "top_trees", mean_scale = FALSE, mean_round = 2,
main = "Distribution of minimal depth and its mean"){
minimal_depth <- NULL; mean_minimal_depth_label <- NULL; mean_minimal_depth <- NULL
if(any(c("randomForest", "ranger") %in% class(min_depth_frame))){
min_depth_frame <- min_depth_distribution(min_depth_frame)
}
Expand Down
4 changes: 0 additions & 4 deletions R/min_depth_interactions.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Calculate conditional depth in a tree with respect to all variables from vector vars
conditional_depth <- function(frame, vars){
`.SD` <- NULL; depth <- NULL; splitvarName <- NULL
index <- data.table::as.data.table(frame)[
!is.na(variable), .SD[which.min(depth), "number"], by = variable
]
Expand All @@ -27,7 +26,6 @@ conditional_depth <- function(frame, vars){

# Get a data frame with values of minimal depth conditional on selected variables for the whole forest
min_depth_interactions_values <- function(forest, vars){
`.` <- NULL; .SD <- NULL; tree <- NULL; `split var` <- NULL
interactions_frame <- as.data.frame(forest2df(forest))
interactions_frame[vars] <- NA_real_
interactions_frame <-
Expand Down Expand Up @@ -71,7 +69,6 @@ min_depth_interactions_values <- function(forest, vars){
#' @export
min_depth_interactions <- function(forest, vars = important_variables(measure_importance(forest)),
mean_sample = "top_trees", uncond_mean_sample = mean_sample){
variable <- NULL; `.` <- NULL; tree <- NULL; `split var` <- NULL; depth <- NULL
ntree <- ntrees(forest)
min_depth_interactions_frame <- min_depth_interactions_values(forest, vars)
mean_tree_depth <- min_depth_interactions_frame[[2]]
Expand Down Expand Up @@ -145,7 +142,6 @@ min_depth_interactions <- function(forest, vars = important_variables(measure_im
plot_min_depth_interactions <- function(interactions_frame, k = 30,
main = paste0("Mean minimal depth for ",
paste0(k, " most frequent interactions"))){
mean_min_depth <- NULL; occurrences <- NULL; uncond_mean_min_depth <- NULL
if(any(c("randomForest", "ranger") %in% class(interactions_frame))){
interactions_frame <- min_depth_interactions(interactions_frame)
}
Expand Down
23 changes: 21 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ max_na <- function(x) {
max(x, na.rm = TRUE)
}

utils::globalVariables(c("prediction", "variable"))

# Depth of each node of a single tree.
# The input is a matrix with left and right child nodes in 1:nrow(childs).
tree_depth <- function(childs) {
Expand Down Expand Up @@ -75,6 +73,27 @@ ntrees <- function(x) {
}

# Runs tree2df() on every tree index in 1..ntrees(x) and row-binds the
# per-tree results into a single table with rbindlist().
#' @importFrom data.table rbindlist
forest2df <- function(x) {
  tree_ids <- seq_len(ntrees(x))
  per_tree <- lapply(tree_ids, function(tree_id) tree2df(x, tree_id))
  rbindlist(per_tree)
}

# Register the names that are referenced through non-standard evaluation
# (column names and data.table/magrittr pronouns) so that R CMD check does not
# raise "no visible binding for global variable" notes for them.
utils::globalVariables(c(
  ".", ".SD",
  "depth",
  "mean_min_depth", "mean_minimal_depth", "mean_minimal_depth_label",
  "minimal_depth",
  "occurrences",
  "prediction",
  "split var", "splitvarName",
  "tree",
  "uncond_mean_min_depth",
  "variable"
))