From e9bf7bd68386ee7b3ff9ba80fb8e4d52ddb08bc1 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Mon, 25 Mar 2024 18:24:49 +0100 Subject: [PATCH] Deal with the global variable note via utils::globalVariables() --- R/measure_importance.R | 6 ------ R/min_depth_distribution.R | 4 ---- R/min_depth_interactions.R | 4 ---- R/utils.R | 23 +++++++++++++++++++++-- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/R/measure_importance.R b/R/measure_importance.R index 0063e06..12a81dc 100644 --- a/R/measure_importance.R +++ b/R/measure_importance.R @@ -8,7 +8,6 @@ measure_min_depth <- function(min_depth_frame, mean_sample){ # Calculate the number of nodes split on each variable for a data frame with the whole forest measure_no_of_nodes <- function(forest_table){ - `split var` <- NULL frame <- dplyr::group_by(forest_table, variable = variable) %>% dplyr::summarize(no_of_nodes = dplyr::n()) frame <- as.data.frame(frame[!is.na(frame$variable),]) @@ -60,7 +59,6 @@ measure_vimp_ranger <- function(forest){ # Calculate the number of trees using each variable for splitting measure_no_of_trees <- function(min_depth_frame){ - variable <- NULL frame <- dplyr::group_by(min_depth_frame, variable) %>% dplyr::summarize(no_of_trees = n()) %>% as.data.frame() @@ -69,7 +67,6 @@ measure_no_of_trees <- function(min_depth_frame){ # Calculate the number of times each variable is split on the root node measure_times_a_root <- function(min_depth_frame){ - variable <- NULL frame <- min_depth_frame[min_depth_frame$minimal_depth == 0, ] %>% dplyr::group_by(variable) %>% dplyr::summarize(times_a_root = n()) %>% @@ -113,7 +110,6 @@ measure_importance <- function(forest, mean_sample = "top_trees", measures = NUL #' @importFrom data.table rbindlist #' @export measure_importance.randomForest <- function(forest, mean_sample = "top_trees", measures = NULL){ - tree <- NULL; `split var` <- NULL; depth <- NULL if(is.null(measures)){ if(forest$type %in% c("classification", "unsupervised")){ measures <- c("mean_min_depth", "no_of_nodes", "accuracy_decrease", @@ -177,7 +173,6 @@ measure_importance.randomForest <- function(forest, mean_sample = "top_trees", m #' @importFrom data.table rbindlist #' @export measure_importance.ranger <- function(forest, mean_sample = "top_trees", measures = NULL){ - tree <- NULL; splitvarName <- NULL; depth <- NULL if(is.null(measures)){ measures <- c("mean_min_depth", "no_of_nodes", forest$importance.mode, "no_of_trees", "times_a_root", "p_value") } @@ -296,7 +291,6 @@ plot_multi_way_importance <- function(importance_frame, x_measure = "mean_min_de y_measure = "times_a_root", size_measure = NULL, min_no_of_trees = 0, no_of_labels = 10, main = "Multi-way importance plot"){ - variable <- NULL if(any(c("randomForest", "ranger") %in% class(importance_frame))){ importance_frame <- measure_importance(importance_frame) } diff --git a/R/min_depth_distribution.R b/R/min_depth_distribution.R index da747e9..d35a6d3 100644 --- a/R/min_depth_distribution.R +++ b/R/min_depth_distribution.R @@ -14,7 +14,6 @@ #' @import dplyr #' @importFrom data.table rbindlist min_depth_distribution <- function(forest){ - tree <- NULL; `split var` <- NULL; depth <- NULL forest_table <- forest2df(forest) min_depth_frame <- dplyr::group_by(forest_table, tree, variable) %>% dplyr::summarize(minimal_depth = min(depth), .groups = "drop") @@ -24,7 +23,6 @@ min_depth_distribution <- function(forest){ # Count the trees in which each variable had a given minimal depth min_depth_count <- function(min_depth_frame){ - tree <- NULL; minimal_depth <- NULL; variable <- NULL mean_tree_depth <- dplyr::group_by(min_depth_frame, tree) %>% dplyr::summarize(depth = max(minimal_depth) + 1) %>% as.data.frame() @@ -45,7 +43,6 @@ min_depth_count <- function(min_depth_frame){ # Get a data frame with means of minimal depth calculated using sample = c("all_trees", "top_trees", "relevant_trees") get_min_depth_means <- function(min_depth_frame, min_depth_count_list, mean_sample){ - .SD <- NULL; variable <- NULL if(mean_sample == "all_trees"){ min_depth_count_list[[1]][is.na(min_depth_count_list[[1]]$minimal_depth), "minimal_depth"] <- min_depth_count_list[[3]] min_depth_means <- @@ -89,7 +86,6 @@ get_min_depth_means <- function(min_depth_frame, min_depth_count_list, mean_samp plot_min_depth_distribution <- function(min_depth_frame, k = 10, min_no_of_trees = 0, mean_sample = "top_trees", mean_scale = FALSE, mean_round = 2, main = "Distribution of minimal depth and its mean"){ - minimal_depth <- NULL; mean_minimal_depth_label <- NULL; mean_minimal_depth <- NULL if(any(c("randomForest", "ranger") %in% class(min_depth_frame))){ min_depth_frame <- min_depth_distribution(min_depth_frame) } diff --git a/R/min_depth_interactions.R b/R/min_depth_interactions.R index 6a2439c..75da9b3 100644 --- a/R/min_depth_interactions.R +++ b/R/min_depth_interactions.R @@ -1,6 +1,5 @@ # Calculate conditional depth in a tree with respect to all variables from vector vars conditional_depth <- function(frame, vars){ - `.SD` <- NULL; depth <- NULL; splitvarName <- NULL index <- data.table::as.data.table(frame)[ !is.na(variable), .SD[which.min(depth), "number"], by = variable ] @@ -27,7 +26,6 @@ conditional_depth <- function(frame, vars){ # Get a data frame with values of minimal depth conditional on selected variables for the whole forest min_depth_interactions_values <- function(forest, vars){ - `.` <- NULL; .SD <- NULL; tree <- NULL; `split var` <- NULL interactions_frame <- as.data.frame(forest2df(forest)) interactions_frame[vars] <- NA_real_ interactions_frame <- @@ -71,7 +69,6 @@ min_depth_interactions_values <- function(forest, vars){ #' @export min_depth_interactions <- function(forest, vars = important_variables(measure_importance(forest)), mean_sample = "top_trees", uncond_mean_sample = mean_sample){ - variable <- NULL; `.` <- NULL; tree <- NULL; `split var` <- NULL; depth <- NULL ntree <- ntrees(forest) min_depth_interactions_frame <- min_depth_interactions_values(forest, vars) mean_tree_depth <- min_depth_interactions_frame[[2]] @@ -145,7 +142,6 @@ min_depth_interactions <- function(forest, vars = important_variables(measure_im plot_min_depth_interactions <- function(interactions_frame, k = 30, main = paste0("Mean minimal depth for ", paste0(k, " most frequent interactions"))){ - mean_min_depth <- NULL; occurrences <- NULL; uncond_mean_min_depth <- NULL if(any(c("randomForest", "ranger") %in% class(interactions_frame))){ interactions_frame <- min_depth_interactions(interactions_frame) } diff --git a/R/utils.R b/R/utils.R index 71f7ff9..1d08b23 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,8 +21,6 @@ max_na <- function(x) { max(x, na.rm = TRUE) } -utils::globalVariables(c("prediction", "variable")) - # Depth of each node of a single tree. # The input is a matrix with left and right child nodes in 1:nrow(childs). tree_depth <- function(childs) { @@ -75,6 +73,27 @@ ntrees <- function(x) { } # Applies tree2df() to each tree and stacks the results +#' @importFrom data.table rbindlist forest2df <- function(x) { rbindlist(lapply(seq_len(ntrees(x)), function(i) tree2df(x, i))) } + +# Deal with the global variable note +utils::globalVariables( + c( + ".", + ".SD", + "depth", + "mean_min_depth", + "mean_minimal_depth", + "mean_minimal_depth_label", + "minimal_depth", + "occurrences", + "prediction", + "split var", + "splitvarName", + "tree", + "uncond_mean_min_depth", + "variable" + ) +)