From 8c27b6d8e28ea80ed58c03ad8c7492e7b689ae5d Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:52:40 -0400 Subject: [PATCH 01/16] Update grn.R added functionality to escape special characters within gene names --- R/grn.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/grn.R b/R/grn.R index b4a30ee..e14d41c 100644 --- a/R/grn.R +++ b/R/grn.R @@ -335,6 +335,8 @@ fit_grn_models.GRNData <- function( } peak_name <- str_replace_all(p, '-', '_') tf_name <- str_replace_all(peak_tfs, '-', '_') + #some gene names contain special characters + tf_name <- str_replace_all(tf_name, "([:\\(\\)])", "\\\\\\1") formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -346,6 +348,7 @@ fit_grn_models.GRNData <- function( } target <- str_replace_all(g, '-', '_') + target <- str_replace_all(target, "([:\\(\\)])", "\\\\\\1") model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) @@ -356,7 +359,9 @@ fit_grn_models.GRNData <- function( gene_x <- gene_data[gene_groups, union(g, gene_tfs), drop=FALSE] model_mat <- as.data.frame(cbind(gene_x, peak_x)) if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) + colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') + colnames(model_mat) <- str_replace_all(colnames(model_mat), "([:\\(\\)])", "\\\\\\1") log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( @@ -446,15 +451,13 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, '_', '-'), - target = str_replace_all(target, '_', '-') + tf = str_replace_all(tf, "\\\\([:\\(\\)])", "\\1") %>% str_replace_all('_', '-'), + target = str_replace_all(target, "\\\\([:\\(\\)])", "\\1") %>% str_replace_all('_', '-') ) %>% select(tf, target, region, term, everything()) return(coefs_use) } - - #' Find TF modules in regulatory network #' #' @import tidygraph From 5f89379864b5941e5fe5eeefd01311e0768b1a1d Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:06:04 -0400 Subject: [PATCH 02/16] Update grn.R fix attempt #2 for the special characters non-functionality --- R/grn.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/grn.R b/R/grn.R index e14d41c..eb98497 100644 --- a/R/grn.R +++ b/R/grn.R @@ -336,7 +336,7 @@ fit_grn_models.GRNData <- function( peak_name <- str_replace_all(p, '-', '_') tf_name <- str_replace_all(peak_tfs, '-', '_') #some gene names contain special characters - tf_name <- str_replace_all(tf_name, "([:\\(\\)])", "\\\\\\1") + tf_name <- str_replace_all(tf_name, "\\\\([:\\(\\)])", "\\1") formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -348,7 +348,7 @@ fit_grn_models.GRNData <- function( } target <- str_replace_all(g, '-', '_') - target <- str_replace_all(target, "([:\\(\\)])", "\\\\\\1") + target <- str_replace_all(target, "\\\\([:\\(\\)])", "\\1") model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) @@ -361,7 +361,7 @@ fit_grn_models.GRNData <- function( if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') - colnames(model_mat) <- str_replace_all(colnames(model_mat), "([:\\(\\)])", "\\\\\\1") + colnames(model_mat) <- str_replace_all(colnames(model_mat), "\\\\([:\\(\\)])", "\\1") log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( @@ -451,12 +451,13 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, "\\\\([:\\(\\)])", "\\1") %>% str_replace_all('_', '-'), - target = str_replace_all(target, "\\\\([:\\(\\)])", "\\1") %>% str_replace_all('_', '-') + tf = str_replace_all(tf, "([:\\(\\)])", "\\\\\\1") %>% str_replace_all('_', '-'), + target = str_replace_all(target, "([:\\(\\)])", "\\\\\\1") %>% str_replace_all('_', '-') ) %>% select(tf, target, region, term, everything()) return(coefs_use) } + #' Find TF modules in regulatory network #' From 6654e416e2f99e046b938aad2e55e3f3b589d1bd Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:25:36 -0400 Subject: [PATCH 03/16] Update grn.R --- R/grn.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/R/grn.R b/R/grn.R index eb98497..740f5d9 100644 --- a/R/grn.R +++ b/R/grn.R @@ -334,9 +334,7 @@ fit_grn_models.GRNData <- function( return() } peak_name <- str_replace_all(p, '-', '_') - tf_name <- str_replace_all(peak_tfs, '-', '_') - #some gene names contain special characters - tf_name <- str_replace_all(tf_name, "\\\\([:\\(\\)])", "\\1") + tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") #some gene names contain special characters formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -347,8 +345,7 @@ fit_grn_models.GRNData <- function( return() } - target <- str_replace_all(g, '-', '_') - target <- str_replace_all(target, "\\\\([:\\(\\)])", "\\1") + target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) @@ -360,8 +357,7 @@ fit_grn_models.GRNData <- function( model_mat <- as.data.frame(cbind(gene_x, peak_x)) if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) - colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') - colnames(model_mat) <- str_replace_all(colnames(model_mat), "\\\\([:\\(\\)])", "\\1") + colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( @@ -451,8 +447,8 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, "([:\\(\\)])", "\\\\\\1") %>% str_replace_all('_', '-'), - target = str_replace_all(target, "([:\\(\\)])", "\\\\\\1") %>% str_replace_all('_', '-') + tf = str_replace_all(tf, '_', '-'), + target = str_replace_all(target, '_', '-') ) %>% select(tf, target, region, term, everything()) return(coefs_use) From e54889865e09e8e194879fc6348715de15dd1b0c Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:35:18 -0400 Subject: [PATCH 04/16] Update grn.R --- R/grn.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/grn.R b/R/grn.R index 740f5d9..59da8a8 100644 --- a/R/grn.R +++ b/R/grn.R @@ -334,7 +334,7 @@ fit_grn_models.GRNData <- function( return() } peak_name <- str_replace_all(p, '-', '_') - tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") #some gene names contain special characters + tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\\\0") #some gene names contain special characters formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -345,11 +345,11 @@ fit_grn_models.GRNData <- function( return() } - target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") + target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\\\0") model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) - + # Get expression data nfeats <- sum(map_dbl(frml_string, function(x) length(x$tfs))) gene_tfs <- purrr::reduce(map(frml_string, function(x) x$tfs), union) @@ -357,8 +357,8 @@ fit_grn_models.GRNData <- function( model_mat <- as.data.frame(cbind(gene_x, peak_x)) if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) - colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") - + colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% + str_replace_all("[\\(\\)]", "\\\\\\\\0") log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( model_frml, From d466a22838dd46d05499a5428b7b374e78480e19 Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:45:03 -0400 Subject: [PATCH 05/16] Update grn.R --- R/grn.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/grn.R b/R/grn.R index 59da8a8..b240b64 100644 --- a/R/grn.R +++ b/R/grn.R @@ -334,7 +334,7 @@ fit_grn_models.GRNData <- function( return() } peak_name <- str_replace_all(p, '-', '_') - tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\\\0") #some gene names contain special characters + tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") #some gene names contain special characters formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -345,7 +345,7 @@ fit_grn_models.GRNData <- function( return() } - target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\\\0") + target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) @@ -358,7 +358,7 @@ fit_grn_models.GRNData <- function( if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% - str_replace_all("[\\(\\)]", "\\\\\\\\0") + str_replace_all("[\\(\\)]", "\\\\\\0") log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( model_frml, @@ -447,8 +447,10 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, '_', '-'), - target = str_replace_all(target, '_', '-') + tf = str_replace_all(tf, '_', '-') %>% + str_replace_all("\\\\([\\(\\)])", "\\1"), + target = str_replace_all(target, '_', '-') %>% + str_replace_all("\\\\([\\(\\)])", "\\1") ) %>% select(tf, target, region, term, everything()) return(coefs_use) From bcc32453ccfa2f85e844afe452b2d65ada72024c Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 16:16:19 -0400 Subject: [PATCH 06/16] Update grn.R --- R/grn.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/grn.R b/R/grn.R index b240b64..a6b9edb 100644 --- a/R/grn.R +++ b/R/grn.R @@ -345,10 +345,18 @@ fit_grn_models.GRNData <- function( return() } + target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") + + frml_string <- map(frml_string, function(x) { + # Escape special characters like parentheses + x$frml <- str_replace_all(x$frml, '[\\(\\)]', '\\\\\\0') + return(x) + }) model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) + # Get expression data nfeats <- sum(map_dbl(frml_string, function(x) length(x$tfs))) From 3f23b6c2caaba58af117a0e25b6373a0921f14f9 Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 16:25:42 -0400 Subject: [PATCH 07/16] Update grn.R --- R/grn.R | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/R/grn.R b/R/grn.R index a6b9edb..636d1a6 100644 --- a/R/grn.R +++ b/R/grn.R @@ -334,7 +334,9 @@ fit_grn_models.GRNData <- function( return() } peak_name <- str_replace_all(p, '-', '_') - tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") #some gene names contain special characters + tf_name <- str_replace_all(peak_tfs, '-', '_') %>% + str_replace_all("[\\(\\)]", "") %>% + str_replace_all(":", "") #some gene names contain special characters, remove formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -346,13 +348,9 @@ fit_grn_models.GRNData <- function( } - target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "\\\\\\0") + target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "") %>% + str_replace_all(":", "") #some gene names contain special characters, remove - frml_string <- map(frml_string, function(x) { - # Escape special characters like parentheses - x$frml <- str_replace_all(x$frml, '[\\(\\)]', '\\\\\\0') - return(x) - }) model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) ) @@ -366,7 +364,8 @@ fit_grn_models.GRNData <- function( if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% - str_replace_all("[\\(\\)]", "\\\\\\0") + str_replace_all("[\\(\\)]", "") %>% + str_replace_all(":", "") #some gene names contain special characters, remove log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( model_frml, @@ -455,10 +454,8 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, '_', '-') %>% - str_replace_all("\\\\([\\(\\)])", "\\1"), - target = str_replace_all(target, '_', '-') %>% - str_replace_all("\\\\([\\(\\)])", "\\1") + tf = str_replace_all(tf, '_', '-'), + target = str_replace_all(target, '_', '-') ) %>% select(tf, target, region, term, everything()) return(coefs_use) From 087b58c39004dcc61eceeb6df326cc509f118201 Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 16:36:48 -0400 Subject: [PATCH 08/16] Update grn.R --- R/grn.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/grn.R b/R/grn.R index 636d1a6..c36f13b 100644 --- a/R/grn.R +++ b/R/grn.R @@ -336,7 +336,9 @@ fit_grn_models.GRNData <- function( peak_name <- str_replace_all(p, '-', '_') tf_name <- str_replace_all(peak_tfs, '-', '_') %>% str_replace_all("[\\(\\)]", "") %>% - str_replace_all(":", "") #some gene names contain special characters, remove + str_replace_all(":", "") %>% + str_replace_all("^([0-9])", "X\\1") + #some gene names contain special characters, remove formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -348,8 +350,10 @@ fit_grn_models.GRNData <- function( } - target <- str_replace_all(g, '-', '_') %>% str_replace_all("[\\(\\)]", "") %>% - str_replace_all(":", "") #some gene names contain special characters, remove + target <- str_replace_all(g, '-', '_') %>% + str_replace_all("[\\(\\)]", "") %>% + str_replace_all(":", "") %>% + str_replace_all("^([0-9])", "X\\1") #some gene names contain special characters, remove model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) @@ -365,7 +369,8 @@ fit_grn_models.GRNData <- function( colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% str_replace_all("[\\(\\)]", "") %>% - str_replace_all(":", "") #some gene names contain special characters, remove + str_replace_all(":", "") %>% + str_replace_all("^([0-9])", "X\\1") #some gene names contain special characters, remove log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( model_frml, From 913e5cdd3f13266db28d32cd9656c66a59861a7c Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 16:54:19 -0400 Subject: [PATCH 09/16] Update grn.R --- R/grn.R | 53 +++++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/R/grn.R b/R/grn.R index c36f13b..c961a38 100644 --- a/R/grn.R +++ b/R/grn.R @@ -276,24 +276,24 @@ fit_grn_models.GRNData <- function( model_fits <- map_par(features, function(g){ # Select peaks near gene - if (!g%in%rownames(peaks2gene)){ + if (!`g`%in%rownames(peaks2gene)){ log_message('Warning: ', g, ' not found in EnsDb', verbose=verbose==2) return() } - gene_peaks <- as.logical(peaks2gene[g, ]) + gene_peaks <- as.logical(peaks2gene[`g`, ]) if (sum(gene_peaks)==0){ - log_message('Warning: No peaks found near ', g, verbose=verbose==2) + log_message('Warning: No peaks found near ', `g`, verbose=verbose==2) return() } # Select peaks correlating with target gene expression - g_x <- gene_data[gene_groups, g, drop=FALSE] + g_x <- gene_data[gene_groups, `g`, drop=FALSE] peak_x <- peak_data[peak_groups, gene_peaks, drop=FALSE] peak_g_cor <- as(sparse_cor(peak_x, g_x), 'generalMatrix') peak_g_cor[is.na(peak_g_cor)] <- 0 peaks_use <- rownames(peak_g_cor)[abs(peak_g_cor[, 1]) > peak_cor] if (length(peaks_use)==0){ - log_message('Warning: No correlating peaks found for ', g, verbose=verbose==2) + log_message('Warning: No correlating peaks found for ', `g`, verbose=verbose==2) return() } peak_x <- peak_x[, peaks_use, drop=FALSE] @@ -327,53 +327,42 @@ fit_grn_models.GRNData <- function( rename('tf'=1, 'corr'=2) # Filter TFs and make formula string - frml_string <- map(names(gene_peak_tfs), function(p){ + `frml_string` <- map(names(gene_peak_tfs), function(p){ peak_tfs <- gene_peak_tfs[[p]] peak_tfs <- peak_tfs[peak_tfs%in%tfs_use] if (length(peak_tfs)==0){ return() } - peak_name <- str_replace_all(p, '-', '_') - tf_name <- str_replace_all(peak_tfs, '-', '_') %>% - str_replace_all("[\\(\\)]", "") %>% - str_replace_all(":", "") %>% - str_replace_all("^([0-9])", "X\\1") - #some gene names contain special characters, remove + `peak_name` <- str_replace_all(p, '-', '_') + `tf_name` <- str_replace_all(peak_tfs, '-', '_') formula_str <- paste( - paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') - return(list(tfs=peak_tfs, frml=formula_str)) + paste(`peak_name`, interaction_term, `tf_name`, sep=' '), collapse = ' + ') + return(list(tfs=`peak_tfs`, frml=`formula_str`)) }) - frml_string <- frml_string[!map_lgl(frml_string, is.null)] - if (length(frml_string)==0){ + `frml_string` <- `frml_string`[!map_lgl(`frml_string`, is.null)] + if (length(`frml_string`)==0){ log_message('Warning: No valid peak:TF pairs found for ', g, verbose=verbose==2) return() } - target <- str_replace_all(g, '-', '_') %>% - str_replace_all("[\\(\\)]", "") %>% - str_replace_all(":", "") %>% - str_replace_all("^([0-9])", "X\\1") #some gene names contain special characters, remove - + target <- str_replace_all(g, '-', '_') model_frml <- as.formula( - paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$frml), collapse=' + ')) + paste0(`target`, ' ~ ', paste0(map(`frml_string`, function(x) x$`frml`), collapse=' + ')) ) # Get expression data - nfeats <- sum(map_dbl(frml_string, function(x) length(x$tfs))) - gene_tfs <- purrr::reduce(map(frml_string, function(x) x$tfs), union) + nfeats <- sum(map_dbl(`frml_string`, function(x) length(x$`tfs`))) + gene_tfs <- purrr::reduce(map(`frml_string`, function(x) x$`tfs`), union) gene_x <- gene_data[gene_groups, union(g, gene_tfs), drop=FALSE] model_mat <- as.data.frame(cbind(gene_x, peak_x)) if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) - colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') %>% - str_replace_all("[\\(\\)]", "") %>% - str_replace_all(":", "") %>% - str_replace_all("^([0-9])", "X\\1") #some gene names contain special characters, remove + colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( - model_frml, + `model_frml`, data = model_mat, method = method, ... @@ -394,13 +383,13 @@ fit_grn_models.GRNData <- function( log_message('Warning: Fitting model failed for all genes.', verbose=verbose) } - coefs <- map_dfr(model_fits, function(x) x$coefs, .id='target') + coefs <- map_dfr(model_fits, function(x) x$`coefs`, .id='target') coefs <- format_coefs(coefs, term=interaction_term, adjust_method=adjust_method) - corrs <- map_dfr(model_fits, function(x) x$corr, .id='target') + corrs <- map_dfr(model_fits, function(x) x$`corr`, .id='target') if (nrow(coefs)>0){ coefs <- suppressMessages(left_join(coefs, corrs)) } - gof <- map_dfr(model_fits, function(x) x$gof, .id='target') + gof <- map_dfr(model_fits, function(x) x$`gof`, .id='target') params <- list() params[['method']] <- method From 814c465c52c98fee7915d52379c04e3ba049bcfa Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 17:10:48 -0400 Subject: [PATCH 10/16] Update grn.R escape problematic characters in the formula accession --- R/grn.R | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/R/grn.R b/R/grn.R index c961a38..f53c995 100644 --- a/R/grn.R +++ b/R/grn.R @@ -327,20 +327,20 @@ fit_grn_models.GRNData <- function( rename('tf'=1, 'corr'=2) # Filter TFs and make formula string - `frml_string` <- map(names(gene_peak_tfs), function(p){ + frml_string <- map(names(gene_peak_tfs), function(p){ peak_tfs <- gene_peak_tfs[[p]] peak_tfs <- peak_tfs[peak_tfs%in%tfs_use] if (length(peak_tfs)==0){ return() } - `peak_name` <- str_replace_all(p, '-', '_') - `tf_name` <- str_replace_all(peak_tfs, '-', '_') + peak_name <- str_replace_all(p, '-', '_') + tf_name <- str_replace_all(peak_tfs, '-', '_') formula_str <- paste( - paste(`peak_name`, interaction_term, `tf_name`, sep=' '), collapse = ' + ') - return(list(tfs=`peak_tfs`, frml=`formula_str`)) + paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') + return(list(tfs=peak_tfs, frml=formula_str)) }) - `frml_string` <- `frml_string`[!map_lgl(`frml_string`, is.null)] - if (length(`frml_string`)==0){ + frml_string <- frml_string[!map_lgl(frml_string, is.null)] + if (length(frml_string)==0){ log_message('Warning: No valid peak:TF pairs found for ', g, verbose=verbose==2) return() } @@ -348,21 +348,22 @@ fit_grn_models.GRNData <- function( target <- str_replace_all(g, '-', '_') model_frml <- as.formula( - paste0(`target`, ' ~ ', paste0(map(`frml_string`, function(x) x$`frml`), collapse=' + ')) + paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$`frml`), collapse=' + ')) ) # Get expression data - nfeats <- sum(map_dbl(`frml_string`, function(x) length(x$`tfs`))) - gene_tfs <- purrr::reduce(map(`frml_string`, function(x) x$`tfs`), union) + nfeats <- sum(map_dbl(frml_string, function(x) length(x$`tfs`))) + gene_tfs <- purrr::reduce(map(frml_string, function(x) x$`tfs`), union) gene_x <- gene_data[gene_groups, union(g, gene_tfs), drop=FALSE] model_mat <- as.data.frame(cbind(gene_x, peak_x)) if (scale) model_mat <- as.data.frame(scale(as.matrix(model_mat))) - colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') + colnames(model_mat) <- str_replace_all(colnames(model_mat), '-', '_') + log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( - `model_frml`, + model_frml, data = model_mat, method = method, ... From 834587e2d3717043b3a86b4258411a6c0bb1ad1c Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 18:44:59 -0400 Subject: [PATCH 11/16] Update grn.R --- R/grn.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/grn.R b/R/grn.R index f53c995..13bb164 100644 --- a/R/grn.R +++ b/R/grn.R @@ -335,6 +335,7 @@ fit_grn_models.GRNData <- function( } peak_name <- str_replace_all(p, '-', '_') tf_name <- str_replace_all(peak_tfs, '-', '_') + tf_name <- paste0("`", tf_name, "`") formula_str <- paste( paste(peak_name, interaction_term, tf_name, sep=' '), collapse = ' + ') return(list(tfs=peak_tfs, frml=formula_str)) @@ -347,10 +348,10 @@ fit_grn_models.GRNData <- function( target <- str_replace_all(g, '-', '_') + target <- paste0("`", target, "`") model_frml <- as.formula( paste0(target, ' ~ ', paste0(map(frml_string, function(x) x$`frml`), collapse=' + ')) ) - # Get expression data nfeats <- sum(map_dbl(frml_string, function(x) length(x$`tfs`))) @@ -363,7 +364,7 @@ fit_grn_models.GRNData <- function( log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( - model_frml, + `model_frml`, data = model_mat, method = method, ... @@ -449,8 +450,8 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, '_', '-'), - target = str_replace_all(target, '_', '-') + tf = str_replace_all(tf, '_', '-') %>% str_replace_all(target, '`', ''), + target = str_replace_all(target, '_', '-') %>% str_replace_all(target, '`', '') ) %>% select(tf, target, region, term, everything()) return(coefs_use) From e9baaa53f1bb40ec9d9539a703ef4c7d53f66908 Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 18:58:17 -0400 Subject: [PATCH 12/16] Update grn.R --- R/grn.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/grn.R b/R/grn.R index 13bb164..8b103ec 100644 --- a/R/grn.R +++ b/R/grn.R @@ -276,30 +276,31 @@ fit_grn_models.GRNData <- function( model_fits <- map_par(features, function(g){ # Select peaks near gene - if (!`g`%in%rownames(peaks2gene)){ + if (!g%in%rownames(peaks2gene)){ log_message('Warning: ', g, ' not found in EnsDb', verbose=verbose==2) return() } - gene_peaks <- as.logical(peaks2gene[`g`, ]) + gene_peaks <- as.logical(peaks2gene[g, ]) if (sum(gene_peaks)==0){ - log_message('Warning: No peaks found near ', `g`, verbose=verbose==2) + log_message('Warning: No peaks found near ', g, verbose=verbose==2) return() } - + log_message('Selecting peaks correlating with target gene expression...', verbose=verbose) # Select peaks correlating with target gene expression - g_x <- gene_data[gene_groups, `g`, drop=FALSE] + g_x <- gene_data[gene_groups, g, drop=FALSE] peak_x <- peak_data[peak_groups, gene_peaks, drop=FALSE] peak_g_cor <- as(sparse_cor(peak_x, g_x), 'generalMatrix') peak_g_cor[is.na(peak_g_cor)] <- 0 peaks_use <- rownames(peak_g_cor)[abs(peak_g_cor[, 1]) > peak_cor] if (length(peaks_use)==0){ - log_message('Warning: No correlating peaks found for ', `g`, verbose=verbose==2) + log_message('Warning: No correlating peaks found for ', g, verbose=verbose==2) return() } peak_x <- peak_x[, peaks_use, drop=FALSE] peak_motifs <- peaks2motif[gene_peaks, , drop=FALSE][peaks_use, , drop=FALSE] # Select TFs with motifs in peaks + log_message('Selecting TFs with motifs in peaks...', verbose=verbose) gene_peak_tfs <- map(rownames(peak_motifs), function(p){ x <- as.logical(peak_motifs[p, ]) peak_tfs <- colMaxs(motif2tf[x, , drop=FALSE]) @@ -310,6 +311,7 @@ fit_grn_models.GRNData <- function( names(gene_peak_tfs) <- rownames(peak_motifs) # Check correlation of peaks with target gene + log_message('Check correlation of peaks with target gene...', verbose=verbose) gene_tfs <- purrr::reduce(gene_peak_tfs, union) tf_x <- gene_data[gene_groups, gene_tfs, drop=FALSE] tf_g_cor <- as(sparse_cor(tf_x, g_x), 'generalMatrix') @@ -354,7 +356,8 @@ fit_grn_models.GRNData <- function( ) # Get expression data - nfeats <- sum(map_dbl(frml_string, function(x) length(x$`tfs`))) + + nfeats <- sum(map_dbl(frml_string, function(x) length(x$`tfs`))) gene_tfs <- purrr::reduce(map(frml_string, function(x) x$`tfs`), union) gene_x <- gene_data[gene_groups, union(g, gene_tfs), drop=FALSE] model_mat <- as.data.frame(cbind(gene_x, peak_x)) @@ -364,7 +367,7 @@ fit_grn_models.GRNData <- function( log_message('Fitting model with ', nfeats, ' variables for ', g, verbose=verbose==2) result <- try(fit_model( - `model_frml`, + model_frml, data = model_mat, method = method, ... From 5136f52e434d9aaae89c44a831556f8e881bc82a Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Wed, 26 Mar 2025 19:12:35 -0400 Subject: [PATCH 13/16] Update grn.R Update handling for special characters in gene names --- R/grn.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/grn.R b/R/grn.R index 8b103ec..626c216 100644 --- a/R/grn.R +++ b/R/grn.R @@ -453,8 +453,8 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ select(-region_, -tf_) %>% mutate( region = str_replace_all(region, '_', '-'), - tf = str_replace_all(tf, '_', '-') %>% str_replace_all(target, '`', ''), - target = str_replace_all(target, '_', '-') %>% str_replace_all(target, '`', '') + tf = str_replace_all(tf, '_', '-') %>% str_replace_all('`', ''), + target = str_replace_all(target, '_', '-') %>% str_replace_all('`', '') ) %>% select(tf, target, region, term, everything()) return(coefs_use) From f25e6eccc97d3d3d20c680568e5134c3c18ff820 Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Thu, 27 Mar 2025 00:23:11 -0400 Subject: [PATCH 14/16] Update grn.R fixed line from object@meta.data to object@data@meta.data --- R/grn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/grn.R b/R/grn.R index 626c216..f75ba4a 100644 --- a/R/grn.R +++ b/R/grn.R @@ -201,7 +201,7 @@ fit_grn_models.GRNData <- function( group_name = aggregate_rna_col, verbose = FALSE ) - gene_groups <- object@meta.data[[aggregate_rna_col]] + gene_groups <- object@data@meta.data[[aggregate_rna_col]] } if (is.null(aggregate_peaks_col)){ From 969ac68e50a2e57ef26b53d5d9e2cfef60528a4f Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:38:41 -0400 Subject: [PATCH 15/16] Update grn.R backtick peaks --- R/grn.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/grn.R b/R/grn.R index f75ba4a..cf37fe4 100644 --- a/R/grn.R +++ b/R/grn.R @@ -336,6 +336,7 @@ fit_grn_models.GRNData <- function( return() } peak_name <- str_replace_all(p, '-', '_') + peak_name <- paste0("`", peak_name, "`") tf_name <- str_replace_all(peak_tfs, '-', '_') tf_name <- paste0("`", tf_name, "`") formula_str <- paste( @@ -452,7 +453,7 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ ) %>% select(-region_, -tf_) %>% mutate( - region = str_replace_all(region, '_', '-'), + region = str_replace_all(region, '_', '-') %>% str_replace_all('`', ''), tf = str_replace_all(tf, '_', '-') %>% str_replace_all('`', ''), target = str_replace_all(target, '_', '-') %>% str_replace_all('`', '') ) %>% From 36f72fa939a6b291f86eeac4b5a6c892dadf57ae Mon Sep 17 00:00:00 2001 From: mbrovkin <54407985+mbrovkin@users.noreply.github.com> Date: Fri, 28 Mar 2025 15:01:00 -0400 Subject: [PATCH 16/16] Update grn.R --- R/grn.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/grn.R b/R/grn.R index cf37fe4..cf3c891 100644 --- a/R/grn.R +++ b/R/grn.R @@ -453,6 +453,7 @@ format_coefs <- function(coefs, term=':', adjust_method='fdr'){ ) %>% select(-region_, -tf_) %>% mutate( + term = str_replace_all(term, '_', '-') %>% str_replace_all('`', ''), region = str_replace_all(region, '_', '-') %>% str_replace_all('`', ''), tf = str_replace_all(tf, '_', '-') %>% str_replace_all('`', ''), target = str_replace_all(target, '_', '-') %>% str_replace_all('`', '')