diff --git a/DESCRIPTION b/DESCRIPTION index 9335eba..126c994 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: vsp Title: Vintage Sparse PCA for Semi-Parametric Factor Analysis -Version: 0.1.1.9000 +Version: 0.1.1.9001 Authors@R: c( person("Karl", "Rohe", , "karlrohe@stat.wisc.edu", role = "aut"), person("Muzhe", "Zeng", , "mzeng6@wisc.edu", role = "aut"), @@ -51,4 +51,13 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1.9000 +RoxygenNote: 7.2.3 +Collate: + 'accessors.R' + 'bff.R' + 'utils.R' + 'localization.R' + 'object.R' + 'plots.R' + 'vsp-package.R' + 'vsp.R' diff --git a/NAMESPACE b/NAMESPACE index 401e257..aaafa73 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,12 +14,18 @@ export(bind_svd_u) export(bind_svd_v) export(bind_varimax_y) export(bind_varimax_z) +export(cumulative_participation) export(get_svd_u) export(get_svd_v) export(get_varimax_y) export(get_varimax_z) export(get_y_hubs) export(get_z_hubs) +export(ipr) +export(iprs) +export(localization_statistics) +export(plot_cumulative_curves) +export(plot_ipr_curves) export(plot_ipr_pairs) export(plot_mixing_matrix) export(plot_svd_u) diff --git a/R/localization.R b/R/localization.R new file mode 100644 index 0000000..b930d3e --- /dev/null +++ b/R/localization.R @@ -0,0 +1,171 @@ +#' Title +#' +#' @param U +#' +#' @return +#' @export +#' +cumulative_participation <- function(U) { + sum(rowSums(U^2)^2) +} + +#' Title +#' +#' @param x +#' +#' @return +#' @export +#' +ipr <- function(x) { + sum(x^4) +} + +#' Title +#' +#' @param s +#' +#' @return +#' @export +#' +iprs <- function(s) { + tibble( + ipr_u = apply(s$u, 2, ipr), + ipr_v = apply(s$v, 2, ipr), + i = 1:ncol(s$u) + ) +} + +#' Title +#' +#' @param graph +#' @param max_rank +#' @param ... +#' @param tau_min +#' @param tau_max +#' @param num_tau +#' +#' @return +#' @export +#' @include utils.R +#' +#' @examples +#' +#' library(igraphdata) +#' library(furrr) +#' +#' data(karate, package = "igraphdata") +#' +#' plan(multicore, workers = 10) +#' +#' # karate is undirected, enron is directed +#' +#' stats <- localization_statistics(karate, max_rank = 15, num_tau = 200) +#' +#' plot_cumulative_curves(stats) +#' plot_ipr_curves(stats) +#' +localization_statistics <- function(graph, max_rank, ..., tau_min = 10^-2, tau_max = 10^4, num_tau = 50) { + + stop_if_not_installed("invertiforms") + stop_if_not_installed("furrr") + + A <- as_csparse(graph, ...) + + avg_row_sum <- mean(rowSums(A)) # U / left + avg_col_sum <- mean(colSums(A)) # V / right + + tau <- 10^seq(-2, 4, length.out = num_tau) + + laplacians <- dplyr::tibble(tau = tau) %>% + dplyr::mutate( + scaler = furrr::future_map(tau, ~ invertiforms::RegularizedLaplacian(A, .x, .x)), + L_tau = furrr::future_map(scaler, ~ invertiforms::transform(.x, A)) + ) + + localization <- laplacians %>% + dplyr::mutate( + s = furrr::future_map(L_tau, RSpectra::svds, max_rank, .options = furrr::furrr_options(seed = TRUE)), + ipr = furrr::future_map(s, iprs), + cum_u = furrr::future_map_dbl(s, ~ cumulative_participation(.x$u)), + cum_v = furrr::future_map_dbl(s, ~ cumulative_participation(.x$v)) + ) |> + # dplyr::select(tau, ipr, cum_u, cum_v) |> + tidyr::unnest(ipr) + + localization <- list( + stats = localization, + avg_row_sum = avg_row_sum, + avg_col_sum = avg_col_sum + ) + + class(localization) <- c("localization_stats") + + localization +} + +#' Title +#' +#' @param stats +#' +#' @return +#' @export +#' +#' @examples +plot_cumulative_curves <- function(localization) { + localization$stats %>% + tidyr::pivot_longer(contains("cum")) %>% + dplyr::mutate( + name = dplyr::if_else(name == "cum_u", "U (left)", "V (right)") + ) |> + ggplot() + + aes(x = tau, y = value, color = name) + + geom_line() + + scale_x_log10( + breaks = scales::trans_breaks("log10", function(x) 10^x), + labels = scales::trans_format("log10", scales::math_format(10^.x)) + ) + + labs( + y = "Cumulative participation", + x = "Regularization parameter (tau)", + color = "Singular vectors" + ) +} + +#' Title +#' +#' @param stats +#' @param indices +#' +#' @return +#' @export +#' +#' @examples +plot_ipr_curves <- function(localization, indices = NULL) { + + if (is.null(indices)) { + max_index <- min(max(localization$stats$i), 10) + indices <- 1:max_index + } + + localization$stats %>% + dplyr::filter(i %in% indices) |> + tidyr::pivot_longer(contains("ipr")) %>% + dplyr::mutate( + name = dplyr::if_else(name == "ipr_u", "U (left)", "V (right)") + ) |> + ggplot() + + aes(x = tau, y = value, color = name) + + geom_line() + + scale_x_log10( + breaks = scales::trans_breaks("log10", function(x) 10^x), + labels = scales::trans_format("log10", scales::math_format(10^.x)) + ) + + labs( + y = "Inverse participation ratio", + x = "Regularization parameter (tau)", + color = "Singular vectors" + ) + + facet_grid(rows = vars(i)) +} + + diff --git a/R/plots.R b/R/plots.R index a87cb7d..f83366d 100644 --- a/R/plots.R +++ b/R/plots.R @@ -154,8 +154,6 @@ plot_mixing_matrix <- function(fa) { #' @export plot_ipr_pairs <- function(fa) { - ipr <- function(x) sum(x^4) - ipr_u <- apply(fa$u, 2, ipr) ipr_v <- apply(fa$v, 2, ipr) diff --git a/R/utils.R b/R/utils.R index e26a9a1..7f27e82 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,6 +10,26 @@ #' @usage lhs \%>\% rhs NULL + +as_csparse <- function(graph, ...) { + UseMethod("as_csparse") +} + +as_csparse.Matrix <- function(graph, ...) { + methods::as(graph, "CsparseMatrix") +} + +as_csparse.igraph <- function(graph, ..., edge_weights = NULL) { + + if (igraph::is.bipartite(graph)) { + A <- igraph::as_incidence_matrix(graph, sparse = TRUE, attr = edge_weights) + } else { + A <- igraph::as_adjacency_matrix(graph, sparse = TRUE, attr = edge_weights) + } + + methods::as(A, "CsparseMatrix") +} + left_padded_sequence <- function(x) { original <- withr::with_options( diff --git a/man/cumulative_participation.Rd b/man/cumulative_participation.Rd new file mode 100644 index 0000000..dbbcc35 --- /dev/null +++ b/man/cumulative_participation.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/localization.R +\name{cumulative_participation} +\alias{cumulative_participation} +\title{Title} +\usage{ +cumulative_participation(U) +} +\arguments{ +\item{U}{} +} +\description{ +Title +} diff --git a/man/ipr.Rd b/man/ipr.Rd new file mode 100644 index 0000000..d2dde7c --- /dev/null +++ b/man/ipr.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/localization.R +\name{ipr} +\alias{ipr} +\title{Title} +\usage{ +ipr(x) +} +\arguments{ +\item{x}{} +} +\description{ +Title +} diff --git a/man/iprs.Rd b/man/iprs.Rd new file mode 100644 index 0000000..9a6a88f --- /dev/null +++ b/man/iprs.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/localization.R +\name{iprs} +\alias{iprs} +\title{Title} +\usage{ +iprs(s) +} +\arguments{ +\item{s}{} +} +\description{ +Title +} diff --git a/man/localization_statistics.Rd b/man/localization_statistics.Rd new file mode 100644 index 0000000..4eb53e0 --- /dev/null +++ b/man/localization_statistics.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/localization.R +\name{localization_statistics} +\alias{localization_statistics} +\title{Title} +\usage{ +localization_statistics( + graph, + max_rank, + ..., + tau_min = 10^-2, + tau_max = 10^4, + num_tau = 50 +) +} +\arguments{ +\item{num_tau}{} +} +\description{ +Title +} +\examples{ + +library(igraphdata) +library(furrr) + +data(karate, package = "igraphdata") + +plan(multicore, workers = 10) + +# karate is undirected, enron is directed + +stats <- localization_statistics(karate, max_rank = 15, num_tau = 200) + +plot_cumulative_curves(stats) +plot_ipr_curves(stats) + +} diff --git a/man/plot_cumulative_curves.Rd b/man/plot_cumulative_curves.Rd new file mode 100644 index 0000000..32f754e --- /dev/null +++ b/man/plot_cumulative_curves.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/localization.R +\name{plot_cumulative_curves} +\alias{plot_cumulative_curves} +\title{Title} +\usage{ +plot_cumulative_curves(localization) +} +\arguments{ +\item{stats}{} +} +\description{ +Title +} diff --git a/man/plot_ipr_curves.Rd b/man/plot_ipr_curves.Rd new file mode 100644 index 0000000..db10606 --- /dev/null +++ b/man/plot_ipr_curves.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/localization.R +\name{plot_ipr_curves} +\alias{plot_ipr_curves} +\title{Title} +\usage{ +plot_ipr_curves(localization, indices = NULL) +} +\arguments{ +\item{indices}{} +} +\description{ +Title +}