From e4dc3aae7ff78c6fbce6805ab863736b15efa9c6 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Fri, 7 Nov 2025 19:55:58 -0800 Subject: [PATCH 01/12] Mission fix the off by factor of 2 error begins --- NAMESPACE | 2 - R/expected-degrees.R | 18 ------- R/sample_edgelist.R | 11 ++-- R/sample_igraph.R | 2 +- R/undirected_dcsbm.R | 2 +- R/undirected_factor_model.R | 5 +- man/dcsbm.Rd | 2 +- man/expected_edges.Rd | 3 -- man/mmsbm.Rd | 2 +- man/sample_edgelist.matrix.Rd | 6 +-- man/sbm.Rd | 2 +- tests/testthat/test-degree-scaling.R | 65 +++++++----------------- tests/testthat/test-population-spectra.R | 28 +++++++++- 13 files changed, 64 insertions(+), 84 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3169658..756bc17 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ S3method(eigs_sym,undirected_factor_model) S3method(expectation,directed_factor_model) S3method(expectation,undirected_factor_model) S3method(expected_degree,undirected_factor_model) -S3method(expected_degrees,undirected_factor_model) S3method(expected_density,directed_factor_model) S3method(expected_density,undirected_factor_model) S3method(expected_edges,directed_factor_model) @@ -41,7 +40,6 @@ export(eigs_sym) export(erdos_renyi) export(expectation) export(expected_degree) -export(expected_degrees) export(expected_density) export(expected_edges) export(expected_in_degree) diff --git a/R/expected-degrees.R b/R/expected-degrees.R index c36d53d..f5ef99b 100644 --- a/R/expected-degrees.R +++ b/R/expected-degrees.R @@ -123,24 +123,6 @@ expected_degree.undirected_factor_model <- function(factor_model, ...) { expected_edges(factor_model) / as.numeric(factor_model$n) } - -#' @rdname expected_edges -#' @export -expected_degrees <- function(factor_model, ...) { - UseMethod("expected_degrees") -} - -#' @export -expected_degrees.undirected_factor_model <- function(factor_model, ...) { - # rowSums of E[A|X, S] = XSX' are XSX'1 for 1 a column vector of ones - # want to avoid memory cost of instantiating all of E[A|X, S], which is - # typically large and dense - X <- factor_model$X - S <- factor_model$S - ones <- matrix(1, nrow = nrow(X)) - as.numeric(X %*% (tcrossprod(S, X) %*% ones)) -} - #' @export expected_density.undirected_factor_model <- function(factor_model, ...) { expected_edges(factor_model) / as.numeric(choose(factor_model$n, 2)) diff --git a/R/sample_edgelist.R b/R/sample_edgelist.R index 8675c06..04b0e55 100644 --- a/R/sample_edgelist.R +++ b/R/sample_edgelist.R @@ -156,7 +156,10 @@ sample_edgelist.undirected_factor_model <- function( factor_model, ...) { X <- factor_model$X - S <- factor_model$S + + # see #43, move the scaling factor here so it's temporary during sampling + # E(A) = U S U' holds for undirected factor models + S <- factor_model$S / 2 sample_edgelist( X, S, X, @@ -185,14 +188,14 @@ sample_edgelist.directed_factor_model <- function( #' Low level interface to sample RPDG edgelists #' -#' **This is a breaks-off, no safety checks interface.** +#' **This is a brakes-off, no safety checks interface.** #' We strongly recommend that you do not call #' `sample_edgelist.matrix()` unless you know what you are doing, #' and even then, we still do not recommend it, as you will #' bypass all typical input validation. -#' ***extremely loud coughing*** All those who bypass input +#' **extremely loud coughing** All those who bypass input #' validation suffer foolishly at their own hand. -#' ***extremely loud coughing*** +#' **extremely loud coughing** #' #' @param factor_model An `n` by `k1` [matrix()] or [Matrix::Matrix()] #' of latent node positions encoding incoming edge community membership. diff --git a/R/sample_igraph.R b/R/sample_igraph.R index c830aa3..3097cf6 100644 --- a/R/sample_igraph.R +++ b/R/sample_igraph.R @@ -60,5 +60,5 @@ sample_igraph.directed_factor_model <- function( # a bipartite graph rather than using the usual one-mode tools A <- sample_sparse(factor_model, ...) - igraph::graph_from_incidence_matrix(A, directed = FALSE, multiple = TRUE) + igraph::graph_from_biadjacency_matrix(A, directed = FALSE, multiple = TRUE) } diff --git a/R/undirected_dcsbm.R b/R/undirected_dcsbm.R index e320ddb..b86988b 100644 --- a/R/undirected_dcsbm.R +++ b/R/undirected_dcsbm.R @@ -107,7 +107,7 @@ validate_undirected_dcsbm <- function(x) { #' to a node in community `j` is `Poisson(B[i, j])`. Must be #' a square matrix. `matrix` and `Matrix` objects are both #' acceptable. If `B` is not symmetric, it will be -#' symmetrized via the update `B := B + t(B)`. Defaults to `NULL`. +#' symmetrized via the update `B := B + t(B) / 2`. Defaults to `NULL`. #' You must specify either `k` or `B`, but not both. #' #' @param block_sizes (block sizes) Number of nodes in each block, diff --git a/R/undirected_factor_model.R b/R/undirected_factor_model.R index bc1c9a4..f85de2e 100644 --- a/R/undirected_factor_model.R +++ b/R/undirected_factor_model.R @@ -164,7 +164,10 @@ undirected_factor_model <- function( allow_self_loops = allow_self_loops ) - ufm$S <- (S + t(S)) / 2 # symmetrize S. idempotent if S already symmetric + # make sure probabilities of A_ij and A_ji are equal by forcing S to be + # symmetric just in case it isn't. if S is already symmetric, this + # operation leaves S unchanged + ufm$S <- (S + t(S)) / 2 if (!is.null(expected_degree)) { if (expected_degree <= 0) { diff --git a/man/dcsbm.Rd b/man/dcsbm.Rd index b3661e9..8d0c345 100644 --- a/man/dcsbm.Rd +++ b/man/dcsbm.Rd @@ -49,7 +49,7 @@ probabilities. The probability that a node in block \code{i} connects to a node in community \code{j} is \code{Poisson(B[i, j])}. Must be a square matrix. \code{matrix} and \code{Matrix} objects are both acceptable. If \code{B} is not symmetric, it will be -symmetrized via the update \code{B := B + t(B)}. Defaults to \code{NULL}. +symmetrized via the update \code{B := B + t(B) / 2}. Defaults to \code{NULL}. You must specify either \code{k} or \code{B}, but not both.} \item{...}{ diff --git a/man/expected_edges.Rd b/man/expected_edges.Rd index 0de11eb..64a7b76 100644 --- a/man/expected_edges.Rd +++ b/man/expected_edges.Rd @@ -6,7 +6,6 @@ \alias{expected_in_degree} \alias{expected_out_degree} \alias{expected_density} -\alias{expected_degrees} \title{Calculate the expected edges in Poisson RDPG graph} \usage{ expected_edges(factor_model, ...) @@ -18,8 +17,6 @@ expected_in_degree(factor_model, ...) expected_out_degree(factor_model, ...) expected_density(factor_model, ...) - -expected_degrees(factor_model, ...) } \arguments{ \item{factor_model}{A \code{\link[=directed_factor_model]{directed_factor_model()}} or diff --git a/man/mmsbm.Rd b/man/mmsbm.Rd index 769d859..4701720 100644 --- a/man/mmsbm.Rd +++ b/man/mmsbm.Rd @@ -48,7 +48,7 @@ probabilities. The probability that a node in block \code{i} connects to a node in community \code{j} is \code{Poisson(B[i, j])}. Must be a square matrix. \code{matrix} and \code{Matrix} objects are both acceptable. If \code{B} is not symmetric, it will be -symmetrized via the update \code{B := B + t(B)}. Defaults to \code{NULL}. +symmetrized via the update \code{B := B + t(B) / 2}. Defaults to \code{NULL}. You must specify either \code{k} or \code{B}, but not both.} \item{...}{ diff --git a/man/sample_edgelist.matrix.Rd b/man/sample_edgelist.matrix.Rd index 6089873..17511c0 100644 --- a/man/sample_edgelist.matrix.Rd +++ b/man/sample_edgelist.matrix.Rd @@ -75,14 +75,14 @@ To avoid handling such considerations yourself, we recommend using over \code{\link[=sample_edgelist]{sample_edgelist()}}. } \description{ -\strong{This is a breaks-off, no safety checks interface.} +\strong{This is a brakes-off, no safety checks interface.} We strongly recommend that you do not call \code{sample_edgelist.matrix()} unless you know what you are doing, and even then, we still do not recommend it, as you will bypass all typical input validation. -\emph{\strong{extremely loud coughing}} All those who bypass input +\strong{extremely loud coughing} All those who bypass input validation suffer foolishly at their own hand. -\emph{\strong{extremely loud coughing}} +\strong{extremely loud coughing} } \details{ This function implements the \code{fastRG} algorithm as diff --git a/man/sbm.Rd b/man/sbm.Rd index eb61e21..85ac456 100644 --- a/man/sbm.Rd +++ b/man/sbm.Rd @@ -33,7 +33,7 @@ probabilities. The probability that a node in block \code{i} connects to a node in community \code{j} is \code{Poisson(B[i, j])}. Must be a square matrix. \code{matrix} and \code{Matrix} objects are both acceptable. If \code{B} is not symmetric, it will be -symmetrized via the update \code{B := B + t(B)}. Defaults to \code{NULL}. +symmetrized via the update \code{B := B + t(B) / 2}. Defaults to \code{NULL}. You must specify either \code{k} or \code{B}, but not both.} \item{...}{ diff --git a/tests/testthat/test-degree-scaling.R b/tests/testthat/test-degree-scaling.R index 5dba464..cd53650 100644 --- a/tests/testthat/test-degree-scaling.R +++ b/tests/testthat/test-degree-scaling.R @@ -1,30 +1,3 @@ -library(magrittr) - -test_that("expected_degrees() has dimension and correctness", { - n <- 100 - k <- 5 - - set.seed(143) - - X <- matrix(rpois(n = n * k, 1), nrow = n) - S <- matrix(runif(n = k * k, 0, .1), nrow = k) - - ufm <- undirected_factor_model(X, S) - - # dimensional correctness - expect_equal( - length(expected_degrees(ufm)), - n - ) - - # numerical correctness - expect_equal( - expected_degrees(ufm), - rowSums(ufm$X %*% tcrossprod(ufm$S, ufm$X)) - ) -}) - - test_that("undirected expected degree computed consistently", { # see issue 19 @@ -45,7 +18,7 @@ test_that("undirected expected degree computed consistently", { expect_equal( expected_degree(b_model), # computed pop * a + pop * b, # expected "undirected edge degree", - tolerance = 5 + tolerance = 1.5 ) A <- sample_sparse(b_model) @@ -60,7 +33,7 @@ test_that("undirected expected degree computed consistently", { expect_equal( mean(rowSums(triu(A))), # computed pop * a + pop * b, # expected "undirected edge degree" - tolerance = 5 + tolerance = 0.5 ) model2 <- sbm(n = n, B = B, poisson_edges = FALSE, expected_degree = 75) @@ -68,7 +41,7 @@ test_that("undirected expected degree computed consistently", { expect_equal( expected_degree(model2), # computed pop * a + pop * b, # expected "undirected edge degree", - tolerance = 5 + tolerance = 0.5 ) A2 <- sample_sparse(model2) @@ -76,7 +49,7 @@ test_that("undirected expected degree computed consistently", { expect_equal( mean(rowSums(triu(A2))), # computed pop * a + pop * b, # expected "undirected edge degree", - tolerance = 5 + tolerance = 1.5 ) }) @@ -94,7 +67,7 @@ test_that("undirected density computed consistently", { b_model <- sbm( n = n, B = B, - poisson_edges = FALSE + poisson_edges = TRUE ) expect_equal( @@ -161,7 +134,7 @@ test_that("undirected factor model", { expect_lt(el_mean_degree, 11) g2 <- igraph::graph_from_data_frame(el, directed = TRUE) - A <- igraph::as_adj(g2) + A <- igraph::as_adjacency_matrix(g2) # NOTE: see issue #19 about the following # @@ -185,9 +158,9 @@ test_that("undirected factor model", { tbl_graph <- sample_tidygraph(ufm) - tbl_graph_edges <- tbl_graph %>% - activate(edges) %>% - as_tibble() %>% + tbl_graph_edges <- tbl_graph |> + activate(edges) |> + as_tibble() |> nrow() tbl_graph_mean_degree <- tbl_graph_edges / n @@ -227,9 +200,9 @@ test_that("directed factor model", { el <- sample_edgelist(dfm) - el_mean_in_degree <- el %>% - count(to) %>% - pull(n) %>% + el_mean_in_degree <- el |> + count(to) |> + pull(n) |> mean() expect_lt(9, el_mean_in_degree) @@ -239,9 +212,9 @@ test_that("directed factor model", { el2 <- sample_edgelist(dfm2) - el2_mean_out_degree <- el2 %>% - count(from) %>% - pull(n) %>% + el2_mean_out_degree <- el2 |> + count(from) |> + pull(n) |> mean() expect_lt(95, el2_mean_out_degree) @@ -282,7 +255,7 @@ test_that("directed factor model", { ### igraph tests -------------------------------------------------------------- ig <- sample_igraph(dfm) - A_ig <- igraph::as_incidence_matrix(ig, sparse = TRUE, names = FALSE) + A_ig <- igraph::as_biadjacency_matrix(ig, sparse = TRUE, names = FALSE) ig_mean_in_degree <- mean(colSums(A_ig)) expect_lt(9, ig_mean_in_degree) @@ -290,7 +263,7 @@ test_that("directed factor model", { ig2 <- sample_igraph(dfm2) - A2_ig <- igraph::as_incidence_matrix(ig2, sparse = TRUE, names = FALSE) + A2_ig <- igraph::as_biadjacency_matrix(ig2, sparse = TRUE, names = FALSE) ig2_mean_out_degree <- mean(rowSums(A2_ig)) @@ -308,7 +281,7 @@ test_that("directed factor model", { ### tidygraph tests ---------------------------------------------------------- tg <- sample_tidygraph(dfm) - A_tg <- igraph::as_incidence_matrix(tg, sparse = TRUE, names = FALSE) + A_tg <- igraph::as_biadjacency_matrix(tg, sparse = TRUE, names = FALSE) tg_mean_in_degree <- mean(colSums(A_tg)) expect_lt(9, tg_mean_in_degree) @@ -316,7 +289,7 @@ test_that("directed factor model", { tg2 <- sample_tidygraph(dfm2) - A2_tg <- igraph::as_incidence_matrix(tg2, sparse = TRUE, names = FALSE) + A2_tg <- igraph::as_biadjacency_matrix(tg2, sparse = TRUE, names = FALSE) tg2_mean_out_degree <- mean(rowSums(A2_tg)) diff --git a/tests/testthat/test-population-spectra.R b/tests/testthat/test-population-spectra.R index a923fd7..d20973c 100644 --- a/tests/testthat/test-population-spectra.R +++ b/tests/testthat/test-population-spectra.R @@ -8,14 +8,18 @@ test_that("tested eigs_sym and svds for population spectra on sbm with repeated set.seed(27) n <- 100 - k <- 10 + k <- 5 B <- diag(rep(0.5, k)) - sbm <- sbm(n = n, B = B) + sbm <- sbm(n = n, B = B, allow_self_loops = FALSE) EA <- expectation(sbm) EA_manual <- sbm$X %*% tcrossprod(sbm$S, sbm$X) + expect_equal( + expected_degree(sbm), + 10 + ) expect_equal(EA_manual, EA) @@ -31,6 +35,19 @@ test_that("tested eigs_sym and svds for population spectra on sbm with repeated expect_equal(0, sin_theta_distance(eig$vectors, eig_manual$vectors)) expect_equal(0, sin_theta_distance(s$u, eig_manual$vectors)) expect_equal(0, sin_theta_distance(s$v, eig_manual$vectors)) + + el <- sample_edgelist(sbm) + A <- sample_sparse(sbm) + + nrow(el) # edges as in the upper triangle only + sum(EA) + sum(A) + + s_obs <- svds(A, k) + + expect_true( + all(s_obs$d >= s$d - log(n) & s_obs$d <= s$d + log(n)) + ) }) test_that("tested eigs_sym and svds for population spectra on sbm with distinct eigenvalues", { @@ -60,4 +77,11 @@ test_that("tested eigs_sym and svds for population spectra on sbm with distinct expect_equal(0, sin_theta_distance(eig$vectors, eig_manual$vectors)) expect_equal(0, sin_theta_distance(s$u, eig_manual$vectors)) expect_equal(0, sin_theta_distance(s$v, eig_manual$vectors)) + + A <- sample_sparse(sbm) + s_obs <- svds(A, k) + + expect_true( + all(s_obs$d >= s$d - log(n) & s_obs$d <= s$d + log(n)) + ) }) From 15d84b43f5e752e4ef8bdc8c4439d9294645e638 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:04:05 -0800 Subject: [PATCH 02/12] Add expected edges back for use in vignette --- R/expected-degrees.R | 17 +++++++++++++++++ man/expected_edges.Rd | 3 +++ 2 files changed, 20 insertions(+) diff --git a/R/expected-degrees.R b/R/expected-degrees.R index f5ef99b..031cb3e 100644 --- a/R/expected-degrees.R +++ b/R/expected-degrees.R @@ -93,6 +93,23 @@ expected_edges.undirected_factor_model <- function(factor_model, ...) { sum(Cx %*% S %*% Cx) } +#' @rdname expected_edges +#' @export +expected_degrees <- function(factor_model, ...) { + UseMethod("expected_degrees") +} + +#' @export +expected_degrees.undirected_factor_model <- function(factor_model, ...) { + # rowSums of E[A|X, S] = XSX' are XSX'1 for 1 a column vector of ones + # want to avoid memory cost of instantiating all of E[A|X, S], which is + # typically large and dense + X <- factor_model$X + S <- factor_model$S + ones <- matrix(1, nrow = nrow(X)) + as.numeric(X %*% (tcrossprod(S, X) %*% ones)) +} + #' @rdname expected_edges #' @export expected_degree <- function(factor_model, ...) { diff --git a/man/expected_edges.Rd b/man/expected_edges.Rd index 64a7b76..551b5a0 100644 --- a/man/expected_edges.Rd +++ b/man/expected_edges.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/expected-degrees.R \name{expected_edges} \alias{expected_edges} +\alias{expected_degrees} \alias{expected_degree} \alias{expected_in_degree} \alias{expected_out_degree} @@ -10,6 +11,8 @@ \usage{ expected_edges(factor_model, ...) +expected_degrees(factor_model, ...) + expected_degree(factor_model, ...) expected_in_degree(factor_model, ...) From 1460db03711216c812522d5484c7fe774860389d Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:04:36 -0800 Subject: [PATCH 03/12] Air --- R/sample_edgelist.R | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/R/sample_edgelist.R b/R/sample_edgelist.R index 04b0e55..68c903e 100644 --- a/R/sample_edgelist.R +++ b/R/sample_edgelist.R @@ -144,8 +144,9 @@ #' sample_tidygraph(fm) #' sample_edgelist <- function( - factor_model, - ...) { + factor_model, + ... +) { rlang::check_dots_unnamed() UseMethod("sample_edgelist") } @@ -153,8 +154,9 @@ sample_edgelist <- function( #' @rdname sample_edgelist #' @export sample_edgelist.undirected_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { X <- factor_model$X # see #43, move the scaling factor here so it's temporary during sampling @@ -162,7 +164,9 @@ sample_edgelist.undirected_factor_model <- function( S <- factor_model$S / 2 sample_edgelist( - X, S, X, + X, + S, + X, FALSE, factor_model$poisson_edges, factor_model$allow_self_loops @@ -172,14 +176,17 @@ sample_edgelist.undirected_factor_model <- function( #' @rdname sample_edgelist #' @export sample_edgelist.directed_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { X <- factor_model$X S <- factor_model$S Y <- factor_model$Y sample_edgelist( - X, S, Y, + X, + S, + Y, TRUE, factor_model$poisson_edges, factor_model$allow_self_loops @@ -251,11 +258,14 @@ sample_edgelist.directed_factor_model <- function( #' sample_edgelist(X, S, Y, TRUE, TRUE, TRUE) #' sample_edgelist.matrix <- function( - factor_model, S, Y, - directed, - poisson_edges, - allow_self_loops, - ...) { + factor_model, + S, + Y, + directed, + poisson_edges, + allow_self_loops, + ... +) { X <- factor_model stopifnot(is.logical(directed)) From bab54e3d08dfe3f33a77b3af3c2b4da160d7d83e Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:05:20 -0800 Subject: [PATCH 04/12] Improve degree tests --- tests/testthat/test-degree-scaling.R | 82 ++++++++++------------------ 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/tests/testthat/test-degree-scaling.R b/tests/testthat/test-degree-scaling.R index cd53650..0d8843b 100644 --- a/tests/testthat/test-degree-scaling.R +++ b/tests/testthat/test-degree-scaling.R @@ -12,44 +12,36 @@ test_that("undirected expected degree computed consistently", { b_model <- sbm( n = n, B = B, - poisson_edges = FALSE + poisson_edges = TRUE ) expect_equal( expected_degree(b_model), # computed - pop * a + pop * b, # expected "undirected edge degree", - tolerance = 1.5 + pop * a + pop * b # expected "undirected edge degree", ) A <- sample_sparse(b_model) - ### degree computation gotchas - - mean(rowSums(A)) # double counts undirected edges - #> [1] 156.711 - mean(rowSums(triu(A))) # right way to count undirected edges in A - #> [1] 78.413 - expect_equal( - mean(rowSums(triu(A))), # computed + mean(rowSums(A)), # computed pop * a + pop * b, # expected "undirected edge degree" - tolerance = 0.5 + tolerance = 0.05 ) - model2 <- sbm(n = n, B = B, poisson_edges = FALSE, expected_degree = 75) + model2 <- sbm(n = n, B = B, poisson_edges = TRUE, expected_degree = 75) expect_equal( expected_degree(model2), # computed pop * a + pop * b, # expected "undirected edge degree", - tolerance = 0.5 + tolerance = 0.05 ) A2 <- sample_sparse(model2) expect_equal( - mean(rowSums(triu(A2))), # computed + mean(rowSums(A2)), # computed pop * a + pop * b, # expected "undirected edge degree", - tolerance = 1.5 + tolerance = 0.05 ) }) @@ -78,16 +70,8 @@ test_that("undirected density computed consistently", { A <- sample_sparse(b_model) - ### density computation gotchas - - # almost correct because double counts UT and LT in num and denom, - # but diagonal gets too much weight. slight over-estimate of density - sum(A) / n^2 - - sum(triu(A)) / choose(n, 2) # correct density estimate - expect_equal( - sum(triu(A)) / choose(n, 2), # computed + sum(A) / choose(n, 2), # computed n * (pop * a + pop * b) / choose(n, 2), # expected "undirected edge degree", tolerance = 0.05 ) @@ -103,7 +87,7 @@ test_that("undirected density computed consistently", { A2 <- sample_sparse(model2) expect_equal( - sum(triu(A2)) / choose(n, 2), # computed + sum(A2) / choose(n, 2), # computed 0.15, # expected "undirected edge degree", tolerance = 0.05 ) @@ -127,9 +111,10 @@ test_that("undirected factor model", { expect_equal(expected_degree(ufm), 10) expect_equal(expected_density(ufm), 0.02, tolerance = 0.05) # tolerance should be relative here - + # this is only edges in the upper triangle! undercounts edges by a factor of 2 el <- sample_edgelist(ufm) - el_mean_degree <- nrow(el) / n + + el_mean_degree <- 2 * nrow(el) / n expect_lt(9, el_mean_degree) expect_lt(el_mean_degree, 11) @@ -142,31 +127,40 @@ test_that("undirected factor model", { # mean(rowSums(triu(A))) # right way to count undirected edges A <- sample_sparse(ufm) - matrix_mean_degree <- mean(rowSums(triu(A))) + matrix_mean_degree <- mean(rowSums(A)) expect_equal(rowSums(A), colSums(A)) expect_lt(9, matrix_mean_degree) expect_lt(matrix_mean_degree, 11) - graph <- sample_igraph(ufm) - # igraph doubles edge counts relative to the way we want to count - igraph_mean_degree <- mean(igraph::degree(graph)) / 2 + igraph_mean_degree <- mean(igraph::degree(graph)) expect_lt(9, igraph_mean_degree) expect_lt(igraph_mean_degree, 11) - tbl_graph <- sample_tidygraph(ufm) tbl_graph_edges <- tbl_graph |> activate(edges) |> as_tibble() |> nrow() - tbl_graph_mean_degree <- tbl_graph_edges / n + # edge table is undirected so undercounts just like the edgelist does + # sanity check this by comparing to the programmatic igraph computation + # that follows. undercounts follow from only recording edges in upper triangle + # once again + + # another way to sanity check this + # tbl_graph |> activate(edges) |> as_tibble() |> mutate(comp = from <= to) |> pull(comp) |> all() + tbl_graph_mean_degree_tbl <- 2 * tbl_graph_edges / n + + expect_lt(9, tbl_graph_mean_degree_tbl) + expect_lt(tbl_graph_mean_degree_tbl, 11) + + tbl_graph_mean_degree_ig <- mean(degree(tbl_graph)) - expect_lt(9, tbl_graph_mean_degree) - expect_lt(tbl_graph_mean_degree, 11) + expect_lt(9, tbl_graph_mean_degree_ig) + expect_lt(tbl_graph_mean_degree_ig, 11) expect_silent(eigs_sym(ufm)) }) @@ -195,7 +189,6 @@ test_that("directed factor model", { dfm3 <- directed_factor_model(X = X, S = S, Y = Y, expected_density = 0.1) expect_equal(expected_density(dfm3), 0.1) - ### edgelist tests ----------------------------------------------------------- el <- sample_edgelist(dfm) @@ -208,8 +201,6 @@ test_that("directed factor model", { expect_lt(9, el_mean_in_degree) expect_lt(el_mean_in_degree, 11) - - el2 <- sample_edgelist(dfm2) el2_mean_out_degree <- el2 |> @@ -220,8 +211,6 @@ test_that("directed factor model", { expect_lt(95, el2_mean_out_degree) expect_lt(el2_mean_out_degree, 105) - - el3 <- sample_edgelist(dfm3) el3_density <- nrow(el3) / as.numeric(n * d) @@ -229,7 +218,6 @@ test_that("directed factor model", { expect_lt(0.08, el3_density) expect_lt(el3_density, 0.12) - ### sparse matrix tests ------------------------------------------------------ A <- sample_sparse(dfm) @@ -238,14 +226,12 @@ test_that("directed factor model", { expect_lt(9, matrix_mean_in_degree) expect_lt(matrix_mean_in_degree, 11) - A2 <- sample_sparse(dfm2) matrix_mean_out_degree <- mean(rowSums(A2)) expect_lt(95, matrix_mean_out_degree) expect_lt(matrix_mean_out_degree, 105) - A3 <- sample_sparse(dfm3) A3_density <- mean(A3) @@ -261,23 +247,19 @@ test_that("directed factor model", { expect_lt(9, ig_mean_in_degree) expect_lt(ig_mean_in_degree, 11) - ig2 <- sample_igraph(dfm2) A2_ig <- igraph::as_biadjacency_matrix(ig2, sparse = TRUE, names = FALSE) ig2_mean_out_degree <- mean(rowSums(A2_ig)) - expect_lt(95, ig2_mean_out_degree) expect_lt(ig2_mean_out_degree, 105) - ig3 <- sample_igraph(dfm3) ig3_density <- igraph::ecount(ig3) / as.numeric(n * d) expect_lt(0.08, ig3_density) expect_lt(ig3_density, 0.12) - ### tidygraph tests ---------------------------------------------------------- tg <- sample_tidygraph(dfm) @@ -287,23 +269,19 @@ test_that("directed factor model", { expect_lt(9, tg_mean_in_degree) expect_lt(tg_mean_in_degree, 11) - tg2 <- sample_tidygraph(dfm2) A2_tg <- igraph::as_biadjacency_matrix(tg2, sparse = TRUE, names = FALSE) tg2_mean_out_degree <- mean(rowSums(A2_tg)) - expect_lt(95, tg2_mean_out_degree) expect_lt(tg2_mean_out_degree, 105) - tg3 <- sample_tidygraph(dfm3) tg3_density <- igraph::ecount(tg3) / as.numeric(n * d) expect_lt(0.08, tg3_density) expect_lt(tg3_density, 0.12) - ### decomposition sanity check ----------------------------------------------- expect_silent(svds(dfm)) From 72e3fb86b4079a106fba24e005b3ebdc1a59d49f Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:05:57 -0800 Subject: [PATCH 05/12] Test population and sample spectra are close --- tests/testthat/test-population-spectra.R | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-population-spectra.R b/tests/testthat/test-population-spectra.R index d20973c..e7f58aa 100644 --- a/tests/testthat/test-population-spectra.R +++ b/tests/testthat/test-population-spectra.R @@ -38,15 +38,10 @@ test_that("tested eigs_sym and svds for population spectra on sbm with repeated el <- sample_edgelist(sbm) A <- sample_sparse(sbm) - - nrow(el) # edges as in the upper triangle only - sum(EA) - sum(A) - s_obs <- svds(A, k) expect_true( - all(s_obs$d >= s$d - log(n) & s_obs$d <= s$d + log(n)) + all(s_obs$d >= s$d - 2 * log(n) & s_obs$d <= s$d + 2 * log(n)) ) }) @@ -79,9 +74,9 @@ test_that("tested eigs_sym and svds for population spectra on sbm with distinct expect_equal(0, sin_theta_distance(s$v, eig_manual$vectors)) A <- sample_sparse(sbm) - s_obs <- svds(A, k) + s_obs <- irlba::irlba(A, k) expect_true( - all(s_obs$d >= s$d - log(n) & s_obs$d <= s$d + log(n)) + all(s_obs$d >= s$d - 2 * log(n) & s_obs$d <= s$d + 2 * log(n)) ) }) From 0a2b3d08c50b0f6d6ff538feeec344328c0ede66 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:06:23 -0800 Subject: [PATCH 06/12] Add consistency vignette --- DESCRIPTION | 6 +- vignettes/.gitignore | 2 + vignettes/consistency.Rmd | 270 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/consistency.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 795d29d..410250d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: fastRG Title: Sample Generalized Random Dot Product Graphs in Linear Time -Version: 0.3.3.9000 +Version: 0.3.3.9001 Authors@R: c( person("Alex", "Hayes", , "alexpghayes@gmail.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-4985-5160")), @@ -38,11 +38,15 @@ Imports: tidyr Suggests: covr, + irlba, knitr, magrittr, + purrr, rmarkdown, + scales, testthat (>= 3.0.0) Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 +VignetteBuilder: knitr diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/consistency.Rmd b/vignettes/consistency.Rmd new file mode 100644 index 0000000..8325a43 --- /dev/null +++ b/vignettes/consistency.Rmd @@ -0,0 +1,270 @@ +--- +title: "Demonstration: simulation study" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{consistency} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +In this vignette we demonstrate how to use the `fastRG` package to study the finite sample performance of two spectral estimators, the Adjacency Spectral Embedding and the Laplacian Spectral Embedding. + +We'll consider how these estimators perform in two different cases: a stochastic blockmodel where the signal eigenvalues are all well-separated, and a stochastic blockmodel with exactly repeated eigenvalues. We define the data generating process as follows: + +```{r} +#| message: false +#| warning: false +library(dplyr) +library(ggplot2) +library(fastRG) +library(irlba) +library(purrr) +library(scales) +library(tidyr) + +set.seed(27) + +model_distinct <- function(n, k = 5) { + B <- matrix(0.05, nrow = k, ncol = k) + diag(B) <- seq(0.8, 0.4, length.out = k) + latent <- dcsbm( + theta = rexp(n) + 1, + B = B, + expected_degree = 2 * sqrt(n) + ) +} + +model_repeated <- function(n, k = 5) { + latent <- dcsbm( + theta = rep(1, n), + B = diag(0.5, k), + expected_degree = 2 * sqrt(n) + ) +} +``` + +Now, if we want to compare the population singular values of the distinct eigenvalue model with the sample singular values, we could do so as follows: + +```{r} +# sample the latent parameters of the blockmodel +latent <- model_distinct(100) + +# compute the population singular value decomposition of the blockmodel +s_pop <- svds(latent) + +# sample a network conditional on the latent factors +A <- sample_sparse(latent) + +# singular value decomposition of the observed network +s_obs <- irlba(A, 5) + +# difference between population and sample singular values +s_pop$d - s_obs$d +``` + +That's really it! To run a short simulation study, most of the remaining work comes down to choosing various losses that you want to compute and implemeting them in. The following chunk computes several losses that we might care about: + +- sin Theta loss for subspace spanned by all the singular vectors +- sin Theta loss for each individual singular vector +- two-to-infinity loss of the singular vectors, both scaled and unscaled by the square root of the singular values +- the spectral norm of the difference between the population and sample singular values + +```{r setup} +sin_theta_distance <- function(u, v) { + s <- svd(crossprod(u, v)) + ncol(u) - sum(s$d^2) +} + +two_to_infinity_loss <- function(X, Y) { + s <- svd(crossprod(X, Y)) + rotation <- tcrossprod(s$v, s$u) + Yrot <- Y %*% rotation + diff <- X - Yrot + max(sqrt(Matrix::rowSums(diff^2))) +} + +loss_helper <- function(s_pop, s_obs) { + u_pop <- s_pop$u + u_obs <- s_obs$u + + d_pop <- s_pop$d + d_obs <- s_obs$d + + x_pop <- u_pop %*% sqrt(diag(d_pop)) + x_obs <- u_obs %*% sqrt(diag(d_obs)) + + # spectral norm difference of pop and obs, simplified computation + # since d_pop and d_obs are diagonal + spectral_loss <- max(abs(d_pop - d_obs)) + + k <- ncol(u_pop) + + column_sin_theta_loss <- map_dbl( + 1:k, + \(j) { + sin_theta_distance(u_pop[, j, drop = FALSE], u_obs[, j, drop = FALSE]) + } + ) + + tibble( + sin_theta_loss = sin_theta_distance(u_pop, u_obs), + u_two_inf_loss = two_to_infinity_loss(u_pop, u_obs), + x_two_inf_loss = two_to_infinity_loss(x_pop, x_obs), + spectral_loss = spectral_loss, + sin_theta_loss1 = column_sin_theta_loss[1], + sin_theta_loss2 = column_sin_theta_loss[2], + sin_theta_loss3 = column_sin_theta_loss[3], + sin_theta_loss4 = column_sin_theta_loss[4], + sin_theta_loss5 = column_sin_theta_loss[5] + ) +} +``` + +With these tools in handle, we can define a simulation runner. Since this is just a basic simulation, we aren't too worried about computational efficiency, and we create a grid of sample sizes, crossed with replication indices. Then we compute out hearts out. + +```{r} + +run_simulation <- function(model, num_reps = 30) { + expand_grid( + n = c(100, 180, 330, 600, 1100, 2000), + reps = 1:num_reps + ) |> + mutate( + pop = map(n, model), + s_pop = map(pop, svds), + A = map(pop, sample_sparse), + s_obs = map(A, irlba, 5), # rank five svd, + loss = map2(s_pop, s_obs, loss_helper) + ) +} + +sims <- run_simulation(model_distinct) +sims +``` + +Now we'd like to summarize the results across the 30 different runs of the simulation, so we write a short helper to do this as well. + +```{r} +summarize_simulations <- function(sims) { + sims |> + unnest_wider(c(loss)) |> + select(contains("loss"), everything()) |> + summarize( + across(contains("loss"), mean), + .by = n + ) |> + pivot_longer( + contains("loss"), + names_to = "loss_type", + values_to = "loss" + ) |> + mutate( + loss_type = recode( + loss_type, + "sin_theta_loss" = "Sin Theta Loss", + "u_two_inf_loss" = "U two-inf", + "x_two_inf_loss" = "X two-inf", + "spectral_loss" = "Spectral", + "sin_theta_loss1" = "Sin Theta Loss (column 1)", + "sin_theta_loss2" = "Sin Theta Loss (column 2)", + "sin_theta_loss3" = "Sin Theta Loss (column 3)", + "sin_theta_loss4" = "Sin Theta Loss (column 4)", + "sin_theta_loss5" = "Sin Theta Loss (column 5)" + ) + ) +} + +results <- summarize_simulations(sims) +results +``` + +And now that we have our results, all that remains is to plot them. + +```{r} +plot_results <- function(results) { + results |> + ggplot() + + aes(x = n, y = loss, color = loss_type) + + geom_line() + + scale_x_log10(labels = label_log(digits = 2)) + + scale_y_log10(labels = label_log(digits = 2)) + + scale_color_viridis_d(begin = 0.15, end = 0.85, guide = "none") + + facet_wrap(vars(loss_type), scales = "free_y") + + labs( + title = "Estimation error in adjacency spectral embedding", + y = "Estimation error (log scale)", + x = "Number of nodes (log scale)" + ) + + theme_minimal() +} + +plot_results(results) +``` + +Here we see that all of our losses are decreasing except for the spectral loss, which is exactly what we would expect from theory. + +Since we defined helpers to run the simulations for us, when we want to see results for the model with repeated eigenvalues, it's as straightforward as: + +```{r} +model_repeated |> + run_simulation() |> + summarize_simulations() |> + plot_results() +``` + +Now we see that we can only recover the subspace spanned by the singular vectors, but not the singular vectors themselves, exactly as expected. + +Here's one last trick. We might also be interested in using a different estimator, the Laplacian Spectral Embedding, to recover the singular value decomposition of the expected (degree-normalized, regularized) graph Laplacian. This is also straightforward using `fastRG`. + +```{r} +graph_laplacian <- function(A) { + degrees <- rowSums(A) + tau <- mean(degrees) + DA <- rowScale(A, 1 / sqrt(degrees + tau)) + colScale(DA, 1 / sqrt(degrees + tau)) +} + +svds_laplacian <- function(pop) { + # regularize by expected mean degree (scalar) + tau <- expected_degree(pop) + # vector!! + degrees_pop <- expected_degrees(pop) + + # rescale X in the population model so that XSX' is the expected + # graph Laplacian. we can't use this to sample networks anymore, but + # we can use it to bootstrap a population SVD calculation + pop$X <- rowScale(pop$X, 1 / sqrt(degrees_pop + tau)) + svds(pop) +} + +run_laplacian_simulation <- function(model, num_reps = 30) { + expand_grid( + n = c(100, 180, 330, 600, 1100, 2000), + reps = 1:num_reps + ) |> + mutate( + pop = map(n, model), + s_pop = map(pop, svds_laplacian), + A = map(pop, sample_sparse), + L = map(A, graph_laplacian), + s_obs = map(L, irlba, 5), # rank five svd, + loss = map2(s_pop, s_obs, loss_helper) + ) +} +``` + +With our helpers defined we're once again off to the races and see that Laplacian Spectral Embedding also does well + +```{r} +model_distinct |> + run_laplacian_simulation() |> + summarize_simulations() |> + plot_results() +``` \ No newline at end of file From bdcc3dddd4e99aeff73657081415dbfba5f15133 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:06:37 -0800 Subject: [PATCH 07/12] Re-document --- NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 756bc17..3169658 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(eigs_sym,undirected_factor_model) S3method(expectation,directed_factor_model) S3method(expectation,undirected_factor_model) S3method(expected_degree,undirected_factor_model) +S3method(expected_degrees,undirected_factor_model) S3method(expected_density,directed_factor_model) S3method(expected_density,undirected_factor_model) S3method(expected_edges,directed_factor_model) @@ -40,6 +41,7 @@ export(eigs_sym) export(erdos_renyi) export(expectation) export(expected_degree) +export(expected_degrees) export(expected_density) export(expected_edges) export(expected_in_degree) From 40e43859daf223cfd91672b1f5269dba7df78988 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:12:19 -0800 Subject: [PATCH 08/12] Fixes #19 for real. Closes #43. Closes #33. Closes #31. --- NEWS.md | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index eb4161f..4b1a4be 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,32 @@ # fastRG (development version) -- Added option to specify precise number of nodes in each block of a `dcsbm()` or `sbm()` via the `block_sizes` argument. This makes it easier to construct blockmodels with exactly repeated eigenvalues. -- The default behavior of `dcsbm()`, `sbm()` and `planted_partition()` has changed: when `block_sizes` or `pi` is unspecified, the new default is to balance block sizes as evenly as possible. Previously, `pi` was set to a constant vector, balancing block sizes in expectation only. -- Specifying both `k` and `B` in `dcsbm()` and `sbm()` now results in an error; only specify one of these arguments. +## Major changes + +- This release fixes a long-standing inconsistency in how degrees are counted, which resulted in sampling twice as many edges as desired in `undirected_factor_model()`. See below for details (#19). This also caused population singular values for undirected factors models to be off by a factor of 2 (#31). + +## Breaking changes + +- Specifying both `k` and `B` in `dcsbm()` and `sbm()` now results in an error. You should only specify one of these arguments. + +- It is now possible to specify the precise number of nodes in each block of a `dcsbm()` (and subclasses `sbm()` and `planted_partition()`) via the `block_sizes` argument. This makes it easier to construct blockmodels with exactly repeated eigenvalues. Additionally, the default behavior is now to use this argument and to balance block sizes as evenly as possible. Previously, the default behavior was to sample blocks memberships with equal probability. + +## Non-breaking changes + +- Added `vignette("consistency")` demonstratingn how to check consistency of spectral estimators using `fastRG` for sampling and population spectra computations (#33, #43) + +## Details about degree over-sampling bug and the fix + +The `fastRG` sampling algorithm, as implemented in `sample_edgelist.matrix()`, is fundamentally a sampler for asymmetric, directed networks with conditional expectation $\mathbb E[A \mid X, S, Y] = X S Y^top \in \mathbb R^{n_1 \times n_2}$. That is, you can think of the sampler as a very efficient procedure for iterating through $i = 1, ..., n_1$ and $j = 1, ..., n_2$ and sampling from a Poisson with rate $(X S Y^\top)_{ij}$. + +However, we would also like to use this same sampler to sample from undirected networks. In an undirected networks, we have the conditional expectation $\mathbb E[A \mid X, S, Y] = Z B Y^top \in \mathbb R^{n \times n}$ is a square matrix with $(X S Y^\top)_{ij} = (X S Y^\top)_{ji}$. To sample from this matrix, you want to sample the upper triangle of $A$ from a Poisson with rate $(X S Y^\top)_{ij}$ for all $1 \le i \le j \le n$, and then symmetrize $A$. + +Since the `fastRG` algorithm samples $A_{ij}$ for all $i, j$, not just the upper triangle of $A$, we use a trick to sample from undirected networks. First, we force the conditional expectation to the symmetric by symmetrizing $S$. Then, we still sample for all $i, j$. To set $A_{ij}$ we sample once from a Poisson with rate $(X S Y^\top)_{ij}$ and once from a Poisson with rate $(X S Y^\top)_{ji}$ (these rates are equal by symmetry!). Then we set $A_{ij} = A_{ji}$ to the sum of these sample Poisson random variables. The issue is that this doubles the expected value of $A_{ij} = A_{ji}$ and so we sample twice as many edges as we should. Up until this release of `fastRG`, we've unfortunately been doing this double sampling (see #19). + +In this release, we fix this over-sampling. The key is that we divide $S$ by two at sampling time. We do not modify $S$ at all in the `undirected_factor_model()`! You can always use $X S X^\top$ to compute the expected value of $A$. This new change *only affects sampling*. + +That is, instead of passing the $S$ from an `undirected_factor_model()` to the sampler `sample_edgelist.matrix()`, we pass $S / 2$ (see `sample_edgelist.undirected_factor_model()`). This fixes double sampling on the off-diagonal of $A$. The downside is that we're now undersampling by half the diagonal of $A$. I'm assuming that for most use cases this doesn't matter. We could correct for this undersampling of the diagonal of $A$, so please open an issue if self-loops are important to your project. + +As a consequence of this change, $A$ and $\mathbb E[A | X, S, Y]$ show now be on the same scale, rather than off by a factor of 2. Importantly, the spectrums should match up now, so you can now use `fastRG` to check how closely you're recovering the spectrum of the your model. See `vignette("consistency")` for a quick demonstration show consistency of spectral estimates. # fastRG 0.3.3 From ef682a0cf13fc69839d483152776fefd5d064c9a Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:17:37 -0800 Subject: [PATCH 09/12] Fix typos in NEWS --- NEWS.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4b1a4be..85a9719 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,21 +12,21 @@ ## Non-breaking changes -- Added `vignette("consistency")` demonstratingn how to check consistency of spectral estimators using `fastRG` for sampling and population spectra computations (#33, #43) +- Added `vignette("consistency")` demonstrating how to check consistency of spectral estimators using `fastRG` for sampling and population spectra computations (#33, #43) ## Details about degree over-sampling bug and the fix -The `fastRG` sampling algorithm, as implemented in `sample_edgelist.matrix()`, is fundamentally a sampler for asymmetric, directed networks with conditional expectation $\mathbb E[A \mid X, S, Y] = X S Y^top \in \mathbb R^{n_1 \times n_2}$. That is, you can think of the sampler as a very efficient procedure for iterating through $i = 1, ..., n_1$ and $j = 1, ..., n_2$ and sampling from a Poisson with rate $(X S Y^\top)_{ij}$. +The `fastRG` sampling algorithm, as implemented in `sample_edgelist.matrix()`, is fundamentally a sampler for asymmetric, directed networks with conditional expectation $\mathbb E[A \mid X, S, Y] = X S Y^\top \in \mathbb R^{n_1 \times n_2}$. That is, you can think of the sampler as a very efficient procedure for iterating through $i = 1, ..., n_1$ and $j = 1, ..., n_2$ and sampling from a Poisson with rate $(X S Y^\top)_{ij}$. -However, we would also like to use this same sampler to sample from undirected networks. In an undirected networks, we have the conditional expectation $\mathbb E[A \mid X, S, Y] = Z B Y^top \in \mathbb R^{n \times n}$ is a square matrix with $(X S Y^\top)_{ij} = (X S Y^\top)_{ji}$. To sample from this matrix, you want to sample the upper triangle of $A$ from a Poisson with rate $(X S Y^\top)_{ij}$ for all $1 \le i \le j \le n$, and then symmetrize $A$. +However, we would also like to use this same sampler to sample from undirected networks. In an undirected networks, the conditional expectation $\mathbb E[A \mid X, S] = X S X^\top \in \mathbb R^{n \times n}$ is a square matrix with $(X S X^\top)_{ij} = (X S X^\top)_{ji}$. To sample from this matrix, it's typical to sample the upper triangle of $A$ from a Poisson with rate $(X S X^\top)_{ij}$ for all $1 \le i \le j \le n$, and then symmetrize $A$. -Since the `fastRG` algorithm samples $A_{ij}$ for all $i, j$, not just the upper triangle of $A$, we use a trick to sample from undirected networks. First, we force the conditional expectation to the symmetric by symmetrizing $S$. Then, we still sample for all $i, j$. To set $A_{ij}$ we sample once from a Poisson with rate $(X S Y^\top)_{ij}$ and once from a Poisson with rate $(X S Y^\top)_{ji}$ (these rates are equal by symmetry!). Then we set $A_{ij} = A_{ji}$ to the sum of these sample Poisson random variables. The issue is that this doubles the expected value of $A_{ij} = A_{ji}$ and so we sample twice as many edges as we should. Up until this release of `fastRG`, we've unfortunately been doing this double sampling (see #19). +Since the `fastRG` algorithm samples $A_{ij}$ for all $i, j$, not just the upper triangle of $A$, we use a trick to sample from undirected networks. First, we force the conditional expectation to the symmetric by symmetrizing $S$. Then, we still sample for all $i, j$. To set $A_{ij}$ we sample once from a Poisson with rate $(X S X^\top)_{ij}$ and once from a Poisson with rate $(X S X^\top)_{ji}$ (these rates are equal by symmetry!). Then we set $A_{ij} = A_{ji}$ to the sum of these Poisson random variables. The issue is that this doubles the expected value of $A_{ij} = A_{ji}$ and so we sample twice as many edges as we should. Up until this release of `fastRG`, we've unfortunately been doing this double sampling in undirected networks (#19). In this release, we fix this over-sampling. The key is that we divide $S$ by two at sampling time. We do not modify $S$ at all in the `undirected_factor_model()`! You can always use $X S X^\top$ to compute the expected value of $A$. This new change *only affects sampling*. That is, instead of passing the $S$ from an `undirected_factor_model()` to the sampler `sample_edgelist.matrix()`, we pass $S / 2$ (see `sample_edgelist.undirected_factor_model()`). This fixes double sampling on the off-diagonal of $A$. The downside is that we're now undersampling by half the diagonal of $A$. I'm assuming that for most use cases this doesn't matter. We could correct for this undersampling of the diagonal of $A$, so please open an issue if self-loops are important to your project. -As a consequence of this change, $A$ and $\mathbb E[A | X, S, Y]$ show now be on the same scale, rather than off by a factor of 2. Importantly, the spectrums should match up now, so you can now use `fastRG` to check how closely you're recovering the spectrum of the your model. See `vignette("consistency")` for a quick demonstration show consistency of spectral estimates. +As a consequence of this change, $A$ and $\mathbb E[A | X, S]$ show now be on the same scale, rather than off by a factor of 2. Importantly, the spectrums should match up now, so you can now use `fastRG` to check how closely you're recovering the spectrum of the your model. See `vignette("consistency")` for a quick demonstration showing consistency of spectral estimates. # fastRG 0.3.3 From 4379fecb4b7e7b2d8657695dc303d6dd4ae08dfc Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:20:05 -0800 Subject: [PATCH 10/12] Explicitly load matrix for rowScale/colScale --- vignettes/consistency.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/consistency.Rmd b/vignettes/consistency.Rmd index 8325a43..fb76858 100644 --- a/vignettes/consistency.Rmd +++ b/vignettes/consistency.Rmd @@ -25,6 +25,7 @@ library(dplyr) library(ggplot2) library(fastRG) library(irlba) +library(Matrix) library(purrr) library(scales) library(tidyr) From 64980e0790920d13d806e077b690ee82671f6e11 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:25:57 -0800 Subject: [PATCH 11/12] Air --- R/directed_dcsbm.R | 69 +++++++++++++-------- R/directed_erdos_renyi.R | 33 +++++++--- R/directed_factor_model.R | 34 ++++++---- R/expected-spectra.R | 37 ++++++----- R/sample_igraph.R | 21 ++++--- R/sample_sparse.R | 15 +++-- R/sample_tidygraph.R | 15 +++-- R/undirected_chung_lu.R | 19 +++--- R/undirected_dcsbm.R | 58 ++++++++++------- R/undirected_erdos_renyi.R | 20 ++++-- R/undirected_factor_model.R | 31 +++++---- R/undirected_mmsbm.R | 46 ++++++++------ R/undirected_overlapping_sbm.R | 47 ++++++++------ R/undirected_planted_partition.R | 30 ++++----- R/undirected_sbm.R | 18 +++--- tests/testthat/test-allow_self_loops.R | 11 +++- tests/testthat/test-directedness.R | 3 +- tests/testthat/test-poisson_edges.R | 14 ++++- tests/testthat/test-retain-isolated-nodes.R | 4 +- tests/testthat/test-sampling-index-bug.R | 18 +++++- 20 files changed, 345 insertions(+), 198 deletions(-) diff --git a/R/directed_dcsbm.R b/R/directed_dcsbm.R index 70c937e..b3ae2e4 100644 --- a/R/directed_dcsbm.R +++ b/R/directed_dcsbm.R @@ -1,14 +1,17 @@ new_directed_dcsbm <- function( - X, S, Y, - theta_out, - theta_in, - z_out, - z_in, - pi_out, - pi_in, - sorted, - ..., - subclass = character()) { + X, + S, + Y, + theta_out, + theta_in, + z_out, + z_in, + pi_out, + pi_in, + sorted, + ..., + subclass = character() +) { subclass <- c(subclass, "directed_dcsbm") dcsbm <- directed_factor_model(X, S, Y, ..., subclass = subclass) dcsbm$theta_out <- theta_out @@ -334,16 +337,20 @@ validate_directed_dcsbm <- function(x) { #' population_svd <- svds(ddcsbm) #' directed_dcsbm <- function( - n = NULL, - theta_out = NULL, theta_in = NULL, - k_out = NULL, k_in = NULL, B = NULL, - ..., - pi_out = rep(1 / k_out, k_out), - pi_in = rep(1 / k_in, k_in), - sort_nodes = TRUE, - force_identifiability = TRUE, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n = NULL, + theta_out = NULL, + theta_in = NULL, + k_out = NULL, + k_in = NULL, + B = NULL, + ..., + pi_out = rep(1 / k_out, k_out), + pi_in = rep(1 / k_in, k_in), + sort_nodes = TRUE, + force_identifiability = TRUE, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { # NOTE: # - X corresponds to outgoing communities, outgoing edges and rows of A # - Y corresponds to incoming communities, incoming edges and columns of A @@ -462,8 +469,6 @@ directed_dcsbm <- function( z_in <- sort(z_in) } - - if (k_out > 1) { X <- sparse.model.matrix(~ z_out + 0) } else { @@ -525,7 +530,10 @@ directed_dcsbm <- function( #' @export print.directed_dcsbm <- function(x, ...) { cat(glue("Directed Degree-Corrected Stochastic Blockmodel\n", .trim = FALSE)) - cat(glue("-----------------------------------------------\n\n", .trim = FALSE)) + cat(glue( + "-----------------------------------------------\n\n", + .trim = FALSE + )) sorted <- if (x$sorted) "arranged by block" else "not arranged by block" @@ -550,7 +558,16 @@ print.directed_dcsbm <- function(x, ...) { cat("Allow self loops:", as.character(x$allow_self_loops), "\n\n") cat(glue("Expected edges: {round(expected_edges(x))}\n", .trim = FALSE)) - cat(glue("Expected in degree: {round(expected_out_degree(x), 1)}\n", .trim = FALSE)) - cat(glue("Expected out degree: {round(expected_in_degree(x), 1)}\n", .trim = FALSE)) - cat(glue("Expected density: {round(expected_density(x), 5)}\n", .trim = FALSE)) + cat(glue( + "Expected in degree: {round(expected_out_degree(x), 1)}\n", + .trim = FALSE + )) + cat(glue( + "Expected out degree: {round(expected_in_degree(x), 1)}\n", + .trim = FALSE + )) + cat(glue( + "Expected density: {round(expected_density(x), 5)}\n", + .trim = FALSE + )) } diff --git a/R/directed_erdos_renyi.R b/R/directed_erdos_renyi.R index dfc33b9..ecd9863 100644 --- a/R/directed_erdos_renyi.R +++ b/R/directed_erdos_renyi.R @@ -1,6 +1,17 @@ -new_directed_erdos_renyi <- function(X, S, Y, p, poisson_edges, allow_self_loops, ...) { +new_directed_erdos_renyi <- function( + X, + S, + Y, + p, + poisson_edges, + allow_self_loops, + ... +) { er <- directed_factor_model( - X, S, Y, ..., + X, + S, + Y, + ..., subclass = "directed_erdos_renyi", poisson_edges = poisson_edges, allow_self_loops = allow_self_loops @@ -57,14 +68,18 @@ validate_directed_erdos_renyi <- function(x) { #' A #' directed_erdos_renyi <- function( - n, ..., p = NULL, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n, + ..., + p = NULL, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { X <- Matrix(1, nrow = n, ncol = 1) Y <- Matrix(1, nrow = n, ncol = 1) - if (is.null(p) && is.null(expected_in_degree) && - is.null(expected_out_degree)) { + if ( + is.null(p) && is.null(expected_in_degree) && is.null(expected_out_degree) + ) { stop( "Must specify either `p`, `expected_in_degree` or ", " `expected_out_degree`.", @@ -79,7 +94,9 @@ directed_erdos_renyi <- function( S <- matrix(p, nrow = 1, ncol = 1) er <- new_directed_erdos_renyi( - X, S, Y, + X, + S, + Y, p = p, poisson_edges = poisson_edges, allow_self_loops = allow_self_loops, diff --git a/R/directed_factor_model.R b/R/directed_factor_model.R index c3e730d..0e084cd 100644 --- a/R/directed_factor_model.R +++ b/R/directed_factor_model.R @@ -1,9 +1,12 @@ new_directed_factor_model <- function( - X, S, Y, - poisson_edges, - allow_self_loops, - ..., - subclass = character()) { + X, + S, + Y, + poisson_edges, + allow_self_loops, + ..., + subclass = character() +) { rlang::check_dots_unnamed() n <- nrow(X) @@ -160,13 +163,16 @@ validate_directed_factor_model <- function(x) { #' fm2 #' directed_factor_model <- function( - X, S, Y, - ..., - expected_in_degree = NULL, - expected_out_degree = NULL, - expected_density = NULL, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + X, + S, + Y, + ..., + expected_in_degree = NULL, + expected_out_degree = NULL, + expected_density = NULL, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { X <- Matrix(X) S <- Matrix(S) Y <- Matrix(Y) @@ -186,7 +192,9 @@ directed_factor_model <- function( } fm <- new_directed_factor_model( - X, S, Y, + X, + S, + Y, poisson_edges = poisson_edges, allow_self_loops = allow_self_loops, ... diff --git a/R/expected-spectra.R b/R/expected-spectra.R index 9863904..f5af2fe 100644 --- a/R/expected-spectra.R +++ b/R/expected-spectra.R @@ -16,10 +16,13 @@ RSpectra::eigs_sym #' @method eigs_sym undirected_factor_model #' @export eigs_sym.undirected_factor_model <- function( - A, k = A$k, - which = "LM", sigma = NULL, - opts = list(), - ...) { + A, + k = A$k, + which = "LM", + sigma = NULL, + opts = list(), + ... +) { rlang::check_installed("RSpectra") Ax <- function(x, args) as.numeric(args$X %*% (args$SXt %*% x)) @@ -37,12 +40,13 @@ eigs_sym.undirected_factor_model <- function( #' @method svds undirected_factor_model #' @export svds.undirected_factor_model <- function( - A, - k = A$k, - nu = k, - nv = k, - opts = list(), - ...) { + A, + k = A$k, + nu = k, + nv = k, + opts = list(), + ... +) { rlang::check_installed("RSpectra") Ax <- function(x, args) { @@ -76,12 +80,13 @@ svds.undirected_factor_model <- function( #' @method svds directed_factor_model #' @export svds.directed_factor_model <- function( - A, - k = min(A$k1, A$k2), - nu = k, - nv = k, - opts = list(), - ...) { + A, + k = min(A$k1, A$k2), + nu = k, + nv = k, + opts = list(), + ... +) { rlang::check_installed("RSpectra") Ax <- function(x, args) { diff --git a/R/sample_igraph.R b/R/sample_igraph.R index 3097cf6..18fad06 100644 --- a/R/sample_igraph.R +++ b/R/sample_igraph.R @@ -25,8 +25,9 @@ #' @family samplers #' sample_igraph <- function( - factor_model, - ...) { + factor_model, + ... +) { rlang::check_dots_unnamed() rlang::check_installed("igraph", "to return graphs as `igraph` objects.") @@ -37,8 +38,9 @@ sample_igraph <- function( #' @rdname sample_igraph #' @export sample_igraph.undirected_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { edgelist <- sample_edgelist(factor_model, ...) nodes <- tibble(name = 1:nrow(factor_model$X)) igraph::graph_from_data_frame(edgelist, directed = FALSE, vertices = nodes) @@ -47,12 +49,17 @@ sample_igraph.undirected_factor_model <- function( #' @rdname sample_igraph #' @export sample_igraph.directed_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { if (factor_model$n == factor_model$d) { edgelist <- sample_edgelist(factor_model, ...) nodes <- tibble(name = 1:nrow(factor_model$X)) - one_mode <- igraph::graph_from_data_frame(edgelist, directed = TRUE, vertices = nodes) + one_mode <- igraph::graph_from_data_frame( + edgelist, + directed = TRUE, + vertices = nodes + ) return(one_mode) } diff --git a/R/sample_sparse.R b/R/sample_sparse.R index 0b5ec95..f7b2fe7 100644 --- a/R/sample_sparse.R +++ b/R/sample_sparse.R @@ -22,8 +22,9 @@ #' @family samplers #' sample_sparse <- function( - factor_model, - ...) { + factor_model, + ... +) { rlang::check_dots_unnamed() UseMethod("sample_sparse") } @@ -31,8 +32,9 @@ sample_sparse <- function( #' @rdname sample_sparse #' @export sample_sparse.undirected_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { # to construct a symmetric sparseMatrix, we only pass in elements # of either the upper or lower diagonal (otherwise we'll get an error) # so we're going to sample as we want a directed graph, and then @@ -90,8 +92,9 @@ sample_sparse.undirected_factor_model <- function( #' @rdname sample_sparse #' @export sample_sparse.directed_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { edgelist <- sample_edgelist(factor_model, ...) n <- factor_model$n diff --git a/R/sample_tidygraph.R b/R/sample_tidygraph.R index b281af9..18174a0 100644 --- a/R/sample_tidygraph.R +++ b/R/sample_tidygraph.R @@ -25,8 +25,9 @@ #' @family samplers #' sample_tidygraph <- function( - factor_model, - ...) { + factor_model, + ... +) { rlang::check_dots_unnamed() rlang::check_installed("igraph", "to return graphs as `tidygraph` objects.") @@ -36,8 +37,9 @@ sample_tidygraph <- function( #' @rdname sample_tidygraph #' @export sample_tidygraph.undirected_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { ig <- sample_igraph(factor_model, ...) tidygraph::as_tbl_graph(ig, directed = FALSE) } @@ -45,8 +47,9 @@ sample_tidygraph.undirected_factor_model <- function( #' @rdname sample_tidygraph #' @export sample_tidygraph.directed_factor_model <- function( - factor_model, - ...) { + factor_model, + ... +) { ig <- sample_igraph(factor_model, ...) tidygraph::as_tbl_graph(ig, directed = TRUE) } diff --git a/R/undirected_chung_lu.R b/R/undirected_chung_lu.R index 2150ac4..56f5fbe 100644 --- a/R/undirected_chung_lu.R +++ b/R/undirected_chung_lu.R @@ -59,12 +59,14 @@ #' edgelist #' chung_lu <- function( - n = NULL, theta = NULL, - ..., - sort_nodes = TRUE, - poisson_edges = TRUE, - allow_self_loops = TRUE, - force_identifiability = FALSE) { + n = NULL, + theta = NULL, + ..., + sort_nodes = TRUE, + poisson_edges = TRUE, + allow_self_loops = TRUE, + force_identifiability = FALSE +) { ### degree heterogeneity parameters if (is.null(n) && is.null(theta)) { @@ -105,7 +107,10 @@ chung_lu <- function( #' @export print.undirected_chung_lu <- function(x, ...) { cat(glue("Undirected Chung-Lu Graph\n", .trim = FALSE)) - cat(glue("-------------------------------------------------\n\n", .trim = FALSE)) + cat(glue( + "-------------------------------------------------\n\n", + .trim = FALSE + )) sorted <- if (x$sorted) "arranged by block" else "not arranged by block" diff --git a/R/undirected_dcsbm.R b/R/undirected_dcsbm.R index b86988b..0a82f65 100644 --- a/R/undirected_dcsbm.R +++ b/R/undirected_dcsbm.R @@ -1,11 +1,13 @@ new_undirected_dcsbm <- function( - X, S, - theta, - z, - pi, - sorted, - ..., - subclass = character()) { + X, + S, + theta, + z, + pi, + sorted, + ..., + subclass = character() +) { subclass <- c(subclass, "undirected_dcsbm") dcsbm <- undirected_factor_model(X, S, ..., subclass = subclass) dcsbm$theta <- theta @@ -267,15 +269,18 @@ validate_undirected_dcsbm <- function(x) { #' #' dcsbm <- function( - n = NULL, theta = NULL, - k = NULL, B = NULL, - ..., - block_sizes = NULL, - pi = NULL, - sort_nodes = TRUE, - force_identifiability = FALSE, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n = NULL, + theta = NULL, + k = NULL, + B = NULL, + ..., + block_sizes = NULL, + pi = NULL, + sort_nodes = TRUE, + force_identifiability = FALSE, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { ### degree heterogeneity parameters if (is.null(n) && is.null(theta)) { @@ -327,7 +332,6 @@ dcsbm <- function( ### block membership if (is.null(block_sizes) && is.null(pi)) { - base_value <- floor(n / k) remainder <- n %% k num_base_values <- k - remainder @@ -342,16 +346,17 @@ dcsbm <- function( # for sorting by block size later pi <- block_sizes / n } else if (!is.null(block_sizes)) { - - if(sum(block_sizes) != n) { - stop("Sum of `block_sizes` must equal `n` or `length(theta)`.", call. = FALSE) + if (sum(block_sizes) != n) { + stop( + "Sum of `block_sizes` must equal `n` or `length(theta)`.", + call. = FALSE + ) } z <- sample(rep(1:k, times = block_sizes)) # for sorting by block size later pi <- block_sizes / n - } else if (!is.null(pi)) { if (!is.null(pi) && !is.null(block_sizes)) { stop("Length of `pi` must match dimensions of `B`.", call. = FALSE) @@ -365,7 +370,6 @@ dcsbm <- function( stop("All elements of `pi` must be >= 0.", call. = FALSE) } - z <- sample(k, n, replace = TRUE, prob = pi) } else { stop("Must at most one of `block_sizes` and `pi`.", call. = FALSE) @@ -428,8 +432,14 @@ dcsbm <- function( #' @method print undirected_dcsbm #' @export print.undirected_dcsbm <- function(x, ...) { - cat(glue("Undirected Degree-Corrected Stochastic Blockmodel\n", .trim = FALSE)) - cat(glue("-------------------------------------------------\n\n", .trim = FALSE)) + cat(glue( + "Undirected Degree-Corrected Stochastic Blockmodel\n", + .trim = FALSE + )) + cat(glue( + "-------------------------------------------------\n\n", + .trim = FALSE + )) sorted <- if (x$sorted) "arranged by block" else "not arranged by block" diff --git a/R/undirected_erdos_renyi.R b/R/undirected_erdos_renyi.R index 649c137..5049a9b 100644 --- a/R/undirected_erdos_renyi.R +++ b/R/undirected_erdos_renyi.R @@ -1,6 +1,8 @@ new_undirected_erdos_renyi <- function(X, S, p, ...) { er <- undirected_factor_model( - X, S, ..., + X, + S, + ..., subclass = "undirected_erdos_renyi" ) er$p <- p @@ -54,9 +56,12 @@ validate_undirected_erdos_renyi <- function(x) { #' A #' erdos_renyi <- function( - n, ..., p = NULL, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n, + ..., + p = NULL, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { X <- Matrix(1, nrow = n, ncol = 1) if (is.null(p) && is.null(expected_degree)) { @@ -69,8 +74,11 @@ erdos_renyi <- function( B <- matrix(p) - er <- new_undirected_erdos_renyi(X, B, - p = p, ..., + er <- new_undirected_erdos_renyi( + X, + B, + p = p, + ..., poisson_edges = poisson_edges, allow_self_loops = allow_self_loops ) diff --git a/R/undirected_factor_model.R b/R/undirected_factor_model.R index f85de2e..1452d9f 100644 --- a/R/undirected_factor_model.R +++ b/R/undirected_factor_model.R @@ -1,9 +1,11 @@ new_undirected_factor_model <- function( - X, S, - ..., - poisson_edges = TRUE, - allow_self_loops = TRUE, - subclass = character()) { + X, + S, + ..., + poisson_edges = TRUE, + allow_self_loops = TRUE, + subclass = character() +) { rlang::check_dots_unnamed() n <- nrow(X) @@ -143,12 +145,14 @@ validate_undirected_factor_model <- function(x) { #' svds(ufm2) #' undirected_factor_model <- function( - X, S, - ..., - expected_degree = NULL, - expected_density = NULL, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + X, + S, + ..., + expected_degree = NULL, + expected_density = NULL, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { X <- Matrix(X) S <- Matrix(S) @@ -159,7 +163,10 @@ undirected_factor_model <- function( ) } - ufm <- new_undirected_factor_model(X, S, ..., + ufm <- new_undirected_factor_model( + X, + S, + ..., poisson_edges = poisson_edges, allow_self_loops = allow_self_loops ) diff --git a/R/undirected_mmsbm.R b/R/undirected_mmsbm.R index 12487a1..7a8f81f 100644 --- a/R/undirected_mmsbm.R +++ b/R/undirected_mmsbm.R @@ -16,7 +16,6 @@ validate_undirected_mmsbm <- function(x) { stop("Elements of `theta` must be strictly positive.", call. = FALSE) } - if (min(values$S) < 0) { stop( "All elements of `S` must be non-negative.", @@ -35,13 +34,15 @@ validate_undirected_mmsbm <- function(x) { } new_undirected_mmsbm <- function( - X, S, - theta, - Z, - alpha, - sorted, - ..., - subclass = character()) { + X, + S, + theta, + Z, + alpha, + sorted, + ..., + subclass = character() +) { subclass <- c(subclass, "undirected_mmsbm") mmsbm <- undirected_factor_model(X, S, ..., subclass = subclass) mmsbm$theta <- theta @@ -181,14 +182,17 @@ new_undirected_mmsbm <- function( #' svds(custom_mmsbm)$d #' mmsbm <- function( - n = NULL, theta = NULL, - k = NULL, B = NULL, - ..., - alpha = rep(1, k), - sort_nodes = TRUE, - force_pure = TRUE, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n = NULL, + theta = NULL, + k = NULL, + B = NULL, + ..., + alpha = rep(1, k), + sort_nodes = TRUE, + force_pure = TRUE, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { ### degree heterogeneity parameters if (is.null(n) && is.null(theta)) { @@ -286,8 +290,14 @@ mmsbm <- function( #' @method print undirected_mmsbm #' @export print.undirected_mmsbm <- function(x, ...) { - cat(glue("Undirected Degree-Corrected Mixed Membership Stochastic Blockmodel\n", .trim = FALSE)) - cat(glue("------------------------------------------------------------------\n\n", .trim = FALSE)) + cat(glue( + "Undirected Degree-Corrected Mixed Membership Stochastic Blockmodel\n", + .trim = FALSE + )) + cat(glue( + "------------------------------------------------------------------\n\n", + .trim = FALSE + )) sorted <- if (x$sorted) "arranged by block" else "not arranged by block" diff --git a/R/undirected_overlapping_sbm.R b/R/undirected_overlapping_sbm.R index c8a4b62..b58bfe8 100644 --- a/R/undirected_overlapping_sbm.R +++ b/R/undirected_overlapping_sbm.R @@ -1,13 +1,15 @@ new_undirected_overlapping_sbm <- function( - X, S, - Z, - poisson_edges = TRUE, - allow_self_loops = TRUE, - pi, - B, - sorted, - ..., - subclass = character()) { + X, + S, + Z, + poisson_edges = TRUE, + allow_self_loops = TRUE, + pi, + B, + sorted, + ..., + subclass = character() +) { subclass <- c(subclass, "undirected_overlapping_sbm") overlapping_sbm <- undirected_factor_model(X, S, ..., subclass = subclass) overlapping_sbm$Z <- Z @@ -199,13 +201,16 @@ validate_undirected_overlapping_sbm <- function(x) { #' population_eigs <- eigs_sym(custom_overlapping_sbm) #' overlapping_sbm <- function( - n, k = NULL, B = NULL, - ..., - pi = rep(1 / k, k), - sort_nodes = TRUE, - force_pure = TRUE, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n, + k = NULL, + B = NULL, + ..., + pi = rep(1 / k, k), + sort_nodes = TRUE, + force_pure = TRUE, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { ### mixing matrix if (is.null(k) && is.null(B)) { @@ -291,8 +296,14 @@ overlapping_sbm <- function( #' @method print undirected_overlapping_sbm #' @export print.undirected_overlapping_sbm <- function(x, ...) { - cat(glue("Undirected Degree-Corrected Overlapping Blockmodel\n", .trim = FALSE)) - cat(glue("-------------------------------------------------\n\n", .trim = FALSE)) + cat(glue( + "Undirected Degree-Corrected Overlapping Blockmodel\n", + .trim = FALSE + )) + cat(glue( + "-------------------------------------------------\n\n", + .trim = FALSE + )) sorted <- if (x$sorted) "sorted" else "sorted" diff --git a/R/undirected_planted_partition.R b/R/undirected_planted_partition.R index c40d4bb..744bee6 100644 --- a/R/undirected_planted_partition.R +++ b/R/undirected_planted_partition.R @@ -24,7 +24,9 @@ validate_undirected_planted_partition <- function(x) { stop("`within_block` must be between zero and one.", call. = FALSE) } - if (!is.null(x$between_block) && (x$between_block < 0 || x$between_block > 1)) { + if ( + !is.null(x$between_block) && (x$between_block < 0 || x$between_block > 1) + ) { stop("`between_block` must be between zero and one.", call. = FALSE) } @@ -36,7 +38,6 @@ validate_undirected_planted_partition <- function(x) { stop("`b` must be greater than zero.", call. = FALSE) } - # diagonal B must be constant # off-diagonal of B must be constant @@ -120,18 +121,19 @@ validate_undirected_planted_partition <- function(x) { #' lazy_pp #' planted_partition <- function( - n, - k, - ..., - within_block = NULL, - between_block = NULL, - a = NULL, - b = NULL, - block_sizes = NULL, - pi = NULL, - sort_nodes = TRUE, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n, + k, + ..., + within_block = NULL, + between_block = NULL, + a = NULL, + b = NULL, + block_sizes = NULL, + pi = NULL, + sort_nodes = TRUE, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { if (is.null(within_block)) { within_block <- a / n } diff --git a/R/undirected_sbm.R b/R/undirected_sbm.R index cc511ca..46a5459 100644 --- a/R/undirected_sbm.R +++ b/R/undirected_sbm.R @@ -100,14 +100,16 @@ validate_undirected_sbm <- function(x) { #' e$values #' sbm <- function( - n, - k = NULL, B = NULL, - ..., - block_sizes = NULL, - pi = NULL, - sort_nodes = TRUE, - poisson_edges = TRUE, - allow_self_loops = TRUE) { + n, + k = NULL, + B = NULL, + ..., + block_sizes = NULL, + pi = NULL, + sort_nodes = TRUE, + poisson_edges = TRUE, + allow_self_loops = TRUE +) { sbm <- dcsbm( theta = rep(1, n), k = k, diff --git a/tests/testthat/test-allow_self_loops.R b/tests/testthat/test-allow_self_loops.R index c81bc50..f29cd57 100644 --- a/tests/testthat/test-allow_self_loops.R +++ b/tests/testthat/test-allow_self_loops.R @@ -10,7 +10,8 @@ test_that("undirected graphs allow_self_loops = FALSE", { S <- matrix(runif(n = k * k, 0, .1), nrow = k) ufm <- undirected_factor_model( - X, S, + X, + S, expected_density = 0.1, allow_self_loops = FALSE ) @@ -44,7 +45,13 @@ test_that("directed graphs allow_self_loops = FALSE", { S <- matrix(runif(n = k1 * k2, 0, .1), nrow = k1, ncol = k2) Y <- matrix(rexp(n = k2 * d, 1), nrow = d) - fm <- directed_factor_model(X, S, Y, expected_density = 0.01, allow_self_loops = FALSE) + fm <- directed_factor_model( + X, + S, + Y, + expected_density = 0.01, + allow_self_loops = FALSE + ) edgelist <- sample_edgelist(fm) expect_false(any(edgelist$from == edgelist$to)) diff --git a/tests/testthat/test-directedness.R b/tests/testthat/test-directedness.R index 81c37a2..c487ac5 100644 --- a/tests/testthat/test-directedness.R +++ b/tests/testthat/test-directedness.R @@ -10,7 +10,8 @@ test_that("undirected graphs are undirected", { S <- matrix(runif(n = k * k, 0, .1), nrow = k) ufm <- undirected_factor_model( - X, S, + X, + S, expected_density = 0.1 ) diff --git a/tests/testthat/test-poisson_edges.R b/tests/testthat/test-poisson_edges.R index 32cb63a..9b8765e 100644 --- a/tests/testthat/test-poisson_edges.R +++ b/tests/testthat/test-poisson_edges.R @@ -13,8 +13,10 @@ test_that("undirected graphs poisson_edges = FALSE", { S <- matrix(runif(n = k * k, 0, .1), nrow = k) ufm <- undirected_factor_model( - X, S, - expected_density = 0.1, poisson_edges = FALSE + X, + S, + expected_density = 0.1, + poisson_edges = FALSE ) edgelist <- sample_edgelist(ufm) @@ -54,7 +56,13 @@ test_that("directed graphs poisson_edges = FALSE", { S <- matrix(runif(n = k1 * k2, 0, .1), nrow = k1, ncol = k2) Y <- matrix(rexp(n = k2 * d, 1), nrow = d) - fm <- directed_factor_model(X, S, Y, expected_density = 0.01, poisson_edges = FALSE) + fm <- directed_factor_model( + X, + S, + Y, + expected_density = 0.01, + poisson_edges = FALSE + ) edgelist <- sample_edgelist(fm) diff --git a/tests/testthat/test-retain-isolated-nodes.R b/tests/testthat/test-retain-isolated-nodes.R index e39af49..daf5ff5 100644 --- a/tests/testthat/test-retain-isolated-nodes.R +++ b/tests/testthat/test-retain-isolated-nodes.R @@ -122,7 +122,9 @@ test_that("sampling from rectangular directed factor models doesn't drop isolate Y <- matrix(rexp(n = k2 * d, 1), nrow = d) latent <- directed_factor_model( - X, S, Y, + X, + S, + Y, expected_in_degree = 1 ) diff --git a/tests/testthat/test-sampling-index-bug.R b/tests/testthat/test-sampling-index-bug.R index ff044ce..cae8f9f 100644 --- a/tests/testthat/test-sampling-index-bug.R +++ b/tests/testthat/test-sampling-index-bug.R @@ -21,7 +21,14 @@ test_that("degenerate test case 2 succeeds", { B <- diag(rep(0.5, k)) - dsbm <- directed_dcsbm(pi_in = pi, pi_out = pi, B = B, theta_in = rep(1, n), theta_out = rep(1, n), sort_nodes = TRUE) + dsbm <- directed_dcsbm( + pi_in = pi, + pi_out = pi, + B = B, + theta_in = rep(1, n), + theta_out = rep(1, n), + sort_nodes = TRUE + ) expect_silent(sample_sparse(dsbm)) }) @@ -40,7 +47,14 @@ test_that("degenerate test case 3 succeeds", { B <- matrix(0, nrow = k_out, ncol = k_in) diag(B) <- 0.5 - dsbm <- directed_dcsbm(pi_in = pi_in, pi_out = pi_out, B = B, theta_in = rep(1, n), theta_out = rep(1, n), sort_nodes = TRUE) + dsbm <- directed_dcsbm( + pi_in = pi_in, + pi_out = pi_out, + B = B, + theta_in = rep(1, n), + theta_out = rep(1, n), + sort_nodes = TRUE + ) expect_silent(sample_sparse(dsbm)) }) From 68dc40da773ba7ab48d383f1b7971bc9dcb4ecc5 Mon Sep 17 00:00:00 2001 From: Alex Hayes Date: Mon, 10 Nov 2025 17:30:34 -0800 Subject: [PATCH 12/12] Drop old release checks we're not tidyverse --- .github/workflows/R-CMD-check.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 69cfc6a..e8260c0 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -26,15 +26,12 @@ jobs: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - # use 4.0 or 4.1 to check with rtools40's older compiler - - {os: windows-latest, r: 'oldrel-4'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} - {os: ubuntu-latest, r: 'oldrel-2'} - {os: ubuntu-latest, r: 'oldrel-3'} - - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}