diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index 92e3f5f8..2bd1396f 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -40,6 +40,12 @@ jobs: pip install tox tox -e docs + - name: Edit CSS + run: | + CSSPATH=./docs/_build/html/_static/styles/furo.css + cat ${CSSPATH} | sed "s/ol,ul{\\([^}]*\\);padding-left:1\\.2rem}/ol,ul{\\1}/" > tmp.css + mv tmp.css ${CSSPATH} + - name: GH Pages Deployment uses: JamesIves/github-pages-deploy-action@v4 with: diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 8299b5af..ec9fcc1c 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -38,6 +38,11 @@ jobs: python -m pip install setuptools python -c "import setup; print(setup.build_igraph(None));" + - name: Extract examples from docstrings + run: | + echo "def test_docstrings():" > tests/test_docstrings.py + cat src/scranpy/*.py | sed "s/^/#/" | sed "s/^# *>>> / /" >> tests/test_docstrings.py + - name: Test with tox run: | export SCRANPY_INSTALLED_PATH=$(pwd)/installed diff --git a/.gitignore b/.gitignore index d721274a..543443ff 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,5 @@ extern/build-* extern/igraph-* extern/*.tar.gz src/scranpy/lib_scranpy.py + +tests/test_docstrings.py diff --git a/setup.cfg b/setup.cfg index c36d6167..758a6ea5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -76,8 +76,8 @@ testing = setuptools pytest pytest-cov - singlecellexperiment>=0.4.0 - summarizedexperiment>=0.4.0 + singlecellexperiment>=0.6.0 + summarizedexperiment>=0.6.3 scrnaseq>=0.3.1 scipy diff --git a/src/scranpy/adt_quality_control.py b/src/scranpy/adt_quality_control.py index 3fdc18a3..63dfbc44 100644 --- a/src/scranpy/adt_quality_control.py +++ b/src/scranpy/adt_quality_control.py @@ -29,11 +29,13 @@ def compute_adt_qc_metrics( - A list of arrays. Each array corresponds to an ADT subset and can either contain boolean or integer values. 
+ - For booleans, the sequence should be of length equal to the number of rows, and values should be truthy for rows that belong in the subset. If the sequence contains booleans, it should not contain any other type. - For integers, the value is the row index of an ADT in the subset. - For strings, the value is the name of an ADT in the subset. This should match at least one element in ``row_names``. + - A dictionary where keys are the names of each ADT subset and the values are arrays as described above. - A :py:class:`~biocutils.NamedList.NamedList` where each element is an array as described above, possibly with names. @@ -55,7 +57,7 @@ def compute_adt_qc_metrics( Each column is a double-precision NumPy array that contains the sum of counts for the corresponding subset in each cell. References: - The ``compute_adt_qc_metrics`` function in the `scran_qc `_ C++ library, which describes the rationale behind these QC metrics. + The ``compute_adt_qc_metrics`` function in the `scran_qc`_ C++ library, which describes the rationale behind these QC metrics. Examples: >>> import numpy @@ -107,23 +109,23 @@ def suggest_adt_qc_thresholds( Number of MADs from the median to define the threshold for outliers in each QC metric. Returns: - If ``block = None``, a :py:class:`~biocutils.NamedList.NamedList` is returned, containing: + If ``block = None``, a :py:class:`~biocutils.NamedList.NamedList` is returned, containing the following entries. - - ``detected``, a number specifying the lower threshold on the number of detected ADTs. - - ``subsets``, a :py:class:`~biocutils.FloatList.FloatList` of length equal to the number of control subsets (and named accordingly). + - ``detected``: a number specifying the lower threshold on the number of detected ADTs. + - ``subsets``: a :py:class:`~biocutils.FloatList.FloatList` of length equal to the number of control subsets (and named accordingly). Each entry represents the upper bound on the sum of counts in the corresponding control subset. 
- If ``block`` is provided, the NamedList instead contains: + If ``block`` is provided, the ``NamedList`` instead contains: - - ``detected``, a FloatList of length equal to the number of blocks (and named accordingly). + - ``detected``, a ``FloatList`` of length equal to the number of blocks (and named accordingly). Each entry represents the lower threshold on the number of detected ADTs in the corresponding block. - - ``subset_sum``, a NamedList of length equal to the number of control subsets. - Each entry is another FloatList that contains the upper threshold on the sum of counts for that subset in each block. + - ``subset_sum``, a ``NamedList`` of length equal to the number of control subsets. + Each entry is another ``FloatList`` that contains the upper threshold on the sum of counts for that subset in each block. - ``block_ids``, a list containing the unique levels of the blocking factor. This is in the same order as the blocks in ``detected`` and ``subset_sum``. References: - The ``compute_adt_qc_filters`` and ``compute_adt_qc_filters_blocked`` functions in the `scran_qc `_ C++ library, + The ``compute_adt_qc_filters`` and ``compute_adt_qc_filters_blocked`` functions in the `scran_qc`_ C++ library, which describe the rationale behind the suggested filters. Examples: @@ -169,8 +171,8 @@ def suggest_adt_qc_thresholds( def filter_adt_qc_metrics( - thresholds: biocframe.BiocFrame, - metrics: biocutils.NamedList, + thresholds: biocutils.NamedList, + metrics: biocframe.BiocFrame, block: Optional[Sequence] = None ) -> numpy.ndarray: """ @@ -188,10 +190,10 @@ def filter_adt_qc_metrics( The levels should be a subset of those used in :py:func:`~suggest_adt_qc_thresholds`. Returns: - A NumPy vector of length equal to the number of cells in ``metrics``, containing truthy values for putative high-quality cells. + A boolean NumPy vector of length equal to the number of cells in ``metrics``, containing truthy values for putative high-quality cells. 
References: - The ``AdtQcFilters`` and ``AdtQcBlockedFilters`` functions in the `scran_qc `_ C++ library. + The ``AdtQcFilters`` and ``AdtQcBlockedFilters`` functions in the `scran_qc`_ C++ library. Examples: >>> import numpy @@ -200,7 +202,8 @@ def filter_adt_qc_metrics( >>> res = scranpy.compute_adt_qc_metrics(mat, { "IgG": [ 1, 10, 20, 40 ] }) >>> filt = scranpy.suggest_adt_qc_thresholds(res) >>> keep = scranpy.filter_adt_qc_metrics(filt, res) - >>> keep.sum() + >>> import biocutils + >>> print(biocutils.table(keep)) """ dthresh = thresholds["detected"] diff --git a/src/scranpy/aggregate_across_cells.py b/src/scranpy/aggregate_across_cells.py index 8d1578a1..27e27f78 100644 --- a/src/scranpy/aggregate_across_cells.py +++ b/src/scranpy/aggregate_across_cells.py @@ -31,7 +31,7 @@ def aggregate_across_cells( Number of threads to use for aggregation. Returns: - :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.named_list.NamedList` containing the following entries. - ``sum``: double-precision NumPy matrix where each row corresponds to a gene and each column corresponds to a unique combination of grouping levels. Each matrix entry contains the summed expression across all cells with that combination. diff --git a/src/scranpy/aggregate_across_genes.py b/src/scranpy/aggregate_across_genes.py index 4cdd4757..855dcda5 100644 --- a/src/scranpy/aggregate_across_genes.py +++ b/src/scranpy/aggregate_across_genes.py @@ -36,10 +36,10 @@ def aggregate_across_genes( The first column contains the row names/indices and the second column contains the weights. Alternatively, a dictionary may be supplied where each key is the name of a gene set and each value is a sequence/tuple as described above. - The keys will be used to name the output NamedList. + The keys will be used to name the output ``NamedList``. Alternatively, a :py:class:`~biocutils.NamedList.NamedList` where each entry is a gene set represented by a sequence/tuple as described above. 
- If names are available, they will be used to name the output NamedList. + If names are available, they will be used to name the output ``NamedList``. row_names: Sequence of strings of length equal to the number of rows of ``x``, containing the name of each gene. @@ -53,7 +53,7 @@ def aggregate_across_genes( Number of threads to be used for aggregation. Returns: - List of length equal to that of ``sets``. + A :py:class:`~biocutils.NamedList.NamedList` of length equal to that of ``sets``. Each entry is a numeric vector of length equal to the number of columns in ``x``, containing the (weighted) sum/mean of expression values for the corresponding set across all cells. diff --git a/src/scranpy/build_snn_graph.py b/src/scranpy/build_snn_graph.py index 84b54609..153903e1 100644 --- a/src/scranpy/build_snn_graph.py +++ b/src/scranpy/build_snn_graph.py @@ -27,7 +27,7 @@ def build_snn_graph( Alternatively, a :py:class:`~knncolle.find_knn.FindKnnResults` object containing existing neighbor search results. - Alternatively, a :py:class:`~knncolle.Index.Index` object. + Alternatively, a :py:class:`~knncolle.classes.Index` object. num_neighbors: Number of neighbors in the nearest-neighbor graph. @@ -44,7 +44,7 @@ def build_snn_graph( The algorithm to use for the nearest-neighbor search. Only used if ``x`` is not a pre-built nearest-neighbor search index or a list of existing nearest-neighbor search results. - Results: + Returns: A :py:class:`~biocutils.NamedList.NamedList` containing the components of a (possibly weighted) graph. - ``vertices``: integer specifying the number of vertices (i.e., cells) in the graph. diff --git a/src/scranpy/center_size_factors.py b/src/scranpy/center_size_factors.py index 04730c89..0278571b 100644 --- a/src/scranpy/center_size_factors.py +++ b/src/scranpy/center_size_factors.py @@ -40,7 +40,7 @@ def center_size_factors( This argument only used if ``size_factors`` is double-precision, otherwise a new array is always returned. 
Returns: - Array containing centered size factors. + Double-precision NumPy array containing centered size factors. If ``in_place = True``, this is a reference to ``size_factors``. References: diff --git a/src/scranpy/choose_highly_variable_genes.py b/src/scranpy/choose_highly_variable_genes.py index ef670b58..cd5366d3 100644 --- a/src/scranpy/choose_highly_variable_genes.py +++ b/src/scranpy/choose_highly_variable_genes.py @@ -37,7 +37,7 @@ def choose_highly_variable_genes( Ignored if ``None``. Returns: - Array containing the indices of genes in ``stats`` that are considered to be highly variable. + Integer NumPy array containing the indices of genes in ``stats`` that are considered to be highly variable. References: The ``choose_highly_variable_genes`` function from the `scran_variances `_ library, diff --git a/src/scranpy/cluster_graph.py b/src/scranpy/cluster_graph.py index af08bb94..62185b9f 100644 --- a/src/scranpy/cluster_graph.py +++ b/src/scranpy/cluster_graph.py @@ -45,7 +45,7 @@ def cluster_graph( Random seed to use for ``method = "multilevel"`` or ``"leiden"``. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - ``membership``: an integer NumPy array containing the cluster assignment for each vertex, i.e., cell. All values are in [0, N) where N is the total number of clusters. @@ -61,7 +61,7 @@ def cluster_graph( - ``merges``: an integer NumPy matrix with two columns. Each row corresponds to a merge step and specifies the pair of cells or clusters that were merged at that step. - - ``modularity: a double-precision NumPy array that contains the modularity score at each merge step. + - ``modularity``: a double-precision NumPy array that contains the modularity score at each merge step. 
For ``method = "leiden"``, the output also contains: @@ -79,7 +79,8 @@ def cluster_graph( >>> import scranpy >>> graph = scranpy.build_snn_graph(pcs) >>> clust = scranpy.cluster_graph(graph) - >>> print(clust["membership"]) + >>> import biocutils + >>> print(biocutils.table(clust["membership"])) """ graph = (x["vertices"], x["edges"], x["weights"]) diff --git a/src/scranpy/cluster_kmeans.py b/src/scranpy/cluster_kmeans.py index 3e3d1250..b321ea7c 100644 --- a/src/scranpy/cluster_kmeans.py +++ b/src/scranpy/cluster_kmeans.py @@ -66,7 +66,7 @@ def cluster_kmeans( Number of threads to use. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - ``clusters``: an integer NumPy array containing the cluster assignment for each cell. Values are integers in [0, N) where N is the total number of clusters. @@ -74,10 +74,11 @@ def cluster_kmeans( Dimensions are in the rows while centers are in the columns. - ``iterations``: integer specifying the number of refinement iterations that were performed. - ``status``: convergence status. - Any non-zero value indicates a convergence failure though the exact meaning depends on the choice of ``refine_method``. - - For Lloyd, a value of 2 indicates convergence failure. - - For Hartigan-Wong, a value of 2 indicates convergence failure in the optimal transfer iterations. - A value of 4 indicates convergence failure in the quick transfer iterations when ``hartigan_wong_quit_quick_transfer_failure = True``. + Any non-zero value indicates a convergence failure though the exact meaning depends on the choice of ``refine_method``. + + - For Lloyd, a value of 2 indicates convergence failure. + - For Hartigan-Wong, a value of 2 indicates convergence failure in the optimal transfer iterations. + A value of 4 indicates convergence failure in the quick transfer iterations when ``hartigan_wong_quit_quick_transfer_failure = True``. 
References: https://ltla.github.io/CppKmeans, which describes the various initialization and refinement algorithms in more detail. @@ -87,7 +88,8 @@ def cluster_kmeans( >>> pcs = numpy.random.rand(10, 200) >>> import scranpy >>> clust = scranpy.cluster_kmeans(pcs, k=3) - >>> print(clust["clusters"]) + >>> import biocutils + >>> print(biocutils.table(clust["clusters"])) """ out = lib.cluster_kmeans( diff --git a/src/scranpy/combine_factors.py b/src/scranpy/combine_factors.py index ff3533b2..289c0197 100644 --- a/src/scranpy/combine_factors.py +++ b/src/scranpy/combine_factors.py @@ -30,9 +30,9 @@ def combine_factors(factors: Union[dict, Sequence, biocutils.NamedList, biocfram If any entry of ``factors`` is a :py:class:`~biocutils.Factor.Factor` object, any unused levels will also be preserved. Returns: - :py:class:`~biocutils.NamedList.NamedList` containing: + :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - - ``levels``: a :py:func:`~biocframe.BiocFrame.BiocFrame` containing the sorted and unique combinations of levels as a tuple. + - ``levels``: a :py:class:`~biocframe.BiocFrame.BiocFrame` containing the sorted and unique combinations of levels as a tuple. Each column corresponds to a factor in ``factors`` while each row represents a unique combination. Corresponding elements of each column define a single combination, i.e., the ``i``-th combination is defined by taking the ``i``-th element of each column. - ``index``: an integer NumPy array specifying the index into ``levels`` for each observation. 
@@ -49,6 +49,8 @@ def combine_factors(factors: Union[dict, Sequence, biocutils.NamedList, biocfram >>> y = random.choices([True, False], k = 20) >>> combined = scranpy.combine_factors({ "foo": x, "bar": y }) >>> print(combined["levels"]) + >>> import biocutils + >>> print(biocutils.table(combined["index"])) """ if isinstance(factors, biocframe.BiocFrame): diff --git a/src/scranpy/compute_clrm1_factors.py b/src/scranpy/compute_clrm1_factors.py index 158a3ba2..d40e539b 100644 --- a/src/scranpy/compute_clrm1_factors.py +++ b/src/scranpy/compute_clrm1_factors.py @@ -18,7 +18,7 @@ def compute_clrm1_factors(x: Any, num_threads: int = 1) -> numpy.ndarray: Number of threads to use. Returns: - Array containing the CLRm1 size factor for each cell. + Double-precision NumPy array containing the CLRm1 size factor for each cell. Note that these size factors are not centered and should be passed through, e.g., :py:func:`~scranpy.center_size_factors.center_size_factors` before normalization. References: diff --git a/src/scranpy/correct_mnn.py b/src/scranpy/correct_mnn.py index adaf5705..9ff5bf66 100644 --- a/src/scranpy/correct_mnn.py +++ b/src/scranpy/correct_mnn.py @@ -80,9 +80,9 @@ def correct_mnn( Number of threads to use. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - - ``corrected``, a double-precision NumPy array of the same dimensions as the ``x`` used in :py:func:`~correct_mnn`, containing the corrected values. + - ``corrected``, a double-precision NumPy array of the same dimensions as ``x``, containing the corrected values. References: https://libscran.github.io/mnncorrect, which describes the MNN correction algorithm in more detail. 
diff --git a/src/scranpy/crispr_quality_control.py b/src/scranpy/crispr_quality_control.py index 41f5fbe1..3a954495 100644 --- a/src/scranpy/crispr_quality_control.py +++ b/src/scranpy/crispr_quality_control.py @@ -21,7 +21,7 @@ def compute_crispr_qc_metrics(x: Any, num_threads: int = 1) -> biocframe.BiocFra Returns: A :py:class:`~biocframe.BiocFrame.BiocFrame` with number of rows equal to the number of cells (i.e., columns) in ``x``. - It contains the following columns: + It contains the following columns. - ``sum``, a double-precision NumPy array containing the sum of counts across all guides for each cell. - ``detected``, an integer NumPy array containing the number of guides with non-zero counts in each cell. @@ -29,7 +29,7 @@ def compute_crispr_qc_metrics(x: Any, num_threads: int = 1) -> biocframe.BiocFra - ``max_index``, an integer NumPy array containing the row index of the guide with the maximum count in each cell. References: - The ``compute_crispr_qc_metrics`` function in the `scran_qc `_ C++ library, which describes the rationale behind these QC metrics. + The ``compute_crispr_qc_metrics`` function in the `scran_qc`_ C++ library, which describes the rationale behind these QC metrics. Examples: >>> import numpy @@ -71,11 +71,11 @@ def suggest_crispr_qc_thresholds( Number of MADs from the median to define the threshold for outliers in each QC metric. Returns: - If ``block = None``, a :py:class:`~biocutils.NamedList.NamedList` is returned, containing: + If ``block = None``, a :py:class:`~biocutils.NamedList.NamedList` is returned, containing the following entries. - ``max_value``, a number specifying the lower threshold on the maximum count in each cell. - If ``block`` is provided, the NamedList instead contains: + If ``block`` is provided, the ``NamedList`` instead contains: - ``max_value``, a FloatList of length equal to the number of blocks (and named accordingly). Each entry represents the lower threshold on the maximum count in the corresponding block. 
@@ -83,7 +83,7 @@ def suggest_crispr_qc_thresholds( This is in the same order as the blocks in ``detected`` and ``subset_sum``. References: - The ``compute_crispr_qc_filters`` and ``compute_crispr_qc_filters_blocked`` functions in the `scran_qc `_ C++ library, + The ``compute_crispr_qc_filters`` and ``compute_crispr_qc_filters_blocked`` functions in the `scran_qc`_ C++ library, which describes the rationale behind the suggested filters. Examples: @@ -138,16 +138,16 @@ def filter_crispr_qc_metrics( The levels should be a subset of those used in :py:func:`~suggest_crispr_qc_thresholds`. Returns: - A NumPy vector of length equal to the number of cells in ``metrics``, containing truthy values for putative high-quality cells. + A boolean NumPy vector of length equal to the number of cells in ``metrics``, containing truthy values for putative high-quality cells. References: - The ``CrisprQcFilters`` and ``CrisprQcBlockedFilters`` functions in the `scran_qc `_ C++ library. + The ``CrisprQcFilters`` and ``CrisprQcBlockedFilters`` functions in the `scran_qc`_ C++ library. Examples: >>> import numpy >>> mat = numpy.reshape(numpy.random.poisson(lam=5, size=1000), (50, 20)) >>> import scranpy - >>> res = scranpy.compute_crispr_qc_metrics(mat, { "IgG": [ 1, 10, 20, 40 ] }) + >>> res = scranpy.compute_crispr_qc_metrics(mat) >>> filt = scranpy.suggest_crispr_qc_thresholds(res) >>> keep = scranpy.filter_crispr_qc_metrics(filt, res) >>> keep.sum() diff --git a/src/scranpy/fit_variance_trend.py b/src/scranpy/fit_variance_trend.py index d97cc653..ea4b1a1e 100644 --- a/src/scranpy/fit_variance_trend.py +++ b/src/scranpy/fit_variance_trend.py @@ -39,24 +39,24 @@ def fit_variance_trend( span: Span of the LOWESS smoother. - Ignored if ``use_min_width = TRUE``. + Ignored if ``use_min_width = True``. use_min_width: Whether a minimum width constraint should be applied to the LOWESS smoother. This is useful to avoid overfitting in high-density intervals. 
min_width: - Minimum width of the window to use when ``use_min_width = TRUE``. + Minimum width of the window to use when ``use_min_width = True``. min_window_count: Minimum number of observations in each window. - Only used if ``use_min_width=TRUE``. + Only used if ``use_min_width = True``. num_threads: Number of threads to use. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - ``fitted``: a double-precision NumPy array of length equal to ``mean``, containing the fitted value of the trend for each gene. - ``residual``: a double-precision NumPy array of length equal to ``mean``, containing the residual from the trend for each gene. diff --git a/src/scranpy/model_gene_variances.py b/src/scranpy/model_gene_variances.py index 873c9527..96744964 100644 --- a/src/scranpy/model_gene_variances.py +++ b/src/scranpy/model_gene_variances.py @@ -42,17 +42,17 @@ def model_gene_variances( block_weight_policy: Policy for weighting different blocks when computing the weighted mean across blocks for each statistic. - Only used if ``block`` is provided and ``block_average_policy == "mean"``. + Only used if ``block`` is provided and ``block_average_policy = "mean"``. variable_block_weight: Parameters for variable block weighting. This should be a tuple of length 2 where the first and second values are used as the lower and upper bounds, respectively, for the variable weight calculation. - Only used if ``block`` is provided, ``block_average_policy == "mean"``, and ``block_weight_policy = "variable"``. + Only used if ``block`` is provided, ``block_average_policy = "mean"``, and ``block_weight_policy = "variable"``. block_quantile: Probability for computing the quantile across blocks. Defaults to 0.5, i.e., the median of per-block statistics. - Only used if ``block`` is provided and ``block_average_policy == "quantile"``. 
+ Only used if ``block`` is provided and ``block_average_policy = "quantile"``. mean_filter: Whether to filter on the means before trend fitting. @@ -80,7 +80,7 @@ def model_gene_variances( Number of threads to use. Returns: - A `:py:class:`~biocutils.NamedList.NamedList` containing `statistics`. + A :py:class:`~biocutils.NamedList.NamedList` containing ``statistics``. This is a :py:class:`~biocframe.BiocFrame.BiocFrame` with one row per gene and the following columns: - ``mean``: a double-precision NumPy array containing the mean (log-)expression for each gene. @@ -88,10 +88,10 @@ def model_gene_variances( - ``fitted``: a double-precision NumPy array containing the fitted value of the mean-variance trend for each gene. - ``residual``: a double-precision NumPy array containing the residual from the mean-variance trend for each gene. - If ``block`` is supplied, the NamedList will also contain: + If ``block`` is supplied, the ``NamedList`` will also contain: - ``per_block``: a :py:class:`~biocutils.NamedList.NamedList` containing the per-block statistics. - Each entry is a BiocFrame that contains the ``mean``, ``variance``, ``fitted`` and ``residual`` for each block. + Each entry is a ``BiocFrame`` that contains the ``mean``, ``variance``, ``fitted`` and ``residual`` for each block. - ``block_ids``: a list containing the identities of the blocks. This corresponds to the entries of ``per_block``. diff --git a/src/scranpy/normalize_counts.py b/src/scranpy/normalize_counts.py index c815ef5e..eff5fa9e 100644 --- a/src/scranpy/normalize_counts.py +++ b/src/scranpy/normalize_counts.py @@ -28,7 +28,7 @@ def normalize_counts( x: Matrix-like object containing cells in columns and features in rows, typically with count data. - Alternatively, a :py:class:`~mattress.InitializedMatrix.InitializedMatrix` representing a count matrix, typically created by :py:class:`~mattress.initialize.initialize`. 
+ Alternatively, a :py:class:`~mattress.InitializedMatrix.InitializedMatrix` representing a count matrix, typically created by :py:func:`~mattress.initialize.initialize`. size_factors: Size factor for each cell. This should have length equal to the number of columns in ``x``. diff --git a/src/scranpy/rna_quality_control.py b/src/scranpy/rna_quality_control.py index 20b07434..e8d1e516 100644 --- a/src/scranpy/rna_quality_control.py +++ b/src/scranpy/rna_quality_control.py @@ -28,11 +28,13 @@ def compute_rna_qc_metrics( - A list of sequences. Each sequence corresponds to a gene subset and can contain booleans, integers or strings. + - For booleans, the sequence should be of length equal to the number of rows, and values should be truthy for rows that belong in the subset. If the sequence contains booleans, it should not contain any other type. - For integers, the value is the row index of a gene in the subset. - For strings, the value is the name of a gene in the subset. This should match at least one element in ``row_names``. + - A dictionary where keys are the names of each gene subset and the values are arrays as described above. - A :py:class:`~biocutils.NamedList.NamedList` where each element is an array as described above, possibly with names. @@ -101,26 +103,26 @@ def suggest_rna_qc_thresholds( Number of MADs from the median to define the threshold for outliers in each QC metric. Returns: - If ``block = None``, a :py:class:`~biocutils.NamedList.NamedList` is returned, containing: + If ``block = None``, a :py:class:`~biocutils.NamedList.NamedList` is returned, containing the following entries. - ``sum``, a number specifying the lower threshold on the sum of counts in each cell. - ``detected``, a number specifying the lower threshold on the number of detected genes. - ``subset_proportion``, a :py:class:`~biocutils.FloatList.FloatList` of length equal to the number of control subsets (and named accordingly). 
Each entry represents the upper bound on the proportion of counts in the corresponding control subset. - If ``block`` is provided, the NamedList instead contains: + If ``block`` is provided, the ``NamedList`` instead contains: - ``sum``, a FloatList of length equal to the number of blocks (and named accordingly). Each entry represents the lower threshold on the sum of counts in the corresponding block. - ``detected``, a FloatList of length equal to the number of blocks (and named accordingly). Each entry represents the lower threshold on the number of detected genes in the corresponding block. - - ``subset_proportion``, a NamedList of length equal to the number of control subsets. + - ``subset_proportion``, a ``NamedList`` of length equal to the number of control subsets. Each entry is another FloatList that contains the upper threshold on the proportion of counts for that subset in each block. - ``block_ids``, a list containing the unique levels of the blocking factor. This is in the same order as the blocks in ``detected`` and ``subset_sum``. References: - The ``compute_rna_qc_filters`` and ``compute_rna_qc_filters_blocked`` functions in the `scran_qc `_ C++ library, + The ``compute_rna_qc_filters`` and ``compute_rna_qc_filters_blocked`` functions in the `scran_qc`_ C++ library, which describes the rationale behind the suggested filters. Examples: @@ -185,10 +187,10 @@ def filter_rna_qc_metrics( The levels should be a subset of those used in :py:func:`~suggest_rna_qc_thresholds`. Returns: - A NumPy vector of length equal to the number of cells in ``metrics``, containing truthy values for putative high-quality cells. + A boolean NumPy vector of length equal to the number of cells in ``metrics``, containing truthy values for putative high-quality cells. References: - The ``RnaQcFilters`` and ``RnaQcBlockedFilters`` functions in the `scran_qc `_ C++ library. + The ``RnaQcFilters`` and ``RnaQcBlockedFilters`` functions in the `scran_qc`_ C++ library. 
Examples: >>> import numpy @@ -197,7 +199,8 @@ def filter_rna_qc_metrics( >>> res = scranpy.compute_rna_qc_metrics(mat, { "mito": [ 1, 10, 20, 40 ] }) >>> filt = scranpy.suggest_rna_qc_thresholds(res) >>> keep = scranpy.filter_rna_qc_metrics(filt, res) - >>> keep.sum() + >>> import biocutils + >>> print(biocutils.table(keep)) """ sthresh = thresholds["sum"] diff --git a/src/scranpy/run_all_neighbor_steps.py b/src/scranpy/run_all_neighbor_steps.py index e7375b66..a8b907a9 100644 --- a/src/scranpy/run_all_neighbor_steps.py +++ b/src/scranpy/run_all_neighbor_steps.py @@ -72,7 +72,7 @@ def run_all_neighbor_steps( This overrides the specified number of threads in the various ``*_options`` arguments. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing the results of each step: + A :py:class:`~biocutils.NamedList.NamedList` containing one entry for each step. - ``run_tsne``: results of :py:func:`~scranpy.run_tsne.run_tsne`. Omitted if t-SNE was not performed. @@ -92,7 +92,8 @@ def run_all_neighbor_steps( >>> output = scranpy.run_all_neighbor_steps(pcs) >>> print(output["run_tsne"][:5,:]) >>> print(output["run_umap"][:5,:]) - >>> print(output["cluster_graph"]["membership"]) + >>> import biocutils + >>> print(biocutils.table(output["cluster_graph"]["membership"])) """ if isinstance(x, knncolle.Index): diff --git a/src/scranpy/run_pca.py b/src/scranpy/run_pca.py index adb4305c..53588217 100644 --- a/src/scranpy/run_pca.py +++ b/src/scranpy/run_pca.py @@ -71,7 +71,7 @@ def run_pca( Number of threads to use. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - ``components``: a double-precision NumPy matrix of principal component (PC) scores. Rows are dimensions (i.e., PCs) and columns are cells. 
diff --git a/src/scranpy/run_tsne.py b/src/scranpy/run_tsne.py index 8b16ff55..da42176f 100644 --- a/src/scranpy/run_tsne.py +++ b/src/scranpy/run_tsne.py @@ -100,7 +100,7 @@ def run_tsne( Only used if ``x`` is not a pre-built nearest-neighbor search index or a list of existing nearest-neighbor search results. Returns: - Array containing the coordinates of each cell in a 2-dimensional embedding. + Double-precision NumPy matrix containing the coordinates of each cell in a 2-dimensional embedding. Each row corresponds to a cell and each column corresponds to a dimension. References: diff --git a/src/scranpy/run_umap.py b/src/scranpy/run_umap.py index 3550e610..e8c3617a 100644 --- a/src/scranpy/run_umap.py +++ b/src/scranpy/run_umap.py @@ -151,7 +151,7 @@ def run_umap( Only used if ``x`` is not a pre-built nearest-neighbor search index or a list of existing nearest-neighbor search results. Returns: - Array containing the coordinates of each cell in a 2-dimensional embedding. + Double-precision NumPy matrix containing the coordinates of each cell in a 2-dimensional embedding. Each row corresponds to a cell and each column corresponds to a dimension. References: diff --git a/src/scranpy/sanitize_size_factors.py b/src/scranpy/sanitize_size_factors.py index dba65358..5b336706 100644 --- a/src/scranpy/sanitize_size_factors.py +++ b/src/scranpy/sanitize_size_factors.py @@ -18,25 +18,29 @@ def sanitize_size_factors( Floating-point array containing size factors for all cells. replace_zero: - Whether to replace size factors of zero with the lowest positive factor. If ``False``, zeros are retained. + Whether to replace size factors of zero with the lowest positive factor. + If ``False``, zeros are retained. replace_negative: - Whether to replace negative size factors with the lowest positive factor. If ``False``, negative values are retained. + Whether to replace negative size factors with the lowest positive factor. + If ``False``, negative values are retained. 
replace_infinite: - Whether to replace infinite size factors with the largest positive factor. If ``False``, infinite values are retained. + Whether to replace infinite size factors with the largest positive factor. + If ``False``, infinite values are retained. replace_nan: - Whether to replace NaN size factors with unity. If False, NaN values are retained. + Whether to replace NaN size factors with unity. + If ``False``, NaN values are retained. in_place: - Whether to modify ``size_factors`` in place. If False, a new array - is returned. This argument only used if ``size_factors`` is - double-precision, otherwise a new array is always returned. + Whether to modify ``size_factors`` in place. + If ``False``, a new array is returned. + This argument is only used if ``size_factors`` is double-precision, otherwise a new array is always returned. Returns: - Array containing sanitized size factors. If ``in_place = True``, this - is a reference to ``size_factors``. + Double-precision NumPy array containing sanitized size factors. + If ``in_place = True``, this is a reference to ``size_factors``. References: The ``sanitize_size_factors`` function in the `scran_norm `_ C++ library, which provides the underlying implementation. diff --git a/src/scranpy/scale_by_neighbors.py b/src/scranpy/scale_by_neighbors.py index aa224b0a..9873f6cd 100644 --- a/src/scranpy/scale_by_neighbors.py +++ b/src/scranpy/scale_by_neighbors.py @@ -42,7 +42,7 @@ def scale_by_neighbors( The default of ``None`` is equivalent to an all-1 vector, i.e., all modalities are scaled to have the same within-population variance. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - ``scaling``: a double-precision NumPy array containing the scaling factor to be applied to each embedding in ``x``. - ``combined``: a double-precision NumPy matrix of scaled and concatenated embeddings.
diff --git a/src/scranpy/score_gene_set.py b/src/scranpy/score_gene_set.py index 47bc6da7..05ea02d5 100644 --- a/src/scranpy/score_gene_set.py +++ b/src/scranpy/score_gene_set.py @@ -70,7 +70,7 @@ def score_gene_set( Number of threads to use. Returns: - A :py:class:`~biocutils.NamedList.NamedList` containing: + A :py:class:`~biocutils.NamedList.NamedList` containing the following entries. - ``scores``: a double-precision NumPy array containing the gene set score for each cell. - ``weights``: a double-precision NumPy array containing containing the weight of each gene in ``set``. diff --git a/src/scranpy/score_markers.py b/src/scranpy/score_markers.py index c58f8e2c..62556197 100644 --- a/src/scranpy/score_markers.py +++ b/src/scranpy/score_markers.py @@ -146,9 +146,9 @@ def score_markers( Each row is a gene and each column is a group, ordered as in ``group_ids``. Omitted if ``compute_group_means = False``. - If ``all_pairwise = False``, the NamedList contains the following additional entries: + If ``all_pairwise = False``, the ``NamedList`` contains the following additional entries: - - ``cohens_d``: a NamedList with the same structure as returned by :py:func:`~scranpy.summarize_effects.summarized_effects`. + - ``cohens_d``: a ``NamedList`` with the same structure as returned by :py:func:`~scranpy.summarize_effects.summarize_effects`. Briefly, each entry corresponds to a group in ``group_ids`` and is a :py:class:`~biocframe.BiocFrame.BiocFrame` with one row per gene. Each column contains a summary statistic of the Cohen's d from pairwise comparisons to all other groups, e.g., min, mean, median, max, min-rank, and any requested quantiles. Columns are omitted if the relevant ``compute_summary_*`` option is set to ``False``. @@ -156,7 +156,7 @@ def score_markers( - ``delta_mean``: Same as ``cohens_d`` but for the delta-mean. - ``delta_detected``: Same as ``cohens_d`` but for the delta-detected.
+ If ``all_pairwise = True``, the ``NamedList`` contains the following additional entries: - ``cohens_d``: a 3-dimensional double-precision NumPy array containing the Cohen's d from each pairwise comparison between groups. The extents of the first two dimensions are equal to the number of groups, while the extent of the final dimension is equal to the number of genes. @@ -165,11 +165,11 @@ def score_markers( - ``delta_mean``: Same as ``cohens_d`` but for the delta-mean. - ``delta_detected``: Same as ``cohens_d`` but for the delta-detected. - If ``all_pairwise`` is an integer, the NamedList contains the following additional entries: + If ``all_pairwise`` is an integer, the ``NamedList`` contains the following additional entries: - - ``cohens_d``: a NamedList list of named lists of :py:class:`~biocframe.BiocFrame.BiocFrame` objects. - The BiocFrame at ``cohens_d[m][n]`` contains the top markers for the comparison of group ``m`` over group ``n``. - Each BiocFrame has the ``index`` and ``effect`` columns, containing the row indices and effect sizes of the top genes, respectively. + - ``cohens_d``: a ``NamedList`` of named lists of :py:class:`~biocframe.BiocFrame.BiocFrame` objects. + The ``BiocFrame`` at ``cohens_d[m][n]`` contains the top markers for the comparison of group ``m`` over group ``n``. + Each ``BiocFrame`` has the ``index`` and ``effect`` columns, containing the row indices and effect sizes of the top genes, respectively. - ``auc``: Same as ``cohens_d`` but for the AUCs. - ``delta_mean``: Same as ``cohens_d`` but for the delta-mean. - ``delta_detected``: Same as ``cohens_d`` but for the delta-detected.
diff --git a/src/scranpy/se_aggregate_across_cells.py b/src/scranpy/se_aggregate_across_cells.py index 0adca95c..ccf43e67 100644 --- a/src/scranpy/se_aggregate_across_cells.py +++ b/src/scranpy/se_aggregate_across_cells.py @@ -23,7 +23,7 @@ def aggregate_across_cells_se( ) -> summarizedexperiment.SummarizedExperiment: """ Aggregate expression values across groups of cells for each gene, storing the result in a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. - This calls :py:func:`~aggregate_across_cells` along with :py:func:`~aggregate_column_data`. + This calls :py:func:`~scranpy.aggregate_across_cells.aggregate_across_cells` along with :py:func:`~aggregate_column_data`. Args: x: @@ -31,13 +31,13 @@ def aggregate_across_cells_se( Rows correspond to genes and columns correspond to cells. factors: - One or more grouping factors, see the argument of the same name in :py:func:`~aggregate_across_cells`. + One or more grouping factors, see the argument of the same name in :py:func:`~scranpy.aggregate_across_cells.aggregate_across_cells`. num_threads: - Passed to :py:func:`~aggregate_across_cells`. + Passed to :py:func:`~scranpy.aggregate_across_cells.aggregate_across_cells`. more_aggr_args: - Further arguments to pass to :py:func:`~aggregate_across_cells`. + Further arguments to pass to :py:func:`~scranpy.aggregate_across_cells.aggregate_across_cells`. assay_type: Name or index of the assay of ``x`` to be aggregated. @@ -79,7 +79,7 @@ def aggregate_across_cells_se( copy_altexps: Whether to copy the column data and metadata of the output ``SingleCellExperiment`` into each of its alternative experiments. - Only relevant if ``x`` is a :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment` or one of its subclasses. + Only relevant if ``x`` is a ``SingleCellExperiment`` or one of its subclasses. 
Returns: A :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` where each column corresponds to a factor combination. @@ -181,7 +181,7 @@ def aggregate_column_data(coldata: biocframe.BiocFrame, index: Sequence, number: Args: coldata: - A :py:class:`~biocframe.BiocFrame.BiocFrame` containing the column data for a SummarizedExperiment. + A :py:class:`~biocframe.BiocFrame.BiocFrame` containing the column data for a ``SummarizedExperiment``. Each row should correspond to a cell. index: @@ -194,17 +194,27 @@ def aggregate_column_data(coldata: biocframe.BiocFrame, index: Sequence, number: All elements of ``index`` should be less than ``number``. only_simple: - Whether to skip columns of ``coldata`` that are not lists, NumPy arrays, :py:class:`~biocutils.NamedList.NamedList`s or :py:class:`~biocutils.Factor.Factor`s. + Whether to skip a column of ``coldata`` that is not a list, NumPy array, :py:class:`~biocutils.NamedList.NamedList` or :py:class:`~biocutils.Factor.Factor`. placeholder: Placeholder value to store in the output column when a factor combination does not have a single unique value. Returns: A :py:class:`~biocframe.BiocFrame.BiocFrame` with number of rows equal to ``number``. - Each "simple" column in ``coldata`` (i.e., list, NumPy array, NamedList or Factor) is represented by a column in the output BiocFrame. + Each "simple" column in ``coldata`` (i.e., list, NumPy array, ``NamedList`` or ``Factor``) is represented by a column in the output ``BiocFrame``. In each column, the ``j``-th entry is equal to the unique value of all rows where ``index == j``, or ``placeholder`` if there is not exactly one unique value. - If ``only_simple = False``, any non-simple columns of ``coldata`` are represented in the output BiocFrame by a list of ``placeholder``values. + If ``only_simple = False``, any non-simple columns of ``coldata`` are represented in the output ``BiocFrame`` by a list of ``placeholder`` values. 
Otherwise, if ``only_simple = True``, any non-simple columns of ``coldata`` are skipped. + + Examples: + >>> import biocframe + >>> df = biocframe.BiocFrame({ + >>> "X": ["a", "a", "b", "b", "c", "c"], + >>> "Y": [ 1, 1, 1, 2, 2, 2], + >>> "Z": [True, False, True, False, True, False] + >>> }) + >>> import scranpy + >>> print(scranpy.aggregate_column_data(df, [0, 0, 1, 1, 2, 2], 3)) """ collected = biocframe.BiocFrame(number_of_rows=number) diff --git a/src/scranpy/se_aggregate_across_genes.py b/src/scranpy/se_aggregate_across_genes.py index 84f15355..da0e1650 100644 --- a/src/scranpy/se_aggregate_across_genes.py +++ b/src/scranpy/se_aggregate_across_genes.py @@ -17,7 +17,7 @@ def aggregate_across_genes_se( ) -> summarizedexperiment.SummarizedExperiment: """ Aggregate expression values across sets of genes for each cell. - This calls :py:func:`~aggregate_across_cells` on an assay from a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. + This calls :py:func:`~scranpy.aggregate_across_genes.aggregate_across_genes` on an assay from a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. Args: x: @@ -28,10 +28,10 @@ def aggregate_across_genes_se( Sequence of gene sets, see :py:func:`~scranpy.aggregate_across_genes.aggregate_across_genes` for details. num_threads: - Passed to :py:func:`~aggregate_across_cells`. + Passed to :py:func:`~scranpy.aggregate_across_genes.aggregate_across_genes`. more_aggr_args: - Further arguments to pass to :py:func:`~aggregate_across_cells`. + Further arguments to pass to :py:func:`~scranpy.aggregate_across_genes.aggregate_across_genes`. assay_type: Name or index of the assay of ``x`` to be aggregated across genes. 
diff --git a/src/scranpy/se_analyze.py b/src/scranpy/se_analyze.py index 81dafb40..558776a6 100644 --- a/src/scranpy/se_analyze.py +++ b/src/scranpy/se_analyze.py @@ -93,7 +93,7 @@ def analyze_se( and/or :py:func:`~scranpy.se_normalize_crispr_counts.normalize_crispr_counts_se`, for normalization. - Running :py:func:`~scranpy.se_choose_rna_hvgs.choose_rna_hvgs_se` to identify highly variable genes. - - Running :py:func:`~scranpy.se_rna_pca.run_pca_se` on the RNA and/or ADT data. + - Running :py:func:`~scranpy.se_run_pca.run_pca_se` on the RNA and/or ADT data. - Running :py:func:`~scranpy.se_scale_by_neighbors.scale_by_neighbors_se` if multiple modalities are present. - Running :py:func:`~scranpy.se_correct_mnn.correct_mnn_se` if multiple batches are present. - Running :py:func:`~scranpy.se_run_all_neighbor_steps.run_all_neighbor_steps_se` to obtain t-SNE and UMAP coordinates, and to perform graph-based clustering. @@ -137,7 +137,7 @@ def analyze_se( Alternatively ``None``, if all cells are from the same block. block_name: - Name of the column in which to store the blocking factor in the column data of the output SummarizedExperiment. + Name of the column in which to store the blocking factor in the column data of the output ``SummarizedExperiment``. Only used if ``block`` is not ``None``. If ``None``, the blocking factor is not stored in the output. @@ -307,8 +307,8 @@ def analyze_se( - ``x``: a :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment` that is a copy of the input ``x``. It is also decorated with the results of each analysis step. - - ``markers``, a list of list of :py:class:`~biocframe.BiocFrame.BiocFrame`s containing the marker statistics for each modality. - Each inner list corresponds to a modality (RNA, ADT, etc.) while each BiocFrame corresponds to a group. + - ``markers``: a list of list of :py:class:`~biocframe.BiocFrame.BiocFrame` objects containing the marker statistics for each modality. 
+ Each inner list corresponds to a modality (RNA, ADT, etc.) while each ``BiocFrame`` corresponds to a group. Examples: >>> import scranpy @@ -321,6 +321,8 @@ def analyze_se( >>> res["x"].get_assay_names() >>> res["x"].get_reduced_dimension_names() >>> print(res["x"].get_column_data()) + >>> import biocutils + >>> print(biocutils.table(res["x"].get_column_data()["graph_cluster"])) >>> print(scranpy.preview_markers(res["markers"]["rna"][0])) """ diff --git a/src/scranpy/se_choose_rna_hvgs.py b/src/scranpy/se_choose_rna_hvgs.py index add71541..e6b2d9d9 100644 --- a/src/scranpy/se_choose_rna_hvgs.py +++ b/src/scranpy/se_choose_rna_hvgs.py @@ -62,7 +62,8 @@ def choose_rna_hvgs_se( >>> import scranpy >>> sce = scranpy.get_test_rna_data_se("norm") >>> sce = scranpy.choose_rna_hvgs_se(sce, more_var_args={ "use_min_width": True }) - >>> sce.get_row_data()["hvg"].sum() + >>> import biocutils + >>> print(biocutils.table(sce.get_row_data()["hvg"])) """ info = model_gene_variances( diff --git a/src/scranpy/se_cluster_graph.py b/src/scranpy/se_cluster_graph.py index ca3fc04f..6096976a 100644 --- a/src/scranpy/se_cluster_graph.py +++ b/src/scranpy/se_cluster_graph.py @@ -22,7 +22,7 @@ def cluster_graph_se( """ Construct a shared-nearest neighbor (SNN) graph from an existing low-dimensional embedding, and apply community detection algorithms to obtain clusters of cells. - This calls :py:func:`~scranpy.build_snn_graph.build_snn_graph` followed by :py:func:`~cluster_graph`. + This calls :py:func:`~scranpy.build_snn_graph.build_snn_graph` followed by :py:func:`~scranpy.cluster_graph.cluster_graph`. Args: x: @@ -39,17 +39,17 @@ def cluster_graph_se( Additional arguments to be passed to :py:func:`~scranpy.build_snn_graph.build_snn_graph`. method: - Community detection method to be used by :py:func:`~cluster_graph`. + Community detection method to be used by :py:func:`~scranpy.cluster_graph.cluster_graph`. resolution: - Clustering resolution to be used by :py:func:`~cluster_graph`. 
+ Clustering resolution to be used by :py:func:`~scranpy.cluster_graph.cluster_graph`. This is either passed as ``multilevel_resolution`` or ``leiden_resolution``, depending on ``method``. more_cluster_args: - Additional arguments to be passed to :py:func:`~cluster_graph`. + Additional arguments to be passed to :py:func:`~scranpy.cluster_graph.cluster_graph`. reddim_type: - Name or index of the existing embedding of ``x.reduced_dimensions()`` to be used for clustering. + Name or index of the existing reduced dimension of ``x`` to be used for clustering. Alternatively a tuple, where the first element contains the name of an alternative experiment of ``x``, and the second element contains the name/index of an embedding in that alternative experiment. @@ -72,13 +72,8 @@ def cluster_graph_se( >>> import scranpy >>> sce = scranpy.get_test_rna_data_se("pca") >>> sce = scranpy.cluster_graph_se(sce) - >>> - >>> # Looking at the distribution of cells among the clusters: - >>> import numpy - >>> clust = sce.get_column_data()["clusters"] - >>> clustids, counts = numpy.unique(clust, return_counts=True) - >>> import biocframe - >>> print(biocframe.BiocFrame({ "cluster": clustids, "counts": counts })) + >>> import biocutils + >>> print(biocutils.table(sce.get_column_data()["clusters"])) """ graph_out = build_snn_graph( diff --git a/src/scranpy/se_cluster_kmeans.py b/src/scranpy/se_cluster_kmeans.py index e57b95d5..ebc7a827 100644 --- a/src/scranpy/se_cluster_kmeans.py +++ b/src/scranpy/se_cluster_kmeans.py @@ -53,13 +53,8 @@ def cluster_kmeans_se( >>> import scranpy >>> sce = scranpy.get_test_rna_data_se("pca") >>> sce = scranpy.cluster_kmeans_se(sce, k=10) - >>> - >>> # Looking at the distribution of cells among the clusters: - >>> import numpy - >>> clust = sce.get_column_data()["clusters"] - >>> clustids, counts = numpy.unique(clust, return_counts=True) - >>> import biocframe - >>> print(biocframe.BiocFrame({ "cluster": clustids, "counts": counts })) + >>> import biocutils + 
>>> print(biocutils.table(sce.get_column_data()["clusters"])) """ clout = cluster_kmeans( diff --git a/src/scranpy/se_get_test_data.py b/src/scranpy/se_get_test_data.py index db319627..43d8b265 100644 --- a/src/scranpy/se_get_test_data.py +++ b/src/scranpy/se_get_test_data.py @@ -169,7 +169,7 @@ def get_test_adt_data_se(at: Literal["start", "qc", "norm", "hvg", "pca"] = "sta def get_test_crispr_data_se(at: Literal["start", "qc"] = "start") -> singlecellexperiment.SingleCellExperiment: """ Get a Perturb-seq dataset with varying levels of processing. - This contains a pancreatic beta cell line obtained with ``scrnaseq.fetch_dataset("cao-pancreas-2025", "2025-10-10", "rqc")}. + This contains a pancreatic beta cell line obtained with ``scrnaseq.fetch_dataset("cao-pancreas-2025", "2025-10-10", "rqc")``. The main experiment contains RNA counts and the alternative experiment contains CRISPR guide counts. Args: diff --git a/src/scranpy/se_quick_adt_qc.py b/src/scranpy/se_quick_adt_qc.py index cf8230de..2da4805a 100644 --- a/src/scranpy/se_quick_adt_qc.py +++ b/src/scranpy/se_quick_adt_qc.py @@ -71,7 +71,8 @@ def quick_adt_qc_se( >>> sce = scranpy.quick_adt_qc_se(sce, subsets={ "igg": is_igg }) >>> print(sce.get_column_data()[:,["sum", "detected", "subset_sum_igg"]]) >>> print(sce.get_metadata()["qc"]["thresholds"]) - >>> sce.get_column_data()["keep"].sum() + >>> import biocutils + >>> print(biocutils.table(sce.get_column_data()["keep"])) """ metrics = compute_adt_qc_metrics(x.get_assay(assay_type), subsets, row_names=x.get_row_names(), num_threads=num_threads) @@ -102,10 +103,11 @@ def format_compute_adt_qc_metrics_result(df: biocframe.BiocFrame, flatten: bool Result of :py:func:`~scranpy.adt_quality_control.compute_adt_qc_metrics`. flatten: - Whether to flatten the nested BiocFrame of subset proportions. + Whether to flatten the nested ``BiocFrame`` of subset sums. Returns: - A BiocFrame containing per-cell QC statistics. 
+ A ``BiocFrame`` containing per-cell QC statistics. + If ``flatten = True``, the subset sums are stored as top-level columns with name ``subset_sum_<name>`` where ``<name>`` is the name of the subset. Examples: >>> import scranpy diff --git a/src/scranpy/se_quick_crispr_qc.py b/src/scranpy/se_quick_crispr_qc.py index 6681fc07..207cec3a 100644 --- a/src/scranpy/se_quick_crispr_qc.py +++ b/src/scranpy/se_quick_crispr_qc.py @@ -94,7 +94,7 @@ def format_compute_crispr_qc_metrics_result(df: biocframe.BiocFrame) -> biocfram Result of :py:func:`~scranpy.crispr_quality_control.compute_crispr_qc_metrics`. Returns: - A BiocFrame containing per-cell QC statistics. + A ``BiocFrame`` containing per-cell QC statistics. Examples: >>> import scranpy diff --git a/src/scranpy/se_quick_rna_qc.py b/src/scranpy/se_quick_rna_qc.py index 6e390f42..32931f14 100644 --- a/src/scranpy/se_quick_rna_qc.py +++ b/src/scranpy/se_quick_rna_qc.py @@ -44,7 +44,7 @@ def compute_rna_qc_metrics_with_altexps( each key is a string specifying the name of an alternative experiment for which to compute QC metrics, while each value is an integer/string specifying the index/name of the assay to use from that experiment. - This option is only relevant if ``x`` is a `~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`. + This option is only relevant if ``x`` is a :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`. num_threads: Number of threads, passed to :py:func:`~scranpy.rna_quality_control.compute_rna_qc_metrics`. @@ -56,10 +56,10 @@ def compute_rna_qc_metrics_with_altexps( A :py:class:`~biocutils.NamedList.NamedList` containing: - ``main``: a :py:class:`~biocframe.BiocFrame.BiocFrame` containing QC statistics for the main experiment, - see :py:class:`~scranpy.rna_quality_control.compute_rna_qc_metrics` for details. + see :py:func:`~scranpy.rna_quality_control.compute_rna_qc_metrics` for details.
The proportion of counts for each alternative experiment in ``altexp_proportions`` is stored in the ``subset_proportions`` column. - - ``altexps``: a NamedList with one entry per alternative experiment listed in ``altexp_proportions``. - Each entry is named after its corresponding alternative experiment and is a BiocFrame of QC statistics for that experiment. + - ``altexps``: a ``NamedList`` with one entry per alternative experiment listed in ``altexp_proportions``. + Each entry is named after its corresponding alternative experiment and is a ``BiocFrame`` of QC statistics for that experiment. Examples: >>> import scranpy @@ -157,7 +157,8 @@ def quick_rna_qc_se( >>> sce = scranpy.quick_rna_qc_se(sce, subsets={ "mito": is_mito }) >>> print(sce.get_column_data()[:,["sum", "detected", "subset_proportion_mito"]]) >>> print(sce.get_metadata()["qc"]["thresholds"]) - >>> sce.get_column_data()["keep"].sum() + >>> import biocutils + >>> print(biocutils.table(sce.get_column_data()["keep"])) >>> >>> # Computing spike-in proportions, if available. >>> sce = scranpy.get_test_rna_data_se() @@ -205,10 +206,11 @@ def format_compute_rna_qc_metrics_result(df: biocframe.BiocFrame, flatten: bool Result of :py:func:`~scranpy.rna_quality_control.compute_rna_qc_metrics`. flatten: - Whether to flatten the nested BiocFrame of subset proportions. + Whether to flatten the nested ``BiocFrame`` of subset proportions. Returns: - A BiocFrame containing per-cell QC statistics. + A ``BiocFrame`` containing per-cell QC statistics. + If ``flatten = True``, the subset proportions are stored as top-level columns with name ``subset_proportion_<name>`` where ``<name>`` is the name of the subset.
Examples: >>> import scranpy diff --git a/src/scranpy/se_run_all_neighbor_steps.py b/src/scranpy/se_run_all_neighbor_steps.py index 55122b1c..f19b305d 100644 --- a/src/scranpy/se_run_all_neighbor_steps.py +++ b/src/scranpy/se_run_all_neighbor_steps.py @@ -43,7 +43,7 @@ def run_all_neighbor_steps_se( If ``None``, the UMAP is not computed. more_umap_args: - Additional arguments for UMAP, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as `run_umap_options`. + Additional arguments for UMAP, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as ``run_umap_options``. If ``None``, the UMAP is not computed. tsne_output_name: @@ -51,7 +51,7 @@ def run_all_neighbor_steps_se( If ``None``, the t-SNE is not computed. more_tsne_args: - Additional arguments for t-SNE, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as `run_tsne_options`. + Additional arguments for t-SNE, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as ``run_tsne_options``. If ``None``, the t-SNE is not computed. build_graph_name: @@ -59,7 +59,7 @@ def run_all_neighbor_steps_se( If ``None``, the graph is not stored. more_build_graph_args: - Additional arguments for graph construction, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as `more_build_graph_args`. + Additional arguments for graph construction, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as ``more_build_graph_args``. cluster_output_name: Name of the column of the column data in which to store the cluster assignments. @@ -70,7 +70,7 @@ def run_all_neighbor_steps_se( If ``None``, additional outputs are not stored. more_cluster_graph_args: - Additional arguments for graph-based clustering, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as `more_cluster_graph_args`. 
+ Additional arguments for graph-based clustering, to pass to :py:func:`~scranpy.run_all_neighbor_steps.run_all_neighbor_steps` as ``more_cluster_graph_args``. If ``None``, the graph-based clustering is not performed. nn_parameters: @@ -100,7 +100,8 @@ def run_all_neighbor_steps_se( >>> more_umap_args={ "num_epochs": 50 } >>> ) >>> sce.get_reduced_dimension_names() - >>> sce.get_column_data()["clusters"] + >>> import biocutils + >>> print(biocutils.table(sce.get_column_data()["clusters"])) """ if umap_output_name is None: diff --git a/src/scranpy/se_score_gene_set.py b/src/scranpy/se_score_gene_set.py index 93fc7674..acf70bc4 100644 --- a/src/scranpy/se_score_gene_set.py +++ b/src/scranpy/se_score_gene_set.py @@ -38,7 +38,8 @@ def score_gene_set_se( Name or index of the assay of ``x`` from which to compute the gene set scores. Returns: - List containing per-cell scores and per-gene weights. + A :py:class:`~biocutils.NamedList.NamedList` containing per-cell scores and per-gene weights, + see :py:func:`~scranpy.score_gene_set.score_gene_set` for details. Examples: >>> import scranpy diff --git a/src/scranpy/se_score_markers.py b/src/scranpy/se_score_markers.py index 7c896880..473c126b 100644 --- a/src/scranpy/se_score_markers.py +++ b/src/scranpy/se_score_markers.py @@ -20,7 +20,7 @@ def score_markers_se( """ Identify candidate marker genes based on effect sizes from pairwise comparisons between groups of cells. This calls :py:func:`~scranpy.score_markers.score_markers` on an assay of a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`, - and then uses :py:func:`~format_score_markers_result` to reforamt the results. + and then uses :py:func:`~format_score_markers_result` to reformat the results. Args: x: @@ -48,19 +48,19 @@ def score_markers_se( A single string is treated as a list of length 1. order_by: - Name of the column to use for ordering the rows of the output :py:class:`~biocframe.BiocFrame.BiocFrame`s. 
+ Name of the column to use for ordering the rows of each output :py:class:`~biocframe.BiocFrame.BiocFrame`. Alternatively ``True``, in which case a column is automatically chosen from the effect size summaries. If ``None`` or ``False``, no ordering is performed. Returns: A :py:class:`~biocutils.NamedList.NamedList` of :py:class:`~biocframe.BiocFrame.BiocFrame`s. - Each BiocFrame corresponds to a unique group in ``groups``. + Each ``BiocFrame`` corresponds to a unique group in ``groups``. Each row contains statistics for a gene in ``x``, with the following columns: - ``mean``, the mean expression in the current group. - ``detected``, the proportion of cells with detected expression in the current group. - - ``<effect>_<summary>``, a summary statistic for an effect size, - ``cohens_d_mean`` contains the mean Cohen's d across comparisons involving the current group. + - ``<effect>_<summary>``, a summary statistic for an effect size. + For example, ``cohens_d_mean`` contains the mean Cohen's d across comparisons involving the current group. Examples: >>> import scranpy @@ -102,7 +102,7 @@ def _find_order_by(df: biocframe.BiocFrame, order_by: Optional[Union[str, bool]] return order_by -def order(x, decreasing): +def _order(x, decreasing): if decreasing: return numpy.argsort(-x) else: @@ -116,28 +116,29 @@ def format_score_markers_result( row_names: Optional[Sequence] = None ) -> biocutils.NamedList: """ - Format the output of :py:func:`~scranpy.score_markers.score_markers` to a list of per-group :py:class:`~biocframe.BiocFrame.BiocFrame`s. + Reformat the output of :py:func:`~scranpy.score_markers.score_markers` into a list of per-group :py:class:`~biocframe.BiocFrame.BiocFrame` objects. Args: res: Results of :py:func:`~scranpy.score_markers.score_markers`. extra_columns: - A :py:class:`~biocframe.BiocFrame.BiocFrame` with the same number of rows as ``x``, containing extra columns to add each DataFrame.
+ A :py:class:`~biocframe.BiocFrame.BiocFrame` with the same number of rows as ``x``, containing extra columns to add to each ``BiocFrame``. order_by: - Name of the column to use for ordering the rows of the output :py:class:`~biocframe.BiocFrame.BiocFrame`s. + Name of the column to use for ordering the rows of each output :py:class:`~biocframe.BiocFrame.BiocFrame`. Alternatively ``True``, in which case a column is automatically chosen from the effect size summaries. If ``None`` or ``False``, no ordering is performed. row_names: - Sequence of strings containing the row names to add to each BiocFrame. + Sequence of strings containing the row names to add to each ``BiocFrame``. This should correspond to the gene names corresponding to the rows of ``x`` used in :py:func:`~scranpy.score_markers.score_markers`. Returns: - A :py:class:`~biocutils.NamedList.NamedList` of :py:class:`~biocframe.BiocFrame.BiocFrame`s. - Each BiocFrame corresponds to a unique group in ``groups``. - Each row contains statistics for a gene in ``x``, with the following columns: + A :py:class:`~biocutils.NamedList.NamedList` of :py:class:`~biocframe.BiocFrame.BiocFrame` objects. + Each ``BiocFrame`` corresponds to a unique group in ``groups``. + Each row of each ``BiocFrame`` contains statistics for a gene in ``x``. + Each ``BiocFrame`` contains the following columns: - ``mean``, the mean expression in the current group. - ``detected``, the proportion of cells with detected expression in the current group. @@ -182,7 +183,7 @@ def format_score_markers_result( has_order_by = True if order_by is not None: dec = not order_by.endswith("_min_rank") - ordering = order(current[order_by], decreasing=dec) + ordering = _order(current[order_by], decreasing=dec) current = current[ordering,:] output[group] = current @@ -208,7 +209,7 @@ def preview_markers( Names of columns of ``df`` to retain in the preview. Alternatively, each entry may be a tuple of two strings.
- The first string is the name of the column in the output BiocFrame, and the second string is the name of the column of ``df`` to retain. + The first string is the name of the column in the output ``BiocFrame``, and the second string is the name of the column of ``df`` to retain. rows: Number of rows to show. @@ -220,9 +221,9 @@ def preview_markers( If ``None`` or ``False``, no ordering is performed. include_order_by: - Whether to include the column named by ``order_by`` in the output BiocFrame. + Whether to include the column named by ``order_by`` in the output ``BiocFrame``. This may also be a string, which is treated as ``True``; - the value is used as the name of the column in the output BiocFrame. + the value is used as the name of the column in the output ``BiocFrame``. Returns: A :py:class:`~biocframe.BiocFrame.BiocFrame` containing important columns for the top markers. @@ -250,7 +251,7 @@ def preview_markers( if order_by is not None: dec = not order_by.endswith("_min_rank") - ordering = order(df[order_by], decreasing=dec) + ordering = _order(df[order_by], decreasing=dec) if rows is not None and rows < len(ordering): ordering = ordering[:rows] new_df = new_df[ordering,:] diff --git a/src/scranpy/subsample_by_neighbors.py b/src/scranpy/subsample_by_neighbors.py index 1dc5df3c..cfb7be8b 100644 --- a/src/scranpy/subsample_by_neighbors.py +++ b/src/scranpy/subsample_by_neighbors.py @@ -20,7 +20,7 @@ def subsample_by_neighbors( x: Numeric matrix where rows are dimensions and columns are cells, typically containing a low-dimensional representation from, e.g., :py:func:`~scranpy.run_pca.run_pca`. - Alternatively, a :py:class:`~knncolle.Index.Index` object containing a pre-built search index for a dataset. + Alternatively, a :py:class:`~knncolle.classes.Index` object containing a pre-built search index for a dataset. Alternatively, a :py:class:`~knncolle.find_knn.FindKnnResults` object containing pre-computed search results for a dataset. 
@@ -42,7 +42,7 @@ def subsample_by_neighbors( Only used if ``x`` does not contain existing neighbor search results. Returns: - Integer array with indices of the cells selected to be in the subsample. + Integer NumPy array containing the indices of the cells selected to be in the subsample. References: https://libscran.github.io/nenesub, for the rationale behind this approach. @@ -52,7 +52,7 @@ def subsample_by_neighbors( >>> pcs = numpy.random.rand(20, 500) >>> import scranpy >>> keep = scranpy.subsample_by_neighbors(pcs) - >>> keep + >>> print(keep) """ if isinstance(x, knncolle.FindKnnResults): diff --git a/src/scranpy/summarize_effects.py b/src/scranpy/summarize_effects.py index aa6796b7..83fd20cc 100644 --- a/src/scranpy/summarize_effects.py +++ b/src/scranpy/summarize_effects.py @@ -25,7 +25,7 @@ def summarize_effects( compute_quantiles: Optional[Sequence] = None, compute_min_rank: bool = True, num_threads: int = 1 -) -> list[biocframe.BiocFrame]: +) -> biocutils.NamedList: """For each group, summarize the effect sizes for all pairwise comparisons to other groups. This yields a set of summary statistics that can be used to rank marker genes for each group. @@ -62,7 +62,7 @@ def summarize_effects( Number of threads to use. Returns: - List of length equal to the number of groups (i.e., the extents of the first two dimensions of ``effects``). + A :py:class:`~biocutils.NamedList.NamedList` of length equal to the number of groups (i.e., the extents of the first two dimensions of ``effects``). Each entry is a :py:class:`~biocframe.BiocFrame.BiocFrame` where each row corresponds to a gene in ``effects``. Each column contain a summary statistic of the effect sizes of the comparisons involving its corresponding group. diff --git a/src/scranpy/test_enrichment.py b/src/scranpy/test_enrichment.py index 413cdf90..70476e5e 100644 --- a/src/scranpy/test_enrichment.py +++ b/src/scranpy/test_enrichment.py @@ -48,7 +48,7 @@ def test_enrichment( Number of threads to use. 
Returns: - Array of (log-transformed) p-values to test for significant enrichment of ``x`` in each entry of ``sets``. + Double-precision NumPy array of (log-transformed) p-values to test for significant enrichment of ``x`` in each entry of ``sets``. References: https://libscran.github.io/phyper, for the underlying implementation.