def proportion_of_assigned_reads(
    sdata: "sd.SpatialData",
) -> tuple[float, pd.Series]:
    """Calculate the proportion of reads that are assigned to cells.

    The number of assigned reads is taken from the count matrix
    ``sdata["counts"].layers["counts"]``; the total number of reads is the
    number of rows of ``sdata["transcripts"]``.

    Parameters
    ----------
    sdata : sd.SpatialData
        SpatialData object providing

        * ``sdata["transcripts"]`` with the column ``'feature_name'``
          (pandas or dask-backed), and
        * ``sdata["counts"]`` exposing ``var_names`` and
          ``layers["counts"]``, whose sum over axis 0 yields one total per
          entry of ``var_names``.

    Returns
    -------
    float
        Proportion of assigned reads. ``nan`` if there are no transcripts.
    pd.Series
        Proportion of assigned reads per gene, named ``"fraction"`` and
        indexed by the sorted unique values of ``'feature_name'``. Genes that
        occur in the transcripts but not in ``var_names`` get ``0.0``; genes
        that only occur in ``var_names`` are not listed because they have no
        reads to normalise by.

    """
    feature_names = sdata["transcripts"]["feature_name"]
    # A dask-backed column is lazy: materialise it once so that the total and
    # the per-gene read counts both come from a single pass over the data.
    if hasattr(feature_names, "compute"):
        feature_names = feature_names.compute()

    n_transcripts = len(feature_names)
    if n_transcripts == 0:
        # Without reads the proportion is undefined (neither 0 nor 1).
        return float("nan"), pd.Series(dtype=float, name="fraction")

    table = sdata["counts"]
    # scipy sparse matrices return a (1, n_genes) np.matrix from sum(axis=0),
    # dense arrays return a 1-D array; ravel() normalises both to 1-D.
    assigned_per_gene = np.asarray(table.layers["counts"].sum(axis=0)).ravel()

    # Proportion of assigned reads
    prop_of_assigned_reads = float(assigned_per_gene.sum() / n_transcripts)

    # Proportion of assigned reads per gene. No special-casing of 0.0 / 1.0 is
    # needed: the division below is well defined for every observed gene.
    genes, counts = np.unique(np.asarray(feature_names), return_counts=True)
    assigned = pd.Series(assigned_per_gene, index=pd.Index(table.var_names))
    # reindex() aligns by gene name, so genes missing from the count table get
    # 0 assigned reads instead of raising a KeyError.
    prop_of_assigned_reads_per_gene = assigned.reindex(genes, fill_value=0) / counts
    prop_of_assigned_reads_per_gene.name = "fraction"

    return prop_of_assigned_reads, prop_of_assigned_reads_per_gene


# Previous version only based on transcripts table.
+# +#def proportion_of_assigned_reads( +# sdata: sd.SpatialData, +#) -> [float, pd.Series]: +# """ Calculate the proportion of assigned reads +# +# Parameters +# ---------- +# sdata : sd.SpatialData +# SpatialData object with sdata['transcripts'] including the column 'cell_id' +# +# Returns +# ------- +# float +# Proportion of assigned reads +# pd.Series +# Proportion of assigned reads per gene +# +# """ +# +# sdata['transcripts']['assigned'] = sdata['transcripts']['cell_id'] != 0 +# +# # Proportion of assigned reads +# prop_of_assigned_reads = float(((sdata['transcripts']['assigned']).sum() / len(sdata['transcripts'])).compute()) +# +# # Proportion of assigned reads per gene +# if prop_of_assigned_reads == 1.0: +# prop_of_assigned_reads_per_gene = pd.Series( +# index=sdata['transcripts']['feature_name'].unique().compute().values, +# data=1.0 +# ) +# elif prop_of_assigned_reads == 0.0: +# prop_of_assigned_reads_per_gene = pd.Series( +# index=sdata['transcripts']['feature_name'].unique().compute().values, +# data=0.0 +# ) +# else: +# df = pd.crosstab(sdata['transcripts']['feature_name'], sdata['transcripts']['assigned']) +# prop_of_assigned_reads_per_gene = df[True] / (df[False] + df[True]) +# +# return prop_of_assigned_reads, prop_of_assigned_reads_per_gene