Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
1 change: 1 addition & 0 deletions scripts/run_benchmark/run_full_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ expression_correction_methods:
- no_correction
# - gene_efficiency_correction
# - resolvi_correction
# - split
method_parameters_yaml: /tmp/method_params.yaml
HERE

Expand Down
1 change: 1 addition & 0 deletions scripts/run_benchmark/run_full_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ expression_correction_methods:
- no_correction
- gene_efficiency_correction
- resolvi_correction
- split
method_parameters_yaml: /tmp/method_params.yaml
HERE

Expand Down
1 change: 1 addition & 0 deletions scripts/run_benchmark/run_test_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ expression_correction_methods:
- no_correction
# - gene_efficiency_correction
# - resolvi_correction
# - split
method_parameters_yaml: /tmp/method_params.yaml
HERE

Expand Down
1 change: 1 addition & 0 deletions scripts/run_benchmark/run_test_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ expression_correction_methods:
- no_correction
- gene_efficiency_correction
- resolvi_correction
- split
#method_parameters_yaml: /tmp/method_params.yaml
HERE

Expand Down
51 changes: 51 additions & 0 deletions src/methods_expression_correction/split/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
__merge__: /src/api/comp_method_expression_correction.yaml

name: split
label: "SPLIT"
summary: "Correct doublet/mis-segmented cells using SPLIT"
description: "SPLIT (Spatial Purification of Layered Intracellular Transcripts) is a novel method that integrates snRNA-seq with RCTD deconvolution to enhance signal purity. SPLIT effectively resolves mixed transcriptomic signals, improving background correction and cell-type resolution."
links:
documentation: "https://github.com/bdsc-tds/SPLIT"
repository: "https://github.com/bdsc-tds/SPLIT"
references:
doi: "10.1101/2025.04.23.649965"

arguments:
- name: --keep_all_cells
required: false
direction: input
type: boolean
default: false
description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)

resources:
- type: r_script
path: script.R

engines:
- type: docker
image: openproblems/base_r:1
setup:
- type: docker
run: |
apt-get update
- type: r
bioc: [anndataR, rhdf5, devtools, scater]
- type: docker
run: |
Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"

# The forced source reinstall of SingleCellExperiment can probably be dropped in the future. It currently works around a bug described in these issues:
# https://github.com/drighelli/SpatialExperiment/issues/171
# https://github.com/satijalab/seurat/issues/9889
# Reinstalling SingleCellExperiment triggers a correct reinstall of SpatialExperiment.

# Using a large timeout here to reduce failures during GitHub package installation.

- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ hightime, highcpu, highmem ]
105 changes: 105 additions & 0 deletions src/methods_expression_correction/split/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
library(spacexr)
library(Matrix)
library(SingleCellExperiment)
library(anndataR)
library(SPLIT)
library(Seurat)
library(scuttle)

## VIASH START
# Placeholder parameters for running this script directly during development.
# Everything between the VIASH START/END markers is expected to be replaced
# by Viash when the component is built.
#
# Fixes relative to the previous stub, which was not valid R:
#   * no trailing comma inside list(...) (R raises "argument is empty")
#   * `name = value` instead of the Python-style `'name': value`
par <- list(
  "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_celltypes.h5ad",
  "input_scrnaseq_reference" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
  "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad",
  "keep_all_cells" = FALSE
)

# Runtime metadata; `cpus` is read further down to set the RCTD core count.
meta <- list(
  "cpus" = 4
)

## VIASH END

# Load the annotated spatial dataset twice from the same h5ad file: once as a
# SingleCellExperiment (used for the RCTD inputs and the final output) and
# once as a Seurat object (used as the count source for SPLIT).
sce <- read_h5ad(
  par$input_spatial_with_cell_types,
  as = "SingleCellExperiment"
)
xe <- read_h5ad(
  par$input_spatial_with_cell_types,
  as = "Seurat"
)

# Unless the caller asked to keep every cell, drop cells with a total count
# of zero from both objects.
# NOTE(review): the two objects are filtered by separate criteria (column sums
# of `counts` vs. `nCount_RNA`); presumably these select the same cells -
# confirm.
if (!par$keep_all_cells) {
  cat("Filtering cells with 0 counts\n")
  has_counts <- colSums(counts(sce)) > 0
  sce <- sce[, has_counts]
  xe <- subset(xe, subset = nCount_RNA > 0)
}

# Assemble the RCTD `SpatialRNA` input: a coordinate table with one row per
# cell (row names taken from `cell_id`) plus the raw counts matrix.
cell_meta <- colData(sce)
centroid_x <- cell_meta$centroid_x
centroid_y <- cell_meta$centroid_y
coords <- data.frame(centroid_x, centroid_y)
rownames(coords) <- cell_meta$cell_id

counts <- assay(sce, "counts")
puck <- SpatialRNA(coords, counts)

# Load the scRNA-seq reference that RCTD deconvolves against
ref <- read_h5ad(par$input_scrnaseq_reference, as = "SingleCellExperiment")

# Keep only cell types represented by at least 25 reference cells
# (noted by the original author as the minimum for RCTD)
cells_per_type <- table(colData(ref)$cell_type)
valid_celltypes <- names(cells_per_type)[cells_per_type >= 25]
keep_cell <- colData(ref)$cell_type %in% valid_celltypes
filtered_ref <- ref[, keep_cell]

ref_counts <- assay(filtered_ref, "counts")

# Re-create the factor so levels belonging to removed cell types are dropped
colData(filtered_ref)$cell_type <- factor(colData(filtered_ref)$cell_type)

# Cell-type labels named by the reference count matrix columns
cell_types <- colData(filtered_ref)$cell_type
names(cell_types) <- colnames(ref_counts)

reference <- Reference(ref_counts, cell_types, min_UMI = 0)

# Use the CPU count supplied in `meta` when one is present; otherwise fall
# back to a single core. The explicit name check avoids `$` partial matching
# on other entries whose names merely start with "cpus".
has_cpus <- "cpus" %in% names(meta) && !is.null(meta$cpus)
cores <- if (has_cpus) meta$cpus else 1
cat(sprintf("Number of cores: %s\n", cores))

# Deconvolve the spatial data against the reference with RCTD in doublet mode
cat("Running RCTD\n")
myRCTD <- run.RCTD(
  create.RCTD(puck, reference, max_cores = cores),
  doublet_mode = "doublet"
)

# Disabled: store RCTD's per-cell "spot_class" annotation on the
# SingleCellExperiment. Kept for reference.
# cat("Saving RCTD spot_class\n")
# results <- myRCTD@results
# rctd_spot_class <- results$results_df$spot_class
# names(rctd_spot_class) <- rownames(results$results_df)
# colData(sce)$RCTD_class <- "not_included"
# colData(sce)[names(rctd_spot_class),"RCTD_class"] <- as.character(rctd_spot_class)

# Post-process the RCTD object with SPLIT's helper before purification
RCTD <- SPLIT::run_post_process_RCTD(myRCTD)

# Purify the raw counts with SPLIT. Any gene x cell count matrix can be
# supplied here; the Seurat object's RNA counts layer is used.
cat("Running SPLIT\n")
raw_counts <- GetAssayData(xe, assay = "RNA", layer = "counts")
res_split <- SPLIT::purify(
  counts = raw_counts,
  rctd = RCTD,
  # Optional flag; presumably also purifies cells RCTD classed as singlets -
  # confirm against the SPLIT documentation.
  DO_purify_singlets = TRUE
)


# Add a corrected counts layer to the SingleCellExperiment and re-normalize
cat("Normalizing counts\n")

# Keep the incoming normalized layer under a separate name, because
# "normalized" is overwritten below with values derived from corrected counts
assay(sce, "normalized_uncorrected") <- assay(sce, "normalized")

# Start from the raw counts and overwrite only the genes/cells for which
# SPLIT returned purified values; every other entry keeps its raw count
purified <- res_split$purified_counts
corrected <- assay(sce, "counts")
corrected[rownames(purified), colnames(purified)] <- purified
assay(sce, "corrected_counts") <- corrected

# Library size normalization on the corrected counts - see note in resolVI
# NOTE(review): scuttle documents this argument of logNormCounts() as
# `size.factors`; the underscore spelling is kept exactly as before - confirm
# it is honoured by the installed scuttle version.
size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
normalized_sce <- logNormCounts(
  sce,
  size_factors = size_factors,
  assay.type = "corrected_counts"
)
assay(sce, "normalized") <- assay(normalized_sce, "logcounts")

# Save the result as h5ad, creating the output directory first if needed
cat("Writing to h5ad\n")
output_dir <- dirname(par$output)
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
write_h5ad(sce, par$output, mode = "w")
3 changes: 2 additions & 1 deletion src/workflows/run_benchmark/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ argument_groups:
A list of expression correction methods to run.
type: string
multiple: true
default: "no_correction:gene_efficiency_correction:resolvi_correction"
default: "no_correction:gene_efficiency_correction:resolvi_correction:split"
- name: Method parameters
description: |
Use these arguments to control the parameter sets that are run for each
Expand Down Expand Up @@ -175,6 +175,7 @@ dependencies:
- name: methods_expression_correction/no_correction
- name: methods_expression_correction/gene_efficiency_correction
- name: methods_expression_correction/resolvi_correction
- name: methods_expression_correction/split
- name: methods_data_aggregation/aggregate_spatial_data
- name: metrics/similarity
- name: metrics/quality
Expand Down
3 changes: 2 additions & 1 deletion src/workflows/run_benchmark/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,8 @@ workflow run_wf {
expr_corr_methods = [
no_correction,
gene_efficiency_correction,
resolvi_correction
resolvi_correction,
split
]

expr_corr_ch = cta_ch
Expand Down