This notebook extracts data from a Seurat object and saves it in formats suitable for downstream analysis.
Run as an R script:
Rscript extract_Visium_v5.R seurat_obj.rds output_directory cluster_column
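When invoked non-interactively with Rscript as above, the three positional arguments could be read with commandArgs(); a minimal sketch (the argument order is assumed to match the command line shown):
# Hypothetical command-line handling (assumed order: RDS path, output
# directory, cluster column); when working interactively, skip this and
# use the manual settings below instead.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 3) {
seurat_obj_file <- args[1]
output_dir <- args[2]
cluster_col <- args[3]
}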
library(jsonlite)
library(Matrix)
library(data.table)
library(Seurat)
library(tidyverse)
library(png)
You can set these manually for interactive use:
# Set these manually for notebook use
seurat_obj_file <- "example_data/Visium_MTG_10samples.rds" # Path to your Seurat object file
output_dir <- "example_data/Visium_MTG_10samples" # Directory to save output files, usually the dataset name
cluster_col <- "smoothed_label_s5" # Column name in metadata for clustering information
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
cat("Loading RDS data...\n")
## Loading RDS data...
seurat_obj <- readRDS(seurat_obj_file)
if (!inherits(seurat_obj, "Seurat")) {
stop("The provided file is not a valid Seurat object.")
}
capture.output(str(seurat_obj), file = paste0(output_dir, "/seurat_obj_structure.txt"))
Seurat v5 objects have a different structure than v4 objects, so we need to make sure the object contains the expected slots and layers (a fallback for v5 Assay5 layers is sketched after the checks below).
# Check if the Seurat object has the necessary assay
if (!"Spatial" %in% names(seurat_obj@assays)) {
stop("The Seurat object does not contain the 'Spatial' assay.")
}
# Check if the Seurat object has the necessary assay data
if (!"data" %in% slotNames(seurat_obj@assays$Spatial)) {
stop("The Seurat object does not contain the 'data' slot in the 'Spatial' assay.")
}
# Check if the Seurat object has the necessary metadata
if (!"meta.data" %in% slotNames(seurat_obj)) {
stop("The Seurat object does not contain the 'meta.data' slot.")
}
## Check if the Seurat object has the necessary umap embedding
if (!"umap" %in% names(seurat_obj@reductions)) {
stop("The Seurat object does not contain the 'umap' reduction.")
}
# Check if the cluster column exists in the metadata
if (!cluster_col %in% colnames(seurat_obj@meta.data)) {
stop(paste("The cluster column", cluster_col, "does not exist in the metadata."))
}
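The checks above assume a v3/v4-style Assay with a 'data' slot. If the 'Spatial' assay is instead a v5 Assay5, the normalized values live in a layer rather than a slot; a hedged sketch of the equivalent check (assuming the layer is named 'data'):
# Sketch of a fallback check for a v5 Assay5 'Spatial' assay (an assumption,
# not exercised by the example data used in this notebook)
spatial_assay <- seurat_obj[["Spatial"]]
if (inherits(spatial_assay, "Assay5") && !"data" %in% Layers(spatial_assay)) {
stop("The 'Spatial' Assay5 does not contain a 'data' layer.")
}
# For an Assay5, the extraction further below would use the layer-aware accessor:
# normalized_counts <- GetAssayData(seurat_obj, assay = "Spatial", layer = "data")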
cat("Saving metadata...\n")
## Saving metadata...
metadata <- seurat_obj@meta.data
write.csv(metadata, file = paste0(output_dir, "/raw_metadata.csv"), row.names = TRUE)
cat("Saving metadata feature names...\n")
## Saving metadata feature names...
column_names <- colnames(metadata)
write_json(column_names, path = file.path(output_dir, "raw_metadata_columns.json"), pretty = TRUE)
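An optional round-trip check that the metadata CSV and the column-name JSON agree (assuming the two files written just above):
# Reload both files and confirm the column names match
reloaded_meta <- read.csv(file.path(output_dir, "raw_metadata.csv"), row.names = 1, check.names = FALSE)
reloaded_cols <- fromJSON(file.path(output_dir, "raw_metadata_columns.json"))
stopifnot(identical(colnames(reloaded_meta), reloaded_cols))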
cat("Saving UMAP embedding...\n")
## Saving UMAP embedding...
umap_embeddings <- seurat_obj@reductions$umap@cell.embeddings
colnames(umap_embeddings) <- toupper(colnames(umap_embeddings))
write.csv(umap_embeddings, file = paste0(output_dir, "/raw_umap_embeddings.csv"), row.names = TRUE)
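As a quick sanity check, the embedding can be plotted and coloured by the cluster column; a sketch using ggplot2 (loaded via tidyverse) and assuming the upper-cased axis names UMAP_1/UMAP_2 produced above:
# Join the embedding with the cluster labels by cell barcode and plot
umap_df <- as.data.frame(umap_embeddings)
umap_df$cluster <- seurat_obj@meta.data[rownames(umap_df), cluster_col]
ggplot(umap_df, aes(UMAP_1, UMAP_2, colour = cluster)) +
geom_point(size = 0.2) +
theme_minimal()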
cat("Saving normalized counts...\n")
## Saving normalized counts...
normalized_counts <- seurat_obj@assays$Spatial@data # This is a sparse matrix
# Convert sparse matrix to triplet format (long format)
triplet <- summary(normalized_counts)
# Get row (gene) and column (spot) names
triplet$Gene <- rownames(normalized_counts)[triplet$i]
triplet$Spot <- colnames(normalized_counts)[triplet$j]
triplet$Expression <- triplet$x
# Keep only necessary columns
triplet <- triplet[, c("Gene", "Spot", "Expression")]
long_data <- triplet %>%
select(Spot, Gene, Expression)
nonzero_data <- long_data[long_data$Expression > 0, ]
nonzero_data <- as.data.table(nonzero_data)
fwrite(nonzero_data, file = paste0(output_dir, "/raw_normalized_counts.csv"), row.names = FALSE)
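For downstream work, the long-format CSV can be rebuilt into a sparse gene-by-spot matrix; a sketch assuming the column names (Spot, Gene, Expression) written above:
# Reload the triplets and reconstruct a sparse matrix with Matrix::sparseMatrix
reloaded <- fread(file.path(output_dir, "raw_normalized_counts.csv"))
genes <- sort(unique(reloaded$Gene))
spots <- sort(unique(reloaded$Spot))
sparse_back <- sparseMatrix(
i = match(reloaded$Gene, genes),
j = match(reloaded$Spot, spots),
x = reloaded$Expression,
dims = c(length(genes), length(spots)),
dimnames = list(genes, spots)
)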
cat("Save images and coordinates...\n")
## Save images and coordinates...
images <- seurat_obj@images
all_names <- names(images)
## Create the output directories
images_dir <- paste0(output_dir, "/images")
dir.create(images_dir, showWarnings = FALSE)
coordinates_dir <- paste0(output_dir, "/coordinates")
dir.create(coordinates_dir, showWarnings = FALSE)
# Loop over each image and extract its data
for (i in seq_along(all_names)) {
cat(i, "/", length(all_names), ": ", all_names[i], "\n")
image_name <- all_names[i]
spatial_data <- images[[image_name]]
img_array <- spatial_data@image
coordinates <- spatial_data@coordinates
# Extract the scale.factors from the Seurat object
scale_factors <- spatial_data@scale.factors
# Remove the custom class attributes
scale_factors_plain <- unclass(scale_factors)
scale_factors_plain$spot.radius <- spatial_data@spot.radius
# Convert to JSON with pretty formatting
json_output <- toJSON(scale_factors_plain, pretty = TRUE, auto_unbox = TRUE)
# Save to a JSON file
write(json_output, paste0(coordinates_dir, "/raw_scalefactors_", image_name, ".json"))
# Save coordinates
write.csv(coordinates, paste0(coordinates_dir, "/raw_coordinates_", image_name, ".csv"), row.names = TRUE)
# Save as PNG (best for analysis)
png::writePNG(img_array, target = paste0(images_dir, "/raw_image_", image_name, ".png"))
}
## 1 / 10 : slice1_BN0662
## 2 / 10 : slice1_BN1076
## 3 / 10 : slice1_BN1424
## 4 / 10 : slice1_BN1535
## 5 / 10 : slice1_BN1726
## 6 / 10 : slice1_BN1762
## 7 / 10 : slice1_BN1817
## 8 / 10 : slice1_BN1822
## 9 / 10 : slice1_BN1827
## 10 / 10 : slice1_BN1839
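As a final visual check, one saved image can be overlaid with its spot coordinates; a sketch assuming the standard VisiumV1 fields (full-resolution imagerow/imagecol pixel coordinates and a lowres scale factor):
# Reload the first section's image, coordinates, and scale factors
img_name <- all_names[1]
img <- png::readPNG(paste0(images_dir, "/raw_image_", img_name, ".png"))
coords <- read.csv(paste0(coordinates_dir, "/raw_coordinates_", img_name, ".csv"), row.names = 1)
sf <- fromJSON(paste0(coordinates_dir, "/raw_scalefactors_", img_name, ".json"))
h <- dim(img)[1]
w <- dim(img)[2]
plot(NA, xlim = c(0, w), ylim = c(0, h), asp = 1, xlab = "", ylab = "", axes = FALSE)
rasterImage(img, 0, 0, w, h)
# Scale full-resolution coordinates to the lowres image; flip y because
# image row 1 is at the top of the raster
points(coords$imagecol * sf$lowres, h - coords$imagerow * sf$lowres, pch = 16, cex = 0.3, col = "red")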
cat("Done! Data extraction is complete!
You can find the output files in the directory:", output_dir, "\n")
## Done! Data extraction is complete!
## You can find the output files in the directory: example_data/Visium_MTG_10samples