Overview

This notebook extracts data from a Seurat object and saves it in formats suitable for downstream analysis.

Usage

Run as an R script:

Rscript extract_Visium_v5.R seurat_obj.rds output_directory cluster_column

Load Libraries

library(jsonlite)
library(Matrix)
library(data.table)
library(Seurat)
library(tidyverse)
library(jsonlite)
library(png)

Set Arguments

You can set these manually for interactive use:

# Set these manually for notebook use
seurat_obj_file <- "example_data/Visium_MTG_10samples.rds"  # Path to your Seurat object fil
output_dir <- "example_data/Visium_MTG_10samples"  # Directory to save output files, usually the dataset name
cluster_col <- "smoothed_label_s5"  # Column name in metadata for clustering information

Create Output Directory

if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

Load Seurat Object

cat("Loading RDS data...\n")
## Loading RDS data...
seurat_obj <- readRDS(seurat_obj_file)
if (!inherits(seurat_obj, "Seurat")) {
  stop("The provided file is not a valid Seurat object.")
}
capture.output(str(seurat_obj), file = paste0(output_dir, "/seurat_obj_structure.txt"))

Check Seurat Object Structure

v5 Seurat objects have a different structure compared to v4, so we need to ensure the object contains the expected slots and layers.

# Check if the Seurat object has the necessary assay
if (!"Spatial" %in% names(seurat_obj@assays)) {
  stop("The Seurat object does not contain the 'Spatial' assay.")
}
# Check if the Seurat object has the necessary assay data
if (!"data" %in% slotNames(seurat_obj@assays$Spatial)) {
  stop("The Seurat object does not contain the 'data' slot in the 'Spatial' assay.")
}

# Check if the Seurat object has the necessary metadata
if (!"meta.data" %in% slotNames(seurat_obj)) {
  stop("The Seurat object does not contain the 'meta.data' slot.")
}

## Check if the Seurat object has the necessary umap embedding
if (!"umap" %in% names(seurat_obj@reductions)) {
  stop("The Seurat object does not contain the 'umap' reduction.")
}

# Check if the cluster column exists in the metadata
if (!cluster_col %in% colnames(seurat_obj@meta.data)) {
  stop(paste("The cluster column", cluster_col, "does not exist in the metadata."))
}

Extract and Save Metadata

cat("Saving metadata...\n")
## Saving metadata...
metadata <- seurat_obj@meta.data
write.csv(metadata, file = paste0(output_dir, "/raw_metadata.csv"), row.names = TRUE)

cat("Saving metadata feature names...\n")
## Saving metadata feature names...
column_names <- colnames(metadata)
write_json(column_names, path = file.path(output_dir, "raw_metadata_columns.json"), pretty = TRUE)

Save UMAP Embedding

cat("Saving UMAP embedding...\n")
## Saving UMAP embedding...
umap_embeddings <- seurat_obj@reductions$umap@cell.embeddings
colnames(umap_embeddings) <- toupper(colnames(umap_embeddings))
write.csv(umap_embeddings, file = paste0(output_dir, "/raw_umap_embeddings.csv"), row.names = TRUE)

Extract and Save Normalized Counts

cat("Saving normalized counts...\n")
## Saving normalized counts...
normalized_counts <- seurat_obj@assays$Spatial@data  # This is a sparse matrix

# Convert sparse matrix to triplet format (long format)
triplet <- summary(normalized_counts)
# Get row (gene) and column (spot) names
triplet$Gene <- rownames(normalized_counts)[triplet$i]
triplet$Spot <- colnames(normalized_counts)[triplet$j]
triplet$Expression <- triplet$x
# Keep only necessary columns
triplet <- triplet[, c("Gene", "Spot", "Expression")]

long_data <- triplet %>%
  select(Spot, Gene, Expression)

nonzero_data <- long_data[long_data$Expression > 0, ]
nonzero_data <- as.data.table(nonzero_data)
fwrite(nonzero_data, file = paste0(output_dir, "/raw_normalized_counts.csv"), row.names = FALSE)

Extract and Save Spatial Information

cat("Save images and coordinates...\n")
## Save images and coordinates...
images = seurat_obj@images
all_names = names(images)

## Create the directory
images_dir = paste0(output_dir, "/images")
dir.create(images_dir, showWarnings = FALSE)
coordinates_dir = paste0(output_dir, "/coordinates")
dir.create(coordinates_dir, showWarnings = FALSE)

#Loop and extract the image data
for (i in 1:length(all_names)) {
    cat(i, "/", length(all_names), ": ", all_names[i], "\n")

    image_name = all_names[i]
    spatial_data = images[[image_name]]
    img_array = spatial_data@image
    coordinates = spatial_data@coordinates

    # Extract the scale.factors from the Seurat object
    scale_factors <- spatial_data@scale.factors

    # Remove the custom class attributes
    scale_factors_plain <- unclass(scale_factors)
    scale_factors_plain$spot.radius = spatial_data@spot.radius

    # Convert to JSON with pretty formatting
    json_output <- toJSON(scale_factors_plain, pretty = TRUE, auto_unbox = TRUE)

    # Save to a JSON file
    write(json_output, paste0(coordinates_dir, "/raw_scalefactors_", image_name, ".json"))
    # Save coordinates
    write.csv(coordinates, paste0(coordinates_dir, "/raw_coordinates_", image_name, ".csv"), row.names = TRUE)
    # Save as PNG (best for analysis)
    png::writePNG(img_array, target = paste0(images_dir, "/raw_image_", image_name, ".png"))
}
## 1 / 10 :  slice1_BN0662 
## 2 / 10 :  slice1_BN1076 
## 3 / 10 :  slice1_BN1424 
## 4 / 10 :  slice1_BN1535 
## 5 / 10 :  slice1_BN1726 
## 6 / 10 :  slice1_BN1762 
## 7 / 10 :  slice1_BN1817 
## 8 / 10 :  slice1_BN1822 
## 9 / 10 :  slice1_BN1827 
## 10 / 10 :  slice1_BN1839
cat("Done! Data extraction is complete!
You can find the output files in the directory:", output_dir, "\n")
## Done! Data extraction is complete!
## You can find the output files in the directory: example_data/Visium_MTG_10samples