Extract Data from Seurat Object

Overview
Usage
Load Libraries
Set Arguments
Create Output Directory
Load Seurat Object
Check Seurat Object Structure
Extract and Save Metadata
Save UMAP Embedding
Extract and Save Normalized Counts
Extract and Save Spatial Information

Overview

This notebook extracts data from a Seurat object and saves it in formats suitable for downstream analysis.

Usage

Run as an R script:

Rscript extract_Visium_v5.R seurat_obj.rds output_directory cluster_column

Load Libraries

library(jsonlite)
library(Matrix)
library(data.table)
library(Seurat)
library(tidyverse)
library(jsonlite)
library(png)

Set Arguments

You can set these manually for interactive use:

# Set these manually for notebook use
seurat_obj_file <- "example_data/Visium_MTG_10samples.rds"  # Path to your Seurat object fil
output_dir <- "example_data/Visium_MTG_10samples"  # Directory to save output files, usually the dataset name
cluster_col <- "smoothed_label_s5"  # Column name in metadata for clustering information

Create Output Directory

if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

Load Seurat Object

cat("Loading RDS data...\n")

## Loading RDS data...

seurat_obj <- readRDS(seurat_obj_file)
if (!inherits(seurat_obj, "Seurat")) {
  stop("The provided file is not a valid Seurat object.")
}
capture.output(str(seurat_obj), file = paste0(output_dir, "/seurat_obj_structure.txt"))

Check Seurat Object Structure

v5 Seurat objects have a different structure compared to v4, so we need to ensure the object contains the expected slots and layers.

# Check if the Seurat object has the necessary assay
if (!"Spatial" %in% names(seurat_obj@assays)) {
  stop("The Seurat object does not contain the 'Spatial' assay.")
}
# Check if the Seurat object has the necessary assay data
if (!"data" %in% slotNames(seurat_obj@assays$Spatial)) {
  stop("The Seurat object does not contain the 'data' slot in the 'Spatial' assay.")
}

# Check if the Seurat object has the necessary metadata
if (!"meta.data" %in% slotNames(seurat_obj)) {
  stop("The Seurat object does not contain the 'meta.data' slot.")
}

## Check if the Seurat object has the necessary umap embedding
if (!"umap" %in% names(seurat_obj@reductions)) {
  stop("The Seurat object does not contain the 'umap' reduction.")
}

# Check if the cluster column exists in the metadata
if (!cluster_col %in% colnames(seurat_obj@meta.data)) {
  stop(paste("The cluster column", cluster_col, "does not exist in the metadata."))
}

Extract and Save Metadata

cat("Saving metadata...\n")

## Saving metadata...

metadata <- seurat_obj@meta.data
write.csv(metadata, file = paste0(output_dir, "/raw_metadata.csv"), row.names = TRUE)

cat("Saving metadata feature names...\n")

## Saving metadata feature names...

column_names <- colnames(metadata)
write_json(column_names, path = file.path(output_dir, "raw_metadata_columns.json"), pretty = TRUE)

Save UMAP Embedding

cat("Saving UMAP embedding...\n")

## Saving UMAP embedding...

umap_embeddings <- seurat_obj@reductions$umap@cell.embeddings
colnames(umap_embeddings) <- toupper(colnames(umap_embeddings))
write.csv(umap_embeddings, file = paste0(output_dir, "/raw_umap_embeddings.csv"), row.names = TRUE)

Extract and Save Normalized Counts

cat("Saving normalized counts...\n")

## Saving normalized counts...

normalized_counts <- seurat_obj@assays$Spatial@data  # This is a sparse matrix

# Convert sparse matrix to triplet format (long format)
triplet <- summary(normalized_counts)
# Get row (gene) and column (spot) names
triplet$Gene <- rownames(normalized_counts)[triplet$i]
triplet$Spot <- colnames(normalized_counts)[triplet$j]
triplet$Expression <- triplet$x
# Keep only necessary columns
triplet <- triplet[, c("Gene", "Spot", "Expression")]

long_data <- triplet %>%
  select(Spot, Gene, Expression)

nonzero_data <- long_data[long_data$Expression > 0, ]
nonzero_data <- as.data.table(nonzero_data)
fwrite(nonzero_data, file = paste0(output_dir, "/raw_normalized_counts.csv"), row.names = FALSE)

Extract and Save Spatial Information

cat("Save images and coordinates...\n")

## Save images and coordinates...

images = seurat_obj@images
all_names = names(images)

## Create the directory
images_dir = paste0(output_dir, "/images")
dir.create(images_dir, showWarnings = FALSE)
coordinates_dir = paste0(output_dir, "/coordinates")
dir.create(coordinates_dir, showWarnings = FALSE)

#Loop and extract the image data
for (i in 1:length(all_names)) {
    cat(i, "/", length(all_names), ": ", all_names[i], "\n")

    image_name = all_names[i]
    spatial_data = images[[image_name]]
    img_array = spatial_data@image
    coordinates = spatial_data@coordinates

    # Extract the scale.factors from the Seurat object
    scale_factors <- spatial_data@scale.factors

    # Remove the custom class attributes
    scale_factors_plain <- unclass(scale_factors)
    scale_factors_plain$spot.radius = spatial_data@spot.radius

    # Convert to JSON with pretty formatting
    json_output <- toJSON(scale_factors_plain, pretty = TRUE, auto_unbox = TRUE)

    # Save to a JSON file
    write(json_output, paste0(coordinates_dir, "/raw_scalefactors_", image_name, ".json"))
    # Save coordinates
    write.csv(coordinates, paste0(coordinates_dir, "/raw_coordinates_", image_name, ".csv"), row.names = TRUE)
    # Save as PNG (best for analysis)
    png::writePNG(img_array, target = paste0(images_dir, "/raw_image_", image_name, ".png"))
}

## 1 / 10 :  slice1_BN0662 
## 2 / 10 :  slice1_BN1076 
## 3 / 10 :  slice1_BN1424 
## 4 / 10 :  slice1_BN1535 
## 5 / 10 :  slice1_BN1726 
## 6 / 10 :  slice1_BN1762 
## 7 / 10 :  slice1_BN1817 
## 8 / 10 :  slice1_BN1822 
## 9 / 10 :  slice1_BN1827 
## 10 / 10 :  slice1_BN1839

cat("Done! Data extraction is complete!
You can find the output files in the directory:", output_dir, "\n")

## Done! Data extraction is complete!
## You can find the output files in the directory: example_data/Visium_MTG_10samples