Interactive code can be found at the following address:

1 Blastoid

Read preprocessing

Raw reads can be obtained from GSE177689.

Smart-Seq transcriptome sequencing experiments were analyzed using genome sequence and gene annotation from Ensembl GRCh38 release 103 as reference. For gene expression quantification RNA-seq reads were first trimmed using trim-galore v0.6.6 and thereafter aligned to the genome using hisat2 v2.2.1. Uniquely mapping reads in genes were quantified using htseq-count v0.13.5 with parameter -s no. TPM estimates were obtained using RSEM v1.3.3 with parameter –single-cell-prior

Loading packages and preprocessed data

Raw count data and metadata files can be obtained from NCBI GEO GSE177616 or from here:
- blastoid.H9.okae_bts5__scRNAseq.unfiltered__counts.RDS
- blastoid.H9.okae_bts5__scRNAseq.unfiltered__metadata.tsv

suppressPackageStartupMessages({
    library(RColorBrewer)
    library(ggplot2)
    library(dplyr)
    library(scran)
    library(Seurat)
    library(kableExtra)
})

counts.all <- readRDS("blastoid.H9.okae_bts5__scRNAseq.unfiltered__counts.RDS") %>% tibble::column_to_rownames("gene_id")
meta.all <- readr::read_tsv("blastoid.H9.okae_bts5__scRNAseq.unfiltered__metadata.tsv") %>%
        dplyr::mutate(time = case_when(
                      grepl("24h",samplename) ~ "24h",
                      grepl("60h",samplename) ~ "60h",
                      grepl("96h",samplename) ~ "96h",
                      TRUE ~ gsub("-.*","",samplename)
                  )) %>% data.frame()

rownames(meta.all) <- meta.all$sampleid

mtname="^ENSG[[:digit:]]+-MT-"

Filtering

Based on initial evaluation of per-cell quality control metrices and outlier identification using the median absolute deviation algorithm, cells with <= 2000 detected genes or >= 12.5% mitochondrial gene percentage were filtered out. Only genes detected in at least 5 cells were retained.

meta.filter <- subset(meta.all,
    nFeature_RNA > 2000 & percent.mt < 12.5)
counts.filter <- counts.all[,rownames(meta.filter)]

sobj <- CreateSeuratObject(counts.filter, min.cells = 5, meta.data = meta.filter)

Data Analysis

Count-data were log-normalized, top 3000 highly variable were selected, and standardization of per gene expression values across cells was performed using NormalizeData, FindVariableFeatures and ScaleData data functions in Seurat. Principal component analysis (PCA) based on the standardized highly variable features was used for linear dimension reduction, a shared nearest neighbor (SNN) graph was constructed on the dimensionally reduced data, and the graph was partitioned using a SNN modularity optimization based clustering algorithm at a range of resolutions using RunPCA, FindNeighbors and FindClusters from Seurat with default settings. Cluster marker genes were identified with the Wilcox likelihood-ratio test using the FindAllMarkers function. Uniform Manifold Approximation and Projection (UMAP) was used for visualization.

set.seed(1234)
varGene = 3000
pc = 20
resolutionwanted=c(0.02,1)
sobj <- sobj %>% Seurat::NormalizeData(verbose = FALSE) %>%
    FindVariableFeatures(nfeatures = varGene) %>%
    ScaleData(verbose = FALSE) %>%
    RunPCA(verbose = FALSE) %>%
    FindNeighbors(verbose = FALSE) %>%
    FindClusters(resolution = resolutionwanted, verbose=FALSE) %>%
    RunUMAP(dims = 1:pc, n.components = 3L, min.dist = 0.5, verbose=FALSE)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

Data Visualization

UMAP/Sampletime

UMAP projection colored according to sample time

idents <- c("naive_H9","24h","60h","96h","primed_H9","okae_bts5")
colors <- rev(c("#ff7f00","#4d076a","#ea7bc0","#619CFF","#fdbf6f","#33a02c"))
Idents(sobj) <- "time"
p <- DimPlot(sobj, order = idents, cols = colors, pt.size=0.4)
p[[1]]$layers[[1]]$aes_params$alpha = .7
p <- p + theme(aspect.ratio = 1, legend.position = "bottom", legend.title = element_blank()) +
    guides(color = guide_legend(override.aes = list(size=3)))
print(p)

UMAP/Clusters

UMAP projection colored according to cluster identity at various resolutions

ResolutionList <- grep("_snn_res", colnames([email protected]), value = TRUE)
for(Resolution in ResolutionList){
    print(DimPlot(sobj, label = TRUE, group.by = Resolution, order = T, pt.size = 0.4) +
          theme(aspect.ratio=1) +
          theme(legend.title=element_blank())
          )
          
}

UMAP/Marker

UMAP projection colored according to expression of selected EPI, TE, and PrE markers

markers.list<-list(
    "TE" = c("GATA2","GATA3"),
    "EPI" = c("POU5F1","KLF17"),
    "PRE" = c("GATA4","SOX17")
)
for(set in names(markers.list)){
    ens.gn <- gs2ensid(markers.list[[set]])
    print(FeaturePlot(object = sobj, features = ens.gn, order = T, pt.size = 0.4, max.cutoff = "q99",cols = c("#FEE0D2","#67000D")) + theme(aspect.ratio=1))
}

Marker Heatmap

Identify top markers based on avg_log2FC at resolution 0.02 and display their expression in a row-scaled heatmap sorted by cluster and sample time.

#wanted.resolution <- "RNA_snn_res.0.02" 
wanted.resolution <- "blastoid.Fig2b.lowres"
## Identify top markers at wanted resolution

Idents(sobj) <- wanted.resolution
top.markers <- FindAllMarkers(sobj, only.pos = TRUE, verbose = FALSE) %>%
    dplyr::filter(p_val_adj < 0.01) %>%
    group_by(cluster) %>%
    top_n(n = 30, wt = avg_log2FC) %>%
    dplyr::filter(cluster %in% c(0,1,3))


## Order time points of interest

times.order <- c("naive_H9","24h","60h","96h")
sobjT <- subset(sobj, subset = time %in% times.order)
sobjT[["time"]] <- factor([email protected]$time, levels = times.order)


## Get CB ordered by cluster id and time

sorted.cells <- subset(sobjT, subset = time %in% times.order)@meta.data %>%
                                                      arrange_at(c(wanted.resolution,"time")) %>%
                                                      pull(sampleid) %>% as.character()

# Get their normalized expr values for the identified top markers

lognormExp <- as.matrix(GetAssayData(sobjT, slot = "data"))[ top.markers$gene, sorted.cells]
lognormExp.scaled <- t(apply(lognormExp,1,scale)) %>% data.frame() %>%
    mutate(across(everything(), ~ ifelse(. > 2.5, 2.5, .))) %>%
    mutate(across(everything(), ~ ifelse(. < -2.5, -2.5, .))) %>%
    setNames(colnames(lognormExp))

colanno <- [email protected][match(colnames(lognormExp.scaled),rownames([email protected])),c(wanted.resolution,"time")]

heat.col <- colorRampPalette(c("#0D0887FF","#0D0887FF","#0D0887FF","#0D0887FF","#7E03A8FF","#CC4678FF","#F89441FF","#F0F921FF","#F0F921FF"))(100)

pheatmap::pheatmap(lognormExp.scaled,
                   width = 500,
                   height = 300,
                   annotation = colanno,
                   cluster_rows = F,
                   cluster_cols = F,
                   scale = "none",
                   show_colnames = F,
                   show_rownames = T,
                   color = heat.col,
                   border_color = "NA",
                   fontsize = 4,
                   fontsize_row = 2)

timmy1027 / human_blastoid_kagawa_et_al- Goto Github PK

human_blastoid_kagawa_et_al-'s Introduction

**Blastoid scRNAseq processing

1 Blastoid

Read preprocessing

Loading packages and preprocessed data

Filtering

Data Analysis

Data Visualization

UMAP/Sampletime

UMAP/Clusters

UMAP/Marker

Marker Heatmap

human_blastoid_kagawa_et_al-'s People

Contributors

Stargazers

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent