Skip to content

Commit 938af91

Browse files
committed
upload img
1 parent 1c458a8 commit 938af91

6 files changed

+291
-27
lines changed
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#---------------------------
2+
# Annotate DNA lineage
3+
# 1. quantify any signature for single-cell using UCELL for the ciceroRNA (preferred) or the iRNA assay
4+
# 2. visualize signature on the DNA lineage tree ----
5+
6+
# Can use either iRNA or ciceroRNA
7+
# Can use either UCell or Seurat's addmodulescore
8+
#---------------------------
9+
suppressPackageStartupMessages({
10+
library(Seurat)
11+
library(Signac)
12+
library(tidyverse); library(forcats)
13+
library(fs)
14+
library(copykit)
15+
library(VennDiagram)
16+
library(SummarizedExperiment)
17+
library(ggpubr); library(ruok); library(scales)
18+
library(ComplexHeatmap)
19+
})
20+
setwd("/volumes/USR1/yyan/project/coda")
21+
source('/volumes/USR1/yyan/project/coda/rsrc/utils.R')
22+
23+
options <- commandArgs(trailingOnly = TRUE)
24+
if (length(options)>0) {
25+
cat('Reading user parameters:\n')
26+
sample_name <- options[[1]]
27+
rna_assay_use <- options[[2]]
28+
module_method <- options[[3]]
29+
} else {
30+
sample_name = 'DCIS35T'
31+
rna_assay_use <- 'iRNA'
32+
rna_assay_use <- 'ciceroRNA'
33+
module_method <- 'ucell'
34+
module_method <- 'seurat'
35+
}
36+
37+
#------ input / output ------
38+
39+
dir_coda <- file.path(
40+
'/volumes/USR1/yyan/project/coda',
41+
'rds_coda_ready', sample_name, 'aneuploid_epi')
42+
43+
44+
load_coda(dir_coda) # obja; objd; df_meta
45+
# consensus_z <- 'clones'
46+
print(obja)
47+
48+
#------ load ciceroRNA ------
49+
f_crna <- file.path(dir_coda, 'cicero', 'ciceroRNA_assay.seurat.rds')
50+
crna <- try(read_rds(f_crna))
51+
52+
#------ choose RNA assay ------
53+
54+
if ( 'try-error' %in% class(crna)) {
55+
DefaultAssay(obja) <- 'iRNA'
56+
} else {
57+
stopifnot( identical(Cells(crna), Cells(obja)) )
58+
obja[['ciceroRNA']] <- crna[['ciceroRNA']]
59+
DefaultAssay(obja) <- 'ciceroRNA'
60+
}
61+
62+
try(DefaultAssay(obja) <- rna_assay_use)
63+
64+
cat('ATAC uses RNA assay = ', DefaultAssay(obja), '\n')
65+
print(obja)
66+
#------ choose pathways ------
67+
68+
if (T) {
69+
library(msigdbr)
70+
db_msigdb_H <- msigdbr(species = "Homo sapiens", category = "H") %>%
71+
dplyr::select(gs_name, gene_symbol)
72+
db_msigdb_H <- deframe_to_list(as.data.frame(db_msigdb_H))
73+
names(db_msigdb_H) <- make.names(names(db_msigdb_H))
74+
}
75+
76+
db = db_msigdb_H
77+
name_db <- 'msigdb_hallmark'
78+
79+
#------ setup dir_res ------
80+
81+
dir_res <- file.path(
82+
dir_coda, sprintf('%s_%s_%s',
83+
module_method,
84+
name_db,
85+
DefaultAssay(obja)))
86+
fs::dir_create(dir_res)
87+
88+
#------ run UCELL ------
89+
if (module_method == 'ucell') {
90+
library(UCell)
91+
if (! file.exists(file.path(dir_res, 'addmodulescore.df.rds'))) {
92+
# if (T) { ## force to rerun
93+
message('Run AddModuleScore_UCell')
94+
obja <- AddModuleScore_UCell(obja, features = db, name="_UCell")
95+
names(db) <- paste0(names(db), '_UCell')
96+
df_signature_res <- obja[[]][, names(db)]
97+
print(colnames(df_signature_res))
98+
write.csv(x=df_signature_res, file = file.path(dir_res, 'addmodulescore.df.csv'))
99+
write_rds(x=df_signature_res, file.path(dir_res, 'addmodulescore.df.rds'))
100+
} else {
101+
message('Load AddModuleScore_UCell')
102+
df_signature_res <- read_rds(file.path(dir_res, 'addmodulescore.df.rds'))
103+
stopifnot(identical(rownames(df_signature_res), Cells(obja)))
104+
obja <- AddMetaData(obja, metadata = df_signature_res)
105+
names(db) <- paste0(names(db), '_UCell')
106+
}
107+
}
108+
109+
#------ Run AddModulescore ------
110+
111+
if (module_method == 'seurat') {
112+
if (! file.exists(file.path(dir_res, 'addmodulescore.df.rds'))) {
113+
# if (T) { ## force to rerun
114+
message('Run AddModuleScore')
115+
obja <- AddModuleScore(obja, features = db, name='seuratmodule')
116+
for ( tmp in 1:length(db) ) {
117+
obja@meta.data[paste0(names(db)[tmp], '_seurat')] <- obja@meta.data[paste0('seuratmodule', tmp)]
118+
obja@meta.data[paste0('seuratmodule', tmp)] <- NULL
119+
}
120+
names(db) <- paste0(names(db), '_seurat')
121+
df_signature_res <- obja[[]][, names(db)]
122+
print(colnames(df_signature_res))
123+
write.csv(
124+
x=df_signature_res,
125+
file = file.path(dir_res, 'addmodulescore.df.csv'))
126+
write_rds(
127+
x=df_signature_res,
128+
file.path(dir_res, 'addmodulescore.df.rds'))
129+
} else {
130+
message('Load AddModuleScore')
131+
df_signature_res <- read_rds(file.path(dir_res, 'addmodulescore.df.rds'))
132+
stopifnot(identical(rownames(df_signature_res), Cells(obja)))
133+
obja <- AddMetaData(obja, metadata = df_signature_res)
134+
names(db) <- paste0(names(db), '_seurat')
135+
}
136+
}
137+
138+
139+
140+
#------ viz Featureplot ------
141+
update_coda(df_meta, obja, objd)
142+
143+
pdf(file.path(dir_res, sprintf('coda_featureplot.birdview_%s.pdf', name_db)),
144+
width = 8, height = 5, onefile = T, useDingbats = F)
145+
for (i in seq_along(names(db))) {
146+
print(coda_featureplot2(df = df_meta, feature = names(db)[i], pal='Plasma'))
147+
}
148+
dev.off()
149+
150+
151+
#------ viz VlnPlot ------
152+
pal_cna_clones = new_palette_D(levels(objd@colData$clones), pal = 'stallion')
153+
n_clones <- length(unique(objd@colData$clones))
154+
for (z in c('clones')) {
155+
message(z)
156+
pdf(file.path(dir_res, sprintf('vlnplot.birdview_%s.%s.pdf', name_db, z)),
157+
width = n_clones*0.9, height = 4, useDingbats = F, onefile = T)
158+
for (i in seq_along(names(db))) {
159+
p <- VlnPlot(obja, feature = names(db)[i],
160+
group.by = z, pt.size = 0.5) &
161+
labs(x = z) &
162+
scale_fill_manual(values = pal_cna_clones) &
163+
stat_compare_means(label = "p.format") &
164+
stat_summary(fun = 'mean', colour = "cyan", size = 2, geom = "point", pch=4)
165+
print(p)
166+
167+
}
168+
dev.off()
169+
}
170+
names(db)
171+
172+
cat('Done: ', sample_name, '-', rna_assay_use, '-', module_method, '\n')
173+

tutorial/scripts/coda.annotate_DNA_lineage.s3_cna.public.R

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ calc_gearyc <- function(w, x) {
414414

415415

416416
calc_heritability_score_gearyc <- function(d, x) {
417-
## d: distance between clones
417+
## d: distance between clones
418418
## x: values for the clones
419419
max_d <- apply(d, 1, max)
420420
w <- exp(-1 * d^2 / max_d^2)
@@ -435,18 +435,6 @@ calc_heritability_score_gearyc_hnull <- function(d, x, ntimes=1e3, ncores=8) {
435435
o <- sort(as.numeric(unlist(o)))
436436
}
437437

438-
439-
if (F) {
440-
w <- matrix(c(0, 1/3, 1/3, 0, 0,
441-
1/2, 0, 1/3, 1/3, 0,
442-
1/2, 1/3, 0, 1/3, 0,
443-
0, 1/3, 1/3, 0, 1,
444-
0, 0, 0, 1/3, 0), byrow = F, nrow = 5, ncol = 5)
445-
x <- c(8, 6, 6, 3, 2)
446-
rownames(w) <- colnames(w) <- names(x) <- letters[1:5]
447-
calc_gearyc(w, x)
448-
calc_heritability_score_gearyc(w, x)
449-
}
450438
phenotypes_sig_diverse_heritability_gearyc <- c()
451439
phenotypes_sig_diverse_heritability_gearyc_pval_up <- c()
452440
phenotypes_sig_diverse_heritability_gearyc_pval_dn <- c()
@@ -508,16 +496,6 @@ calc_heritability_score_orig <- function(d1, d2) {
508496
'pval' = as.numeric(o$p.value))
509497
}
510498

511-
if (F) {
512-
## not helpful
513-
phenotypes_sig_diverse_heritability_orig_test <- sapply(
514-
phenotypes_sig_diverse_opts, function(xx) {
515-
calc_heritability_score_orig(
516-
as.dist( log((cna_dist_mat+1e-3)/2) ),
517-
dist(mat_pheno_viz_zscore[, xx, drop=F]) )
518-
}
519-
)
520-
}
521499
phenotypes_sig_diverse_heritability_orig_test <- sapply(
522500
phenotypes_sig_diverse_opts, function(xx) {
523501
calc_heritability_score_orig(

tutorial/scripts/std.ATAC.cicero_CCAN.R

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,7 @@ if (F) {
131131

132132
coaccess_cutoff_override <- 0
133133

134-
# sample_name <- 'DCIS66T_chip2'
135-
sample_name <- 'DCIS22T'
136-
sample_name <- 'DCIS35T'
137-
# sample_name = 'mda231wt_kaile'
134+
sample_name <- 'DCIS66T_chip2'
138135
args <- commandArgs(trailingOnly = TRUE)
139136
if (length(args) > 0) {
140137
sample_name = args[[1]]
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
source('/volumes/USR1/yyan/project/coda/rsrc/r_libs.R')
2+
source('/volumes/USR1/yyan/project/coda/rsrc/utils.R')
3+
if (T) {
4+
library(cicero); library(monocle3)
5+
library(data.table);
6+
setDTthreads(threads = 4)
7+
}
8+
9+
cat('load input...\n')
10+
# sample_name <- 'DCIS66T_chip2'
11+
# sample_name <- 'DCIS22T'
12+
# sample_name <- 'DCIS51T'
13+
# sample_name <- 'DCIS68T'
14+
sample_name <- 'DCIS35T'
15+
16+
args <- commandArgs(trailingOnly = TRUE)
17+
if (length(args) > 0) {
18+
sample_name = args[[1]]
19+
}
20+
21+
dir_coda <- file.path(
22+
'/volumes/USR1/yyan/project/coda/rds_coda_ready',
23+
sample_name, 'aneuploid_epi')
24+
dir_cicero <- file.path(
25+
'/volumes/USR1/yyan/project/coda/rds_coda_ready',
26+
sample_name, 'aneuploid_epi', 'cicero')
27+
28+
29+
load_coda(dir_coda)
30+
cicero <- read_rds(file.path(dir_cicero, 'obja.cicero.rds'))
31+
monocle <- read_rds(file.path(dir_cicero, 'obja.monocle3.rds'))
32+
conns <- read_rds(file.path(dir_cicero, 'cicero_conns.df.rds'))
33+
## REF: https://cole-trapnell-lab.github.io/cicero-release/docs_m3/#cicero-gene-activity-scores
34+
35+
fpath_hg19_gtf <- 'https://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz'
36+
fpath_hg19_gtf <- '/volumes/USR1/yyan/public/hg19/Homo_sapiens.GRCh37.87.gtf.gz'
37+
fpath_hg19_gtf <- '/volumes/USR1/yyan/public/hg19/Homo_sapiens.GRCh37.87.rtracklayer.dataframe.rds'
38+
if (F) {
39+
gene_anno <- rtracklayer::readGFF(fpath_hg19_gtf); class(gene_anno)
40+
write_rds(gene_anno, fpath_hg19_gtf)
41+
} else {
42+
gene_anno <- read_rds( fpath_hg19_gtf )
43+
}
44+
colnames(gene_anno)
45+
gene_anno$chromosome <- paste0("chr", gene_anno$seqid)
46+
gene_anno$gene <- gene_anno$gene_id
47+
gene_anno$transcript <- gene_anno$transcript_id
48+
gene_anno$symbol <- gene_anno$gene_name
49+
50+
if (T) {
51+
pos <- subset(gene_anno, strand == "+")
52+
pos <- pos[order(pos$start),]
53+
# remove all but the first exons per transcript
54+
pos <- pos[!duplicated(pos$transcript),]
55+
# make a 1 base pair marker of the TSS
56+
pos$end <- pos$start + 1
57+
neg <- subset(gene_anno, strand == "-")
58+
neg <- neg[order(neg$start, decreasing = TRUE),]
59+
# remove all but the first exons per transcript
60+
neg <- neg[!duplicated(neg$transcript),]
61+
neg$start <- neg$end - 1
62+
gene_annotation_sub <- rbind(pos, neg)
63+
# Make a subset of the TSS annotation columns containing just the coordinates
64+
# and the gene name
65+
gene_annotation_sub <- gene_annotation_sub[,c("chromosome", "start", "end", "symbol")]
66+
# Rename the gene symbol column to "gene"
67+
names(gene_annotation_sub)[4] <- "gene"
68+
gene_annotation_sub$chromosome[gene_annotation_sub$chromosome=='chrMT'] <- 'chrM'
69+
}
70+
71+
cat('annotate_cds_by_site...\n')
72+
monocle <- annotate_cds_by_site(monocle, gene_annotation_sub, verbose = TRUE)
73+
tail(fData(monocle))
74+
75+
76+
coaccess_cutoff_override_use <- readRDS(
77+
file.path(dir_cicero, 'coaccess_cutoff_override_use.value.rds'))
78+
cat('using coaccesss cutoff=', coaccess_cutoff_override_use, '...\n')
79+
80+
cat('build_gene_activity_matrix...\n')
81+
unnorm_ga <- build_gene_activity_matrix(
82+
monocle, conns,
83+
dist_thresh=100e3,
84+
coaccess_cutoff = coaccess_cutoff_override_use)
85+
class(unnorm_ga) # dgCMatrix
86+
print(dim(unnorm_ga) )
87+
print(range(unnorm_ga)) # count matrix
88+
print(unnorm_ga[1:3, 1:3])
89+
write_rds(unnorm_ga, file.path(dir_cicero, 'ciceroRNA_counts.mat.rds'))
90+
unnorm_ga <- readRDS( file.path(dir_cicero, 'ciceroRNA_counts.mat.rds') )
91+
#------ create seurat object assay ------
92+
cat('CreateSeuratObject...\n')
93+
f_ciceroRNA_assay <- file.path(dir_cicero, 'ciceroRNA_assay.seurat.rds')
94+
ciceroRNA_assay_name <- 'ciceroRNA'
95+
cRNA_assay <- CreateAssayObject(counts = unnorm_ga)
96+
stopifnot(identical(colnames(cRNA_assay), Cells(obja)))
97+
cRNA_assay <- CreateSeuratObject(
98+
counts = cRNA_assay, assay = 'ciceroRNA',
99+
meta.data = obja@meta.data[colnames(cRNA_assay), ])
100+
DefaultAssay(cRNA_assay) <- ciceroRNA_assay_name
101+
cRNA_assay <- NormalizeData(cRNA_assay)
102+
write_rds(cRNA_assay, f_ciceroRNA_assay)
103+
104+
cRNA_assay <- FindVariableFeatures(cRNA_assay)
105+
cRNA_assay <- ScaleData(cRNA_assay)
106+
cRNA_assay <- RunPCA(cRNA_assay)
107+
pdf(file.path(dir_cicero, 'plot.cRNA_assay.ElbowPlot.pdf'), width = 5, height = 4)
108+
print(ElbowPlot(cRNA_assay))
109+
dev.off()
110+
write_rds(cRNA_assay, f_ciceroRNA_assay)
111+
112+
cRNA_assay <- RunUMAP(cRNA_assay, dims = 1:30)
113+
write_rds(cRNA_assay, f_ciceroRNA_assay)
114+
DimPlot(cRNA_assay, group.by = 'clones', label=T)
115+
116+
cat('Done:', sample_name, '\n')
83 KB
Loading
247 KB
Loading

0 commit comments

Comments
 (0)