library(ithi.utils)
load_base_libs()
library(methods)
library(ithi.meta)
library(ithi.figures)
library(ithi.utils)
library(ithi.seq)
library(ithi.clones)
library(ithi.supp)
library(ithi.xcr)
ihc_table_path <- snakemake@input$ihc_table
xcr_table_path <- snakemake@input$xcr_table
neoediting_outdir <- snakemake@input$neoediting_outdir
snv_cluster_files <- snakemake@input$snv_cluster_files
clone_tree_file <- snakemake@input$clone_tree_file
clone_branch_length_file <- snakemake@input$clone_branch_length_file
clone_prevalence_file <- snakemake@input$clone_prevalence_file
tcr_diversity_file <- snakemake@input$tcr_diversity
bcr_diversity_file <- snakemake@input$bcr_diversity
remixt_ploidy_file <- snakemake@input$remixt_cellularity_ploidy_file
clonal_measures_file <- snakemake@input$ith_stats
db_path <- snakemake@params$db
ith_stat_types <- snakemake@params$ith_stat_types
xcr_diversity_measures <- as.vector(outer(c("tcr", "bcr"), c("clonotypes_unique",
"shannon_entropy", "inverse_simpson", "D50_index", "chao1"), function(x,
y) paste(x, y, sep = "_")))
annotation_colours <- ithi.figures::get_annotation_colours()
ihc_table <- fread(ihc_table_path)
xcr_table <- read_clonotypes(xcr_table_path, duplicates = FALSE, db_path = db_path)
Read 19.7% of 304822 rows
Read 62.3% of 304822 rows
Read 88.6% of 304822 rows
Read 304822 rows and 18 (of 18) columns from 0.070 GB file in 00:00:05
tree_branch_data <- read_clone_tree_data(clone_tree_file, clone_branch_length_file,
clone_prevalence_file, db_path)
neoediting_res <- supp_neoediting(neoediting_outdir, ihc_table, db_path, tree_branch_data,
wtfilter = TRUE, full_epitopes = FALSE, snv_cluster_files = snv_cluster_files)
xcr_diversity <- ithi.supp::get_xcr_diversity(tcr_diversity_file, bcr_diversity_file,
db_path, xcr_table)
remixt_ploidy <- read.table(remixt_ploidy_file, row.names = 1, header = TRUE,
stringsAsFactors = FALSE)
remixt_ploidy <- remixt_ploidy %>% rownames_to_column(var = "voa")
remixt_ploidy$patient_id <- ithi.meta::factor_id(remixt_ploidy$patient_id, type = "patient_id",
db_path)
remixt_ploidy$condensed_id <- ithi.meta::map_id(remixt_ploidy$voa, from = "voa",
to = "condensed_id", db_path)
clonal_measures <- read_ith_stats(clonal_measures_file, db_path, duplicates = FALSE)
The purpose of this analysis is to show whether or not TCR repertoire diversity associates with subclonal neoepitope depletion. If so, I suppose the biological interpretation would be that a more diverse TCR repertoire would be better able to eliminate specific (subclonal) antigens.
There are 2 things we should use to address this line of thinking:
The first point – at a high level – challenges the assumption that more diverse TCR repertoires may contend with antigenic diversity associated with subclonal diversification, while the latter point directly addresses the reviewer comment. To then explain how TCR diversity can be collinear with epithelial CD8+ TIL density but not be associated with ITH, I need to review the results of the TIL cluster vs. XCR diversity analysis. (TODO!)
We mention this in the paper, but to show a plot:
xcr_diversity_melted <- melt(xcr_diversity, id.vars = c("patient_id", "condensed_id"),
measure.vars = xcr_diversity_measures)
xcr_clonal_diversity <- merge(clonal_measures, xcr_diversity_melted)
for (stat_type in ith_stat_types) {
pvals <- ithi.utils::compute_pvals_subsets(xcr_clonal_diversity, facet_vars = c("variable"),
formula = as.formula(paste0("~ ", stat_type, "+ value")), corfun = cor.test,
method = "spearman")
p <- ggplot(xcr_clonal_diversity, aes_string(x = stat_type, y = "value")) +
geom_point(aes(colour = patient_id)) + theme_bw() + theme_Publication() +
theme_nature() + scale_colour_manual(values = annotation_colours$patient_id) +
xlab(stat_type) + ylab("XCR diversity") + facet_wrap(~variable, scales = "free") +
geom_text(data = pvals, aes(x = Inf, y = Inf, label = p.value.text),
hjust = 1.1, vjust = 1.5, size = 2.5, parse = TRUE)
print(p)
}
Even when working with uncorrected p-values, it’s quite obvious that there’s no correlation between ITH and XCR diversity.
rates_xcr_analysis <- function(rates_unmerged, xcr_diversity, divcol = "tcr_clonotypes_unique") {
rates_unmerged_xcr <- plyr::join(rates_unmerged, xcr_diversity)
rates_unmerged_xcr$patient_id <- ithi.meta::map_id(rates_unmerged_xcr$condensed_id,
from = "condensed_id", to = "patient_id", db_path)
div_cols <- c("tcr_clonotypes_unique", "tcr_shannon_entropy", "tcr_inverse_simpson",
"bcr_clonotypes_unique", "bcr_shannon_entropy", "bcr_inverse_simpson")
for (col in div_cols) {
rates_unmerged_xcr[, col] <- rates_unmerged_xcr[, col]/max(rates_unmerged_xcr[,
col], na.rm = TRUE)
}
mod <- glmer(as.formula(paste0("expratio/obsratio ~ ", divcol, "+ (1 | patient_id)")),
data = rates_unmerged_xcr[!is.na(rates_unmerged_xcr[, divcol]), ], family = Gamma(link = "log"))
summod <- summary(mod)
pval <- unname(summod$coefficients[, 4][2])
rates_unmerged_xcr$ei <- with(rates_unmerged_xcr, expratio/obsratio)
labeller_vector <- c(paste("Patient", unique(rates_unmerged_xcr$patient_id)))
names(labeller_vector) <- as.character(unique(rates_unmerged_xcr$patient_id))
p1 <- ggplot(rates_unmerged_xcr, aes_string(x = divcol, y = "ei")) + theme_bw() +
ithi.utils::theme_Publication() + ithi.utils::theme_nature() + ithi.utils::stripped_theme() +
ylab("Expected/observed neoantigen ratio") + geom_point() + scale_color_manual(values = annotation_colours$patient_id) +
facet_wrap(~patient_id, scales = "free", labeller = as_labeller(labeller_vector)) +
xlab(divcol) + guides(colour = guide_legend(title = "Patient", nrow = 2)) +
ithi.utils::ggmargins(type = "topminus")
return(list(plot = p1, pval = pval))
}
subclonal_rates_unmerged <- neoediting_res$subclonal_rates_unmerged %>% plyr::rename(c(sample_key = "condensed_id"))
tcr_uniqclone_neoediting_subclonal_res <- rates_xcr_analysis(subclonal_rates_unmerged,
xcr_diversity, divcol = "tcr_clonotypes_unique")
bcr_uniqclone_neoediting_subclonal_res <- rates_xcr_analysis(subclonal_rates_unmerged,
xcr_diversity, divcol = "bcr_clonotypes_unique")
tcr_shannon_neoediting_subclonal_res <- rates_xcr_analysis(subclonal_rates_unmerged,
xcr_diversity, divcol = "tcr_shannon_entropy")
bcr_shannon_neoediting_subclonal_res <- rates_xcr_analysis(subclonal_rates_unmerged,
xcr_diversity, divcol = "bcr_shannon_entropy")
tcr_simpson_neoediting_subclonal_res <- rates_xcr_analysis(subclonal_rates_unmerged,
xcr_diversity, divcol = "tcr_inverse_simpson")
bcr_simpson_neoediting_subclonal_res <- rates_xcr_analysis(subclonal_rates_unmerged,
xcr_diversity, divcol = "bcr_inverse_simpson")
(These are for subclonal neoediting.)
clonal_rates_unmerged <- neoediting_res$clonal_rates_unmerged %>% plyr::rename(c(sample_key = "condensed_id"))
tcr_uniqclone_neoediting_clonal_res <- rates_xcr_analysis(clonal_rates_unmerged,
xcr_diversity, divcol = "tcr_clonotypes_unique")
bcr_uniqclone_neoediting_clonal_res <- rates_xcr_analysis(clonal_rates_unmerged,
xcr_diversity, divcol = "bcr_clonotypes_unique")
tcr_shannon_neoediting_clonal_res <- rates_xcr_analysis(clonal_rates_unmerged,
xcr_diversity, divcol = "tcr_shannon_entropy")
bcr_shannon_neoediting_clonal_res <- rates_xcr_analysis(clonal_rates_unmerged,
xcr_diversity, divcol = "bcr_shannon_entropy")
tcr_simpson_neoediting_clonal_res <- rates_xcr_analysis(clonal_rates_unmerged,
xcr_diversity, divcol = "tcr_inverse_simpson")
bcr_simpson_neoediting_clonal_res <- rates_xcr_analysis(clonal_rates_unmerged,
xcr_diversity, divcol = "bcr_inverse_simpson")
So, this is a bit interesting. While we don’t really see any robust results for clonal neoediting and TCR/BCR diversity, we do see that TCR, but not BCR diversity is (negatively) correlated with E_i. The plots aren’t super convincing:
tcr_shannon_neoediting_subclonal_res$plot
But this implies that perhaps less diverse – more expanded? – TCR populations are also a sign of immunoediting.
A natural question to ask is whether or not this is merely because TCR diversity and CD8+ TIL densities may be negatively correlated within patients. We can see whether that’s the case:
ihc_table$patient_id <- ithi.meta::factor_id(ihc_table$patient_id, type = "patient_id",
db_path)
ihc_xcr_div <- ihc_table %>% plyr::join(xcr_diversity)
ihc_xcr_div$E_CD8_rescaled <- ihc_xcr_div$E_CD8_density/max(ihc_xcr_div$E_CD8_density,
na.rm = TRUE)
ihc_xcr_div$tcr_shannon_entropy_rescaled <- ihc_xcr_div$tcr_shannon_entropy/max(ihc_xcr_div$tcr_shannon_entropy,
na.rm = TRUE)
dat <- subset(ihc_xcr_div, !is.na(E_CD8_density) & !is.na(tcr_shannon_entropy))
singleton_patients <- (dat %>% group_by(patient_id) %>% summarise(n = n()) %>%
subset(n == 1))$patient_id
glmer(as.formula(paste0("E_CD8_rescaled ~ ", "tcr_shannon_entropy_rescaled",
"+ (1 | patient_id)")), data = dat %>% subset(!patient_id %in% singleton_patients),
family = Gamma(link = "log")) %>% summary
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: Gamma ( log )
Formula: E_CD8_rescaled ~ tcr_shannon_entropy_rescaled + (1 | patient_id)
Data: dat %>% subset(!patient_id %in% singleton_patients)
AIC BIC logLik deviance df.resid
-178.5 -169.5 93.3 -186.5 67
Scaled residuals:
Min 1Q Median 3Q Max
-1.2181 -0.6581 -0.1140 0.4331 2.7443
Random effects:
Groups Name Variance Std.Dev.
patient_id (Intercept) 1.1756 1.084
Residual 0.6022 0.776
Number of obs: 71, groups: patient_id, 17
Fixed effects:
Estimate Std. Error t value Pr(>|z|)
(Intercept) -2.5896 0.3663 -7.070 1.55e-12 ***
tcr_shannon_entropy_rescaled 0.4004 0.6402 0.626 0.532
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
tcr_shnnn__ -0.384
And there’s no correlation here. So it’s not true that, within patients, samples with higher epithelial CD8+ TIL densities necessarily have lower TCR diversity.
subclonal_rates_ihc_xcr_div <- plyr::join(subclonal_rates_unmerged, ihc_xcr_div)
mod <- glmer(expratio/obsratio ~ E_CD8_rescaled + tcr_shannon_entropy_rescaled +
(1 | patient_id), data = subset(subclonal_rates_ihc_xcr_div, !is.na(E_CD8_density) &
!is.na(tcr_shannon_entropy_rescaled)), family = Gamma(link = "log"))
summod <- summary(mod)
summod
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: Gamma ( log )
Formula:
expratio/obsratio ~ E_CD8_rescaled + tcr_shannon_entropy_rescaled +
(1 | patient_id)
Data:
subset(subclonal_rates_ihc_xcr_div, !is.na(E_CD8_density) & !is.na(tcr_shannon_entropy_rescaled))
AIC BIC logLik deviance df.resid
-32.4 -23.5 21.2 -42.4 39
Scaled residuals:
Min 1Q Median 3Q Max
-1.88529 -0.53689 -0.09274 0.51294 1.35377
Random effects:
Groups Name Variance Std.Dev.
patient_id (Intercept) 0.02579 0.1606
Residual 0.01519 0.1233
Number of obs: 44, groups: patient_id, 12
Fixed effects:
Estimate Std. Error t value Pr(>|z|)
(Intercept) 0.1487 0.1039 1.431 0.152303
E_CD8_rescaled 0.4192 0.1112 3.769 0.000164 ***
tcr_shannon_entropy_rescaled -0.2673 0.1446 -1.849 0.064454 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) E_CD8_
E_CD8_rscld -0.215
tcr_shnnn__ -0.276 0.160
Note how the sign (of the TCR coefficient) changes.
Adding in ploidy …
subclonal_rates_ihc_xcr_div_ploidy <- plyr::join(subclonal_rates_ihc_xcr_div,
remixt_ploidy, by = c("condensed_id", "patient_id"))
mod <- glmer(expratio/obsratio ~ E_CD8_rescaled + tcr_shannon_entropy_rescaled +
psi + Cellularity + (1 | patient_id), data = subset(subclonal_rates_ihc_xcr_div_ploidy,
!is.na(E_CD8_density) & !is.na(tcr_shannon_entropy_rescaled)), family = Gamma(link = "log"))
summod <- summary(mod)
summod
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: Gamma ( log )
Formula:
expratio/obsratio ~ E_CD8_rescaled + tcr_shannon_entropy_rescaled +
psi + Cellularity + (1 | patient_id)
Data:
subset(subclonal_rates_ihc_xcr_div_ploidy, !is.na(E_CD8_density) &
!is.na(tcr_shannon_entropy_rescaled))
AIC BIC logLik deviance df.resid
-51.7 -38.8 32.9 -65.7 40
Scaled residuals:
Min 1Q Median 3Q Max
-2.03133 -0.40851 -0.02773 0.52020 1.35785
Random effects:
Groups Name Variance Std.Dev.
patient_id (Intercept) 0.02473 0.1573
Residual 0.01090 0.1044
Number of obs: 47, groups: patient_id, 12
Fixed effects:
Estimate Std. Error t value Pr(>|z|)
(Intercept) -0.03317 0.14038 -0.236 0.813223
E_CD8_rescaled 0.32928 0.09320 3.533 0.000411 ***
tcr_shannon_entropy_rescaled -0.13896 0.12053 -1.153 0.248942
psi -0.01147 0.02750 -0.417 0.676676
Cellularity 0.37558 0.08830 4.253 2.1e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) E_CD8_ tcr___ psi
E_CD8_rscld -0.019
tcr_shnnn__ -0.152 0.104
psi -0.539 -0.034 -0.171
Cellularity -0.380 -0.244 0.221 0.020
So, putting everything together we still have epithelial CD8+ TIL density being significant, TCR diversity being a negatively associated factor at the edge of significance, while controlling for ploidy and cellularity.
In summary, TCR entropy alone isn’t significantly correlated with subclonal neoepitope elimination. When you add epithelial CD8+ TIL density to the explanatory variables, all of a sudden it is – but likely because it’s collinear with E_CD8_rescaled, given that the sign of the coefficient changes. Regularization could help here but no idea how to do it in glmer.
Some extensions might be to use other diversity indices (e.g. D50 or Berger-Parker indices).