library(ithi.utils)
load_base_libs()

library(ithi.meta)
library(ithi.xcr)
library(ithi.clones)

Colour palettes

pal_patient <- select_palette("patient")

Parameters

db_path <- snakemake@params$db

xcr_table_path <- snakemake@input$xcr_table
prevalence_option <- snakemake@params$prevalence_option
distance_method <- snakemake@params$xcr_distance_method

clone_tree_file <- snakemake@input$clone_tree_file
clone_prevalence_file <- snakemake@input$clone_prevalence_file
clone_branch_length_file <- snakemake@input$clone_branch_length_file

Metadata

db <- src_sqlite(db_path, create = FALSE)

duplicates <- collect(tbl(db, "duplicates"))

Analysis

XCR repertoire similarity

xcr_table <- read_clonotypes(xcr_table_path, duplicates = FALSE, db_path, verbose = 1)

Read 19.7% of 304822 rows
Read 59.1% of 304822 rows
Read 91.9% of 304822 rows
Read 304822 rows and 18 (of 18) columns from 0.070 GB file in 00:00:05
tcr_segment_type <- "TRB"
bcr_segment_type <- "IGH"

id_type <- "condensed_id"
patients <- unique(xcr_table$patient_id)

dists <- lapply(patients, function(patient) {
    tcr_clonotypes <- subset(xcr_table, type == tcr_segment_type & patient_id == 
        patient)
    bcr_clonotypes <- subset(xcr_table, type == bcr_segment_type & patient_id == 
        patient)
    tcr_cross_table <- cross_tabulate(tcr_clonotypes, id_type = id_type)
    bcr_cross_table <- cross_tabulate(bcr_clonotypes, id_type = id_type)
    
    cross_tables <- list(tcr = tcr_cross_table, bcr = bcr_cross_table)
    
    distance_matrices <- lapply(cross_tables, function(cross_table) {
        distmat <- compute_immune_distance_matrix(cross_table, method = distance_method)
        mat <- as.matrix(distmat)
        return(mat)
    })
    return(distance_matrices)
})
tcr_dists <- lapply(dists, function(x) x$tcr)
bcr_dists <- lapply(dists, function(x) x$bcr)

xcr_dists <- tibble(patient_id = patients, tcr = tcr_dists, bcr = bcr_dists)
xcr_dists$patient_id <- factor(xcr_dists$patient_id)
xcr_dists <- xcr_dists[order(xcr_dists$patient_id), ]

The distance metric being used is horn.

Tumour clones

tree_branch_data <- read_clone_tree_data(clone_tree_file, clone_branch_length_file, 
    clone_prevalence_file, db_path)
trees <- lapply(tree_branch_data, function(x) x$tree)
prevalences <- rbind.fill(lapply(tree_branch_data, function(x) x$prevalence_dat))
branch_lengths <- rbind.fill(lapply(tree_branch_data, function(x) x$branch_dat))

ccfs <- compute_ccf(prevalences, trees, id_type = id_type)
ccfs_labeled <- merge(ccfs, branch_lengths, by = c("label", "node", "patient_id"))

## Need non-normalized distances
clone_dists <- clone_distances(ccfs_labeled, normalize = FALSE, id_type = "condensed_id")

Combination

clone_dists$patient_id <- factor(clone_dists$patient_id)

total_dists <- inner_join(clone_dists, xcr_dists)
compute_overlap_similarities <- function(vals) {
    common_samples <- lapply(vals, function(x) {
        colnames(x)
    })
    common_samples <- Reduce(intersect, common_samples)
    
    sim_dfs <- lapply(names(vals), function(category) {
        mat <- vals[[category]]
        mat <- mat[common_samples, common_samples]
        
        res <- setNames(melt(as.matrix(mat)), c("sample1", "sample2", category))
        return(res)
    })
    similarities <- Reduce(merge, sim_dfs)
    result <- subset(similarities, as.numeric(sample1) < as.numeric(sample2))
    return(result)
}
pairwise_similarities <- rbind.fill(lapply(1:nrow(total_dists), function(i) {
    clone_distmat <- total_dists[i, ]$dist_clones_weighted[[1]]
    tcr_distmat <- total_dists[i, ]$tcr[[1]]
    bcr_distmat <- total_dists[i, ]$bcr[[1]]
    
    inputs <- list(clone_distmat, tcr_distmat, bcr_distmat)
    names(inputs) <- c("clones", "tcr", "bcr")
    
    patient_id <- total_dists[i, ]$patient_id
    
    pairwise_sims <- cbind(patient_id = patient_id, compute_overlap_similarities(inputs))
    return(pairwise_sims)
}))
pairwise_similarities$patient_id <- factor(pairwise_similarities$patient_id)

patient_counts <- pairwise_similarities %>% group_by(patient_id) %>% summarise(n = n())
singletons <- subset(patient_counts, n == 1)$patient_id
pairwise_similarities <- subset(pairwise_similarities, !patient_id %in% singletons)

TCR-clones

stats <- setNames(ddply(pairwise_similarities, .(patient_id), function(x) {
    res <- with(x, cor.test(clones, tcr, method = "pearson"))
    pvalue <- res$p.value
    eq <- substitute(italic(P) == p, list(p = format(pvalue, digits = 3)))
    return(as.character(as.expression(eq)))
}), c("patient_id", "p.value"))

summary(lmer(tcr ~ clones + (1 | patient_id), data = pairwise_similarities))
Linear mixed model fit by REML t-tests use Satterthwaite approximations
  to degrees of freedom [lmerMod]
Formula: tcr ~ clones + (1 | patient_id)
   Data: pairwise_similarities

REML criterion at convergence: -66.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.2035 -0.3406  0.1428  0.4982  1.7980 

Random effects:
 Groups     Name        Variance Std.Dev.
 patient_id (Intercept) 0.03909  0.1977  
 Residual               0.01950  0.1396  
Number of obs: 116, groups:  patient_id, 13

Fixed effects:
             Estimate Std. Error        df t value Pr(>|t|)    
(Intercept) 7.045e-01  5.957e-02 1.417e+01   11.83 9.86e-09 ***
clones      2.708e-05  7.941e-06 1.136e+02    3.41 0.000901 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
       (Intr)
clones -0.313
fit warnings:
Some predictor variables are on very different scales: consider rescaling
ggplot(pairwise_similarities, aes(x = clones, y = tcr)) + geom_point(aes(colour = patient_id)) + 
    theme_bw() + theme_Publication() + scale_color_manual(values = pal_patient) + 
    xlab("Clone distance") + ylab("TCR distance") + facet_wrap(~patient_id, 
    scales = "free") + geom_text(data = stats, aes(x = Inf, y = Inf, label = p.value), 
    hjust = 1.1, vjust = 1.5, size = 3, parse = TRUE)

BCR-clones

stats <- setNames(ddply(pairwise_similarities, .(patient_id), function(x) {
    res <- with(x, cor.test(clones, bcr, method = "pearson"))
    pvalue <- res$p.value
    eq <- substitute(italic(P) == p, list(p = format(pvalue, digits = 3)))
    return(as.character(as.expression(eq)))
}), c("patient_id", "p.value"))

summary(lmer(bcr ~ clones + (1 | patient_id), data = pairwise_similarities))
Linear mixed model fit by REML t-tests use Satterthwaite approximations
  to degrees of freedom [lmerMod]
Formula: bcr ~ clones + (1 | patient_id)
   Data: pairwise_similarities

REML criterion at convergence: -88.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.2307 -0.3481  0.1803  0.4483  1.6530 

Random effects:
 Groups     Name        Variance Std.Dev.
 patient_id (Intercept) 0.01768  0.1330  
 Residual               0.01720  0.1312  
Number of obs: 116, groups:  patient_id, 13

Fixed effects:
             Estimate Std. Error        df t value Pr(>|t|)    
(Intercept) 7.949e-01  4.254e-02 1.532e+01  18.688 5.86e-12 ***
clones      2.192e-05  7.205e-06 1.121e+02   3.043  0.00292 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
       (Intr)
clones -0.394
fit warnings:
Some predictor variables are on very different scales: consider rescaling
ggplot(pairwise_similarities, aes(x = clones, y = bcr)) + geom_point(aes(colour = patient_id)) + 
    theme_bw() + theme_Publication() + scale_color_manual(values = pal_patient) + 
    xlab("Clone distance") + ylab("BCR distance") + facet_wrap(~patient_id, 
    scales = "free") + geom_text(data = stats, aes(x = Inf, y = Inf, label = p.value), 
    hjust = 1.1, vjust = 1.5, size = 3, parse = TRUE)

TCR-clone correlations are good for most patients, while BCR-clone correlations are poor for most patients. Patient 1 has good correlations for both – not exactly surprising as patient 1 is very immunoreactive (highest out of the ITH2 cohort). Patient 4 is a clear opposite to this trend. Is it possible that patient 4 harbors a B-cell driven response, rather than a T-cell driven one?

Do we have corroborating evidence for this?

Supports:

  • Patient 4 has the highest intrapatient BCR similarity across the entire cohort (immune responses highly similar from site-to-site could imply that there’s a widespread antigen, and tends to be associated with higher CD20+ TIL)
  • Patient 4 has relatively high CD20+ TIL density (see the Immune Variability page) – tied for 3rd/21

Does not support:

  • Patient 3 also has high CD20+ TIL density (tied for 3rd/21), but does not exhibit this pattern
---
title: "XCRs and tumour clones"
output: 
  html_notebook:
    toc: true
    toc_depth: 5
    toc_float: true
params:
  rmd: "xcr_clones_analysis.Rmd"
---
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}

######## Snakemake header ########
library(methods)
Snakemake <- setClass(
    "Snakemake",
    slots = c(
        input = "list",
        output = "list",
        params = "list",
        wildcards = "list",
        threads = "numeric",
        log = "list",
        resources = "list",
        config = "list",
        rule = "character"
    )
)
snakemake <- Snakemake(
    input = list('/shahlab/alzhang/projects/ITH_Immune/paper/analysis/Rmd/_site.yml', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/clone_data.tsv', 'Rmd/xcr_clones_analysis.Rmd', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/branch_data.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/xcr_table.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/tree_data.tsv', "site_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/analysis/Rmd/_site.yml', "clone_prevalence_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/clone_data.tsv', "notebook" = 'Rmd/xcr_clones_analysis.Rmd', "clone_branch_length_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/branch_data.tsv', "xcr_table" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/xcr_table.tsv', "clone_tree_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/tree_data.tsv'),
    output = list('/shahlab/alzhang/projects/ITH_Immune/paper/results/web/xcr_clones_analysis.nb.html'),
    params = list('/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', 'clones', 'horn', 'ithi-analysis-xcr-tracking', "db" = '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', "prevalence_option" = 'clones', "xcr_distance_method" = 'horn', "name" = 'ithi-analysis-xcr-tracking'),
    wildcards = list(),
    threads = 1,
    log = list('/shahlab/alzhang/clusttmp/paperanalysis2/xcr_tracking.log'),
    resources = list(),
    config = list("icgc_specimen_file" = '/shahlab/alzhang/data/ICGC/specimen.tsv', "variability_type" = 'stabilize', "immtyper_lengths" = '11 12 13 14 15 16 17 18', "driver_map" = '/shahlab/alzhang/projects/ITH_Immune/subprojects/drivers/data/gene_list_mapped.bed', "multiviewclustering_notebook" = 'Rmd/multiviewclustering.Rmd', "xcr_distance_method" = 'horn', "mmctm_sample_ad_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-ancestry-sample/plots/ith-by-ancestral-sample_snv-sv_sigs_multipanel.pdf', "mmctm_ancestral_descendant_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-ancestry-sample/output', "tils_for_cluster" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "bcrphylo_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density', 'T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "smooth_type" = 'loess', "xcrmapscape_files" = c('/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/1.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/2.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/3.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/4.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/7.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/9.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/10.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/11.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/12.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/13.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/14.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/15.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/16.svg', '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/xcrmapscape/17.svg'), "ith_project_results" = '/shahlab/alzhang/projects/ith3/data/results', "ihc_run2" = '/shahlab/alzhang/projects/ITH_Immune/data/ihc/cd79cd138cd68/validated_stats_weighted.rdata', "nanostring_annotations" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv', "icgc_clinical" = '/shahlab/alzhang/data/ICGC/donor.OV-AU.tsv', "clonal_figure_template" = '/shahlab/alzhang/projects/ITH_Immune/paper/analysis/templates/clonal_figure.svg', "nanostring_signature_notebook" = 'Rmd/nanostring_signatures.Rmd', "PNG_DENSITY" = 300, "tcga_clinical" = '/shahlab/alzhang/data/TCGA/synapse_clinAll_data.tsv', "clone_branch_length_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/branch_data.tsv', "tcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/TRB/postfilter_diversity_stats/diversity.strict.resampled.txt', "mmctm_final_patient_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient_with-ov/plots/ith-by-patient_snv-sv_sigs_multipanel.pdf', "mutation_signature_notebook" = 'Rmd/mutation_signatures.Rmd', "nclusts" = 3, "mmctm_patient_ancestral_descendant_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient-ancestry/output', "master_breakpoint_file" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_breakpoints.tsv', "ith_til_notebook" = 'Rmd/ith_til_densities.Rmd', "mmctm_sample_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-sample/output', "mmctm_final_patient_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient_with-ov', "v_dictionary" = '/shahlab/alzhang/projects/ITH_Immune/subprojects/immtyper/metadata/imgt/Homo_sapiens_TRBV.fasta', "clonal_samplers" = c('HMC', 'NUTS'), "phenotype_threshold" = 0.85, "tcga_expr_matrix" = '/shahlab/alzhang/data/TCGA/expr_matrix_normalize_standardize_noduplicates.tsv', "mmctm_ov_combined_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/combined_ov_mmctm/plots/ov_snv-sv_sigs_multipanel.pdf', "logdir" = '/shahlab/alzhang/clusttmp/paperanalysis2', "xcr_clones_notebook" = 'Rmd/xcr_clones_analysis.Rmd', "clone_prevalence_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/clone_data.tsv', "mvclust_nclust" = 3, "db" = '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', "spatial_result_dirs" = list("epithelial" = '/shahlab/alzhang/pipeline_outputs/ith_immune/spatsim/ith3/abc', "stromal" = '/shahlab/alzhang/pipeline_outputs/ith_immune/spatsim/ith5/abc'), "tils_for_variability" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "nanostring_data" = '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', "bcrphylo_correlations_notebook" = 'Rmd/bcr_phylo_correlations.Rmd', "driver_analysis_notebook" = 'Rmd/driver_analysis.Rmd', "ihc_xcr_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density', 'T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "figure_gallery_notebook" = 'Rmd/figures.Rmd', "subtype_marker_file" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/subtype_markers.tsv', "wang_fbi_status" = '/shahlab/alzhang/data/ICGC/ng.3849-S12.txt', "xcrmapscape_notebook" = 'Rmd/xcrmapscape.Rmd', "mmctm_ov_combined_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/combined_ov_mmctm/output', "immune_variability_notebook" = 'Rmd/immune_variability.Rmd', "default_sampler" = 'HMC', "notebook_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/web', "mutsig_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density', 'T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "site_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/analysis/Rmd/_site.yml', "known_subtype_file" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/array/subtypes/known_subtypes.tsv', "clone_tree_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/tree_data.tsv', "molsubtype_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "bcrphylo_examples_notebook" = 'Rmd/bcr_phylo_examples.Rmd', "til_classifier_notebook" = 'Rmd/til_classifier.Rmd', "neoediting_outdir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/neoediting/run4', "mmctm_patient_ad_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient-ancestry/plots/ith-by-patient-ancestry_snv-sv_sigs_multipanel.pdf', "xcr_qc_notebook" = 'Rmd/replicates.Rmd', "bcr_clonotypes" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/clonotypes/IGH_clonotypes_filtered.txt', "known_subtypes_merged" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/known_subtypes_merged.tsv', "molsubtype_notebook" = 'Rmd/molecular_subtypes.Rmd', "ith_statistics_notebook" = 'Rmd/ith_statistics.Rmd', "prevalence_threshold" = 0.01, "library_sizes" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/library_sizes.tsv', "igpartition_outdir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run22', "patients_for_clonal" = c(1, 2, 3, 4, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17), "j_dictionary" = '/shahlab/alzhang/projects/ITH_Immune/subprojects/immtyper/metadata/imgt/Homo_sapiens_TRBJ.fasta', "bcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/IGH/postfilter_diversity_stats/diversity.strict.resampled.txt', "nscatters" = 6, "immtyper_models" = '/shahlab/alzhang/projects/ITH_Immune/results/immtyper_results/klarenbeek/aa_vj/gradboost', "ihc_run1" = '/shahlab/alzhang/projects/ITH_Immune/data/ihc/cd8cd3cd20/validated_stats_weighted_new.rdata', "PNG_QUALITY" = 300, "icgc_molecular_subtypes" = '/shahlab/alzhang/data/ICGC/icgc_primary_tumour_subtypes.tsv', "classifier_type" = 'knn', "sad_notebook" = 'Rmd/species_abundance_distributions.Rmd', "ihc_xcr_stats_notebook" = 'Rmd/ihc_xcr_stats.Rmd', "icgc_expr_melted" = '/shahlab/alzhang/data/ICGC/OVAU_expr_melted.tsv', "xcrmapscape_tcr_patient_order" = c(15, 1, 3, 4, 2, 17, 7, 14, 9, 10, 12, 13, 11, 16), "mvclust_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "intermediate_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2', "ith_stat_types" = c('entropy', 'postprocessed_divergence', 'combined_ith_normalized', 'proportion_subclonal'), "mapscape_notebook" = 'Rmd/mapscape.Rmd', "master_variant_file" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_snvs.tsv', "ith_stats_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/clonal_measures.tsv', "example_msa_plot" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run13/old/alignment_plots/msa/ith2_2/clust9/indel_reversed.html', "benchmarkdir" = '/shahlab/alzhang/benchmarks/paperanalysis2', "example_annotations" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run13/final_partitions/ith2_2/clust9/annotations_flagged.tsv', "icgc_normalized_reads_matrix" = '/shahlab/alzhang/data/ICGC/OVAU_expr_matrix.tsv', "index_notebook" = 'Rmd/index.Rmd', "tcga_ov_annotations" = '/shahlab/alzhang/data/TCGA/tcga_ov_annotation_sup13.txt', "mmctm_sample_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-sample/plots/ith-by-sample_snv-sv_sigs_multipanel.pdf', "neoantigen_editing_notebook" = 'Rmd/immunoediting.Rmd', "table_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2', "proportion_subclonal_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/old_proportion_subclonal.tsv', "spatial_notebook" = 'Rmd/spatial_analysis.Rmd', "xcr_mapping_notebook" = 'Rmd/xcr_mapping.Rmd', "tcr_clonotypes" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/clonotypes/TRB_clonotypes_filtered.txt'),
    rule = 'xcr_tracking'
)
######## Original script #########

                        ```


```{r global_chunk_options, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, tidy=TRUE, warning=FALSE, message=FALSE)
```


```{r}
library(ithi.utils)
load_base_libs()

library(ithi.meta)
library(ithi.xcr)
library(ithi.clones)
```

## Colour palettes

```{r}
pal_patient <- select_palette("patient")
```

## Parameters

```{r}
db_path <- snakemake@params$db

xcr_table_path <- snakemake@input$xcr_table
prevalence_option <- snakemake@params$prevalence_option
distance_method <- snakemake@params$xcr_distance_method

clone_tree_file <- snakemake@input$clone_tree_file
clone_prevalence_file <- snakemake@input$clone_prevalence_file
clone_branch_length_file <- snakemake@input$clone_branch_length_file
```

## Metadata

```{r}
db <- src_sqlite(db_path, create=FALSE)

duplicates <- collect(tbl(db, "duplicates"))
```

## Analysis

### XCR repertoire similarity

```{r}
xcr_table <- read_clonotypes(xcr_table_path, duplicates = FALSE, db_path, verbose=1)
```

```{r}
tcr_segment_type <- "TRB"
bcr_segment_type <- "IGH"

id_type <- "condensed_id"
```

```{r}
patients <- unique(xcr_table$patient_id)

dists <- lapply(patients, function(patient) {
  tcr_clonotypes <- subset(xcr_table, type == tcr_segment_type & patient_id == patient)
  bcr_clonotypes <- subset(xcr_table, type == bcr_segment_type & patient_id == patient)
  tcr_cross_table <- cross_tabulate(tcr_clonotypes, id_type = id_type)
  bcr_cross_table <- cross_tabulate(bcr_clonotypes, id_type = id_type)
  
  cross_tables <- list(tcr=tcr_cross_table, bcr=bcr_cross_table)
  
  distance_matrices <- lapply(cross_tables, function(cross_table) {
    distmat <- compute_immune_distance_matrix(cross_table, method = distance_method)
    mat <- as.matrix(distmat)
    return(mat)
  })
  return(distance_matrices)
})
tcr_dists <- lapply(dists, function(x) x$tcr)
bcr_dists <- lapply(dists, function(x) x$bcr)

xcr_dists <- tibble(patient_id = patients, tcr=tcr_dists, bcr=bcr_dists)
xcr_dists$patient_id <- factor(xcr_dists$patient_id)
xcr_dists <- xcr_dists[order(xcr_dists$patient_id),]
```

The distance metric being used is `r distance_method`. 


### Tumour clones

```{r}
tree_branch_data <- read_clone_tree_data(clone_tree_file, clone_branch_length_file, clone_prevalence_file, db_path)
trees <- lapply(tree_branch_data, function(x) x$tree)
prevalences <- rbind.fill(lapply(tree_branch_data, function(x) x$prevalence_dat))
branch_lengths <- rbind.fill(lapply(tree_branch_data, function(x) x$branch_dat))

ccfs <- compute_ccf(prevalences, trees, id_type = id_type)
ccfs_labeled <- merge(ccfs, branch_lengths, by=c("label", "node", "patient_id"))

## Need non-normalized distances
clone_dists <- clone_distances(ccfs_labeled, normalize = FALSE, id_type = "condensed_id")
```

### Combination


```{r}
clone_dists$patient_id <- factor(clone_dists$patient_id)

total_dists <- inner_join(clone_dists, xcr_dists)
```

```{r}
compute_overlap_similarities <- function(vals) {
  common_samples <- lapply(vals, function(x) {
    colnames(x)
  })
  common_samples <- Reduce(intersect, common_samples)
  
  sim_dfs <- lapply(names(vals), function(category) {
    mat <- vals[[category]]
    mat <- mat[common_samples,common_samples]
    
    res <- setNames(melt(as.matrix(mat)), c("sample1", "sample2", category))
    return(res)
  })
  similarities <- Reduce(merge, sim_dfs)
  result <- subset(similarities, as.numeric(sample1) < as.numeric(sample2))
  return(result)
}
```

```{r}
pairwise_similarities <- rbind.fill(lapply(1:nrow(total_dists), function(i) {
  clone_distmat <- total_dists[i,]$dist_clones_weighted[[1]]
  tcr_distmat <- total_dists[i,]$tcr[[1]]
  bcr_distmat <- total_dists[i,]$bcr[[1]]
  
  inputs <- list(clone_distmat, tcr_distmat, bcr_distmat)
  names(inputs) <- c("clones", "tcr", "bcr")
  
  patient_id <- total_dists[i,]$patient_id
  
  pairwise_sims <- cbind(patient_id=patient_id, compute_overlap_similarities(inputs))
  return(pairwise_sims)
}))
pairwise_similarities$patient_id <- factor(pairwise_similarities$patient_id)

patient_counts <- pairwise_similarities %>% group_by(patient_id) %>% summarise(n=n())
singletons <- subset(patient_counts, n==1)$patient_id
pairwise_similarities <- subset(pairwise_similarities, !patient_id %in% singletons)
```


### TCR-clones

```{r}
stats <- setNames(ddply(pairwise_similarities, .(patient_id), function(x) {
  res <- with(x, cor.test(clones, tcr, method="pearson"))
  pvalue <- res$p.value
  eq <- substitute(italic(P)==p, list(p=format(pvalue, digits=3)))
  return(as.character(as.expression(eq)))
}), c("patient_id", "p.value"))

summary(lmer(tcr ~ clones + (1|patient_id), data = pairwise_similarities))
```


```{r}
ggplot(pairwise_similarities, aes(x=clones, y=tcr)) + geom_point(aes(colour=patient_id)) + theme_bw() + theme_Publication() + scale_color_manual(values = pal_patient) + xlab("Clone distance") + ylab("TCR distance") + facet_wrap(~ patient_id, scales="free") + geom_text(data=stats, aes(x=Inf, y=Inf, label=p.value), hjust=1.1, vjust=1.5, size=3, parse=TRUE)
```

### BCR-clones

```{r}
stats <- setNames(ddply(pairwise_similarities, .(patient_id), function(x) {
  res <- with(x, cor.test(clones, bcr, method="pearson"))
  pvalue <- res$p.value
  eq <- substitute(italic(P)==p, list(p=format(pvalue, digits=3)))
  return(as.character(as.expression(eq)))
}), c("patient_id", "p.value"))

summary(lmer(bcr ~ clones + (1|patient_id), data = pairwise_similarities))
```


```{r}
ggplot(pairwise_similarities, aes(x=clones, y=bcr)) + geom_point(aes(colour=patient_id)) + theme_bw() + theme_Publication() + scale_color_manual(values = pal_patient) + xlab("Clone distance") + ylab("BCR distance") + facet_wrap(~ patient_id, scales="free") + geom_text(data=stats, aes(x=Inf, y=Inf, label=p.value), hjust=1.1, vjust=1.5, size=3, parse=TRUE)
```


TCR-clone correlations are good for most patients, while BCR-clone correlations are poor for most patients. Patient 1 has good correlations for both -- not exactly surprising as patient 1 is very immunoreactive (highest out of the ITH2 cohort). Patient 4 is a clear opposite to this trend. Is it possible that patient 4 harbors a B-cell driven response, rather than a T-cell driven one?

Do we have corroborating evidence for this? 

Supports:

* Patient 4 has the highest intrapatient BCR similarity across the entire cohort (immune responses highly similar from site-to-site could imply that there's a widespread antigen, and tends to be associated with higher CD20+ TIL)
* Patient 4 has relatively high CD20+ TIL density (see the Immune Variability page) -- tied for 3rd/21

Does not support:

* Patient 3 also has high CD20+ TIL density (tied for 3rd/21), but does not exhibit this pattern