Setup

library(ithi.utils)
load_base_libs()

library(methods)
library(bedr)
library(biomaRt)
library(BSgenome.Hsapiens.UCSC.hg19)

library(ithi.meta)
library(ithi.figures)
library(ithi.utils)
library(ithi.expr)
library(ithi.seq)
library(ithi.bed)
library(ithi.supp)
nanostring_data_path <- snakemake@input$nanostring_data
nanostring_annotations_path <- snakemake@input$nanostring_annotations
ihc_table_path <- snakemake@input$ihc_table
master_variant_file <- snakemake@input$snv_table
master_breakpoint_file <- snakemake@input$breakpoint_table
somatic_coding_result_dir <- snakemake@input$somatic_coding_result_dir
molecular_subtype_file <- snakemake@input$molsubtypes

refseq_gene_file <- snakemake@params$refseq_gene_file
tils_for_cluster <- snakemake@params$tils_for_cluster
db_path <- snakemake@params$db
annotation_colours <- ithi.figures::get_annotation_colours()

mart <- useDataset("hsapiens_gene_ensembl", useMart(biomart = "ENSEMBL_MART_ENSEMBL", 
    host = "feb2014.archive.ensembl.org"))

ihc_table <- fread(ihc_table_path)
master_variant_table <- read_variant_file(master_variant_file, db_path)

Read 5.9% of 682516 rows
Read 682516 rows and 9 (of 9) columns from 0.026 GB file in 00:00:04
master_breakpoint_table <- read_variant_file(master_breakpoint_file, db_path)

titan_file <- file.path(somatic_coding_result_dir, "somatic_cnv_titan.tsv")
somatic_snv_file <- file.path(somatic_coding_result_dir, "somatic_snvs.tsv")
somatic_indel_file <- file.path(somatic_coding_result_dir, "somatic_indels.tsv")
titan_cnv <- fread(titan_file)
somatic_snvs <- fread(somatic_snv_file)
somatic_indels <- fread(somatic_indel_file)

exprs <- fread(nanostring_data_path)
nanostring_labels <- fread(nanostring_annotations_path)
molsubtypes <- fread(molecular_subtype_file)

til_clusters <- ithi.figures:::get_til_clusters(ihc_table, molsubtypes, tils_for_cluster = tils_for_cluster, 
    nclusts = 3)

Pathways immunoediting analysis

The purpose of this analysis is to show whether or not immune checkpoint or other immunosuppressive factors may be present and associated with TIL densities/our N/S/ES-TIL clusters. If so, this reviewer thinks they may interfere with any immunoediting that we claim may be taking place.

In actuality, I think this reviewer’s argument is flawed – we’re measuring the current state of the tumour, not the past. Even if checkpoint molecules are present – and interfering with immunoediting now – this may not have been the case in the tumour’s history.

Expression-level

# antigen-processing machinery APM_genes <- c('CANX', 'HSPA5', 'B2M',
# 'HLA-A', 'HLA-B', 'HLA-C', 'TAP1', 'TAP2', 'TAPBP', 'CALR', 'PDIA3')
APM_genes <- c("HLA-A", "HLA-B", "HLA-C", "TAP1", "TAP2", "TAPBP", "PSMB9")  #, 'PSMB7')
checkpoint_genes <- subset(nanostring_labels, str_detect(gene_class, "Checkpoint"))$Gene
immune_suppression <- c("IDO1")  #, 'IDO2', 'ALOX') # p53 excluded because p53 loss is ubiquituous in HGSC; others are absent
ifng_genes <- c("IFNG")

mutation_genes <- c(APM_genes, ifng_genes)
plot_cluster_expression <- function(expr, genes, ihc_table) {
    expr_subset <- subset(expr, Name %in% genes) %>% as.data.frame %>% column_to_rownames(var = "Name")
    
    sample_filter <- intersect(ihc_table$voa, colnames(expr_subset))
    expr_subset <- expr_subset[, sample_filter]
    
    expr_df <- melt(as.matrix(expr_subset)) %>% plyr::rename(c(Var1 = "Name", 
        Var2 = "voa"))
    expr_ihc <- plyr::join(expr_df, ihc_table)
    expr_ihc$patient_id <- factor(as.character(expr_ihc$patient_id))
    
    
    pvals <- setNames(ddply(expr_ihc, .(Name), function(x) {
        df <- as.data.frame(x)
        corres <- with(df, cor.test(value, E_CD8_density, method = "spearman"))
        
        pval <- corres$p.value
        return(pval)
    }), c("Name", "p.value"))
    pvals$p.adj <- p.adjust(pvals$p.value, method = "fdr")
    
    pvals$p.adj.text <- sapply(pvals$p.adj, function(x) as.character(as.expression(substitute(italic(P) == 
        p, list(p = format(x, digits = 3))))))
    
    p <- ggplot(expr_ihc, aes(x = E_CD8_density, y = value)) + geom_point(aes(colour = patient_id)) + 
        facet_wrap(~Name, scales = "free") + scale_x_continuous(trans = "log10") + 
        theme_bw() + ithi.utils::theme_Publication() + ithi.utils::theme_nature() + 
        scale_colour_manual(values = annotation_colours$patient_id) + geom_text(data = pvals, 
        aes(x = Inf, y = Inf, label = p.adj.text), hjust = 1.1, vjust = 1.5, 
        size = 2.5, parse = TRUE)
    
    return(p)
}
plot_cluster_expression(exprs, APM_genes, ihc_table)

plot_cluster_expression(exprs, checkpoint_genes, ihc_table)

plot_cluster_expression(exprs, ifng_genes, ihc_table)

plot_cluster_expression(exprs, immune_suppression, ihc_table)

So the expression of these genes is positively correlated with epithelial CD8+ TIL density.

What does this mean?

  • APM genes: Antigen processing machinery is not underexpressed in samples with high epithelial CD8+ TIL, and therefore does not likely prevent immunoediting.
  • IFNG and checkpoint genes: Interferon-gamma is known to stimulate PD1 (Benci et al. 2016). Higher levels of checkpoint inhibitors, e.g. PD-1, would theoretically interfere with immunoediting. However, contrary to many other settings, in HGSC PD1+CD8+ TIL (which typically coexpress CD103+ according to Webb et al. 2014 and Komdeur et al. 2016) are associated with superior outcomes and retain functional competence, i.e. produce robust amounts of TNF-alpha and IFN-gamma (Webb et al. 2014, Webb et al. 2015).
  • Immune suppression genes: From Rooney – we found that CYT was best correlated with immunosuppressive factors (Spranger et al., 2013), such as PDCD1LG2 (PDL2), IDO1/2, DOK3 (Lemay et al., 2000), GMCSF receptor (CSF2RA, CSF2RB) and the C1Q complex (Figure 1B).

Mutation-level

Here, our goal is just to look for the presence of mutations in those immune-associated genes. For example, if there are mutations in antigen presenting machinery genes, these may interfere with antigen recognition and therefore immunoediting.

titan_bed <- convert_to_bed(titan_cnv)
refseq_bed <- read_bed(refseq_gene_file)

gene_names <- getBM(attributes = c("refseq_mrna", "hgnc_symbol"), values = refseq_bed$refseq_id, 
    mart = mart)

refseq_bed_annotated <- merge(refseq_bed, gene_names %>% subset(refseq_mrna != 
    "") %>% plyr::rename(c(refseq_mrna = "refseq_id")), by = c("refseq_id"))

refseq_bed_annotated <- subset(refseq_bed_annotated, select = c("chr", "start", 
    "end", "hgnc_symbol", "refseq_id"))
refseq_bed_annotated <- bedr.sort.region(refseq_bed_annotated)

titan_merged <- bedr(engine = "bedtools", input = list(a = titan_bed, b = refseq_bed_annotated), 
    method = "intersect", params = "-loj")
titan_merged <- data.frame(titan_merged)
titan_merged$median_logR <- as.numeric(titan_merged$median_logR)
titan_merged$total <- as.numeric(titan_merged$total)
titan_merged$major <- as.numeric(titan_merged$major)
titan_merged$minor <- as.numeric(titan_merged$minor)

titan_merged_filtered <- subset(titan_merged, (median_logR < -1) | (median_logR > 
    1))
somatic_snvs_filtered <- plyr::join(somatic_snvs, master_variant_table %>% subset(is_present == 
    1), type = "inner")
somatic_indels_filtered <- subset(somatic_indels, mappability == 1)
titan_merged_filtered_immune <- subset(titan_merged_filtered, hgnc_symbol %in% 
    mutation_genes)
somatic_snvs_immune <- subset(somatic_snvs_filtered, gene_name %in% mutation_genes)
somatic_indels_immune <- subset(somatic_indels_filtered, gene_name %in% mutation_genes)

print(somatic_snvs_immune)
Empty data.table (0 rows) of 23 cols: chrom,coord,ref,alt,ref_counts,alt_counts...
NULL
print(somatic_indels_immune)
   chrom    coord ref       alt strelka_score gene_name      effect
1:     6 32797776   C CCTTATCAT            34      TAP2 FRAME_SHIFT
   effect_impact mappability patient_id condensed_id
1:          HIGH           1          3        3_Om1
## Because we have LOHHLA and that's more accurate in polymorphic regions
titan_merged_filtered_immune_nohla <- subset(titan_merged_filtered_immune, !hgnc_symbol %in% 
    c("HLA-A", "HLA-B", "HLA-C"))

print(titan_merged_filtered_immune_nohla)
          chr    start      end patient_id clone prevalence major_1
611155  chr12 68187399 73184855          9     1       0.93       4
2020099  chr6 32692061 32939785         11     1          1       3
2020103  chr6 32692061 32939785         11     1          1       3
2020104  chr6 32692061 32939785         11     1          1       3
2020105  chr6 32692061 32939785         11     1          1       3
2021547  chr6 32721019 35100159         17     1          1       2
2021586  chr6 32721019 35100159         17     1          1       2
2021587  chr6 32721019 35100159         17     1          1       2
2021588  chr6 32721019 35100159         17     1          1       2
2021604  chr6 32721019 35100159         17     1          1       2
2021605  chr6 32721019 35100159         17     1          1       2
2021606  chr6 32721019 35100159         17     1          1       2
        total_1 median_logR minor_1 major minor total tumour_content
611155        5        1.18       1     4     1     5         2.5334
2020099       4        1.03       1     3     1     4         0.9996
2020103       4        1.03       1     3     1     4         0.9996
2020104       4        1.03       1     3     1     4         0.9996
2020105       4        1.03       1     3     1     4         0.9996
2021547       4        1.03       2     2     2     4         0.9992
2021586       4        1.03       2     2     2     4         0.9992
2021587       4        1.03       2     2     2     4         0.9992
2021588       4        1.03       2     2     2     4         0.9992
2021604       4        1.03       2     2     2     4         0.9992
2021605       4        1.03       2     2     2     4         0.9992
2021606       4        1.03       2     2     2     4         0.9992
        condensed_id chr.b  start.b    end.b hgnc_symbol refseq_id
611155        9_LOv1 chr12 68548549 68553521        IFNG NM_000619
2020099      11_Rct2  chr6 32821937 32827628       PSMB9 NM_002800
2020103      11_Rct2  chr6 32789609 32806547        TAP2 NM_018833
2020104      11_Rct2  chr6 32793186 32806547        TAP2 NM_000544
2020105      11_Rct2  chr6 32812985 32821748        TAP1 NM_000593
2021547       17_Om3  chr6 32821937 32827628       PSMB9 NM_002800
2021586       17_Om3  chr6 32789609 32806547        TAP2 NM_018833
2021587       17_Om3  chr6 32793186 32806547        TAP2 NM_000544
2021588       17_Om3  chr6 32812985 32821748        TAP1 NM_000593
2021604       17_Om3  chr6 33267471 33282164       TAPBP NM_003190
2021605       17_Om3  chr6 33267471 33282164       TAPBP NM_172209
2021606       17_Om3  chr6 33271409 33282164       TAPBP NM_172208

The reason there are no SNVs left is because of the mappability threshold.

There’s one indel – in TAP, for sample 3_Om1. This sample is TIL subtype S-TIL.

There are CN high-level amplifications in IFNG/PSMB9/TAP1/TAP2/TAPBP. No deletions that might be interfering with antigen processing machinery.

---
title: "Pathways immunoediting analysis"
---
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}

######## Snakemake header ########
library(methods)
Snakemake <- setClass(
    "Snakemake",
    slots = c(
        input = "list",
        output = "list",
        params = "list",
        wildcards = "list",
        threads = "numeric",
        log = "list",
        resources = "list",
        config = "list",
        rule = "character"
    )
)
snakemake <- Snakemake(
    input = list('notebooks/pathways_immunoediting.Rmd', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table.tsv', '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_snvs.tsv', '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_breakpoints.tsv', '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/somatic_coding_variants', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/molsubtypes.tsv', '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv', "molsubtypes" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/molsubtypes.tsv', "notebook" = 'notebooks/pathways_immunoediting.Rmd', "ihc_table" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table.tsv', "snv_table" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_snvs.tsv', "breakpoint_table" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_breakpoints.tsv', "nanostring_data" = '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', "somatic_coding_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/somatic_coding_variants', "nanostring_annotations" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv'),
    output = list('/shahlab/alzhang/projects/ITH_Immune/paper/results/review/notebooks/run2/pathways_immunoediting.nb.html'),
    params = list(c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), '/shahlab/alzhang/data/genome/hg19/refseq_genes.bed', 'pathways_immunoediting_analysis', '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', "tils_for_cluster" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "refseq_gene_file" = '/shahlab/alzhang/data/genome/hg19/refseq_genes.bed', "name" = 'pathways_immunoediting_analysis', "db" = '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3'),
    wildcards = list(),
    threads = 1,
    log = list('/shahlab/alzhang/clusttmp/paperreview2/notebooks/pathways_immunoediting_analysis.log'),
    resources = list(),
    config = list("ith_stats" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ith_statistics.tsv', "refseq_gene_file" = '/shahlab/alzhang/data/genome/hg19/refseq_genes.bed', "xcr_table" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/xcr_table.tsv', "mmctm_final_patient_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient_with-ov', "nanostring_data" = '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', "neoediting_outdir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/neoediting/run6', "copynumber_table" = '/shahlab/alzhang/data/ithi/master_copynumber_file.tsv', "bcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/IGH/postfilter_diversity_stats/diversity.strict.resampled.txt', "he_results_dir" = '/shahlab/alzhang/data/ithi/finn_results/he_output_Nov29', "array_expression_file" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/array/gene_exprs_rma_batch_corrected.txt', "snv_cluster_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/snv_cluster', "tils_for_cluster" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "total_tiltypes" = c('T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "logdir" = '/shahlab/alzhang/clusttmp/paperreview2', "ith_icgc_bc" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ith_icgc_merged_bc.tsv', "patients_for_clonal" = c(1, 2, 3, 4, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17), "variability_type" = 'stabilize', "table_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/review/tables/run2', "db" = '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', "snv_table" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_snvs.tsv', "epitopes_unique_filtered" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/epitopes_unique_filtered.tsv', "benchmarkdir" = '/shahlab/alzhang/benchmarks/paperreview2', "tilcluster_supervised_ipynb" = '/shahlab/alzhang/projects/ITH_Immune/paper/review/ipy/tilcluster_supervisedmulticlass.ipynb', "image_summary" = '/shahlab/alzhang/data/ithi/yuan_hecr_image_results.csv', "ihc_features_output" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/ihc_features_output.txt', "clone_branch_lengths" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/branch_data.tsv', "distance_method" = 'horn', "clone_prevalences" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/clone_data.tsv', "til_clusters_output" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2/til_clusters_output.txt', "tumour_purity" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/tumour_purity.tsv', "prevalence_threshold" = 0.01, "notebook_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/review/notebooks/run2', "somatic_coding_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/somatic_coding_variants', "image_summary2" = '/shahlab/alzhang/data/ithi/yuan_hecr_image_results_2.csv', "known_subtypes_array" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/array/subtypes/known_subtypes.tsv', "ith_stat_types" = c('entropy', 'postprocessed_divergence', 'combined_ith_normalized', 'proportion_subclonal'), "nanostring_annotations" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv', "ihc_table" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table.tsv', "igpartition_outdir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run22', "all_tiltypes" = c('T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density', 'E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "remixt_cellularity_ploidy" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/remixt_cellularity_ploidy.tsv', "breakpoint_table" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_breakpoints.tsv', "clola_result_file" = '/shahlab/alzhang/pipeline_outputs/ith_immune/clola/run4/clola_condensed_results/beta/clola_results.tsv', "clone_trees" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/clones/tree_data.tsv', "rooney_mutsigcv_file" = '/shahlab/alzhang/projects/ITH_Immune/external/other_papers/mmc6.xlsx', "molsubtypes" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/molsubtypes.tsv', "finnhe_pipeline_results_dir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/finnhe/run1', "icgc_specimen" = '/shahlab/alzhang/data/ICGC/specimen.tsv', "icgc_subtypes" = '/shahlab/alzhang/data/ICGC/icgc_primary_tumour_subtypes.tsv', "tcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/TRB/postfilter_diversity_stats/diversity.strict.resampled.txt', "tils_for_variability" = c('T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density')),
    rule = 'pathways_immunoediting_analysis'
)
######## Original script #########

                        ```



## Setup

```{r global_chunk_options, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, tidy=TRUE, warning=FALSE, message=FALSE, cache=TRUE) #cache=TRUE
```

```{r}
library(ithi.utils)
load_base_libs()

library(methods)
library(bedr)
library(biomaRt)
library(BSgenome.Hsapiens.UCSC.hg19)

library(ithi.meta)
library(ithi.figures)
library(ithi.utils)
library(ithi.expr)
library(ithi.seq)
library(ithi.bed)
library(ithi.supp)
```

```{r}
nanostring_data_path <- snakemake@input$nanostring_data
nanostring_annotations_path <- snakemake@input$nanostring_annotations
ihc_table_path <- snakemake@input$ihc_table
master_variant_file <- snakemake@input$snv_table
master_breakpoint_file <- snakemake@input$breakpoint_table
somatic_coding_result_dir <- snakemake@input$somatic_coding_result_dir
molecular_subtype_file <- snakemake@input$molsubtypes

refseq_gene_file <- snakemake@params$refseq_gene_file
tils_for_cluster <- snakemake@params$tils_for_cluster
db_path <- snakemake@params$db
```

```{r}
annotation_colours <- ithi.figures::get_annotation_colours()

mart <- useDataset("hsapiens_gene_ensembl", useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "feb2014.archive.ensembl.org"))

ihc_table <- fread(ihc_table_path)
master_variant_table <- read_variant_file(master_variant_file, db_path)
master_breakpoint_table <- read_variant_file(master_breakpoint_file, db_path)

titan_file <- file.path(somatic_coding_result_dir, "somatic_cnv_titan.tsv")
somatic_snv_file <- file.path(somatic_coding_result_dir, "somatic_snvs.tsv")
somatic_indel_file <- file.path(somatic_coding_result_dir, "somatic_indels.tsv")
titan_cnv <- fread(titan_file)
somatic_snvs <- fread(somatic_snv_file)
somatic_indels <- fread(somatic_indel_file)

exprs <- fread(nanostring_data_path)
nanostring_labels <- fread(nanostring_annotations_path)
molsubtypes <- fread(molecular_subtype_file)

til_clusters <- ithi.figures:::get_til_clusters(ihc_table, molsubtypes, tils_for_cluster = tils_for_cluster, nclusts = 3)
```

## Pathways immunoediting analysis

The purpose of this analysis is to show whether or not immune checkpoint or other immunosuppressive factors may be present and associated with TIL densities/our N/S/ES-TIL clusters. If so, this reviewer thinks they may interfere with any immunoediting that we claim may be taking place. 

In actuality, I think this reviewer's argument is flawed -- we're measuring the current state of the tumour, not the past. Even if checkpoint molecules are present -- and interfering with immunoediting now -- this may not have been the case in the tumour's history. 

### Expression-level

```{r}
# antigen-processing machinery
# APM_genes <- c("CANX", "HSPA5", "B2M", "HLA-A", "HLA-B", "HLA-C", "TAP1", "TAP2", "TAPBP", "CALR", "PDIA3")
APM_genes <- c("HLA-A", "HLA-B", "HLA-C", "TAP1", "TAP2", "TAPBP", "PSMB9")#, "PSMB7")
checkpoint_genes <- subset(nanostring_labels, str_detect(gene_class, "Checkpoint"))$Gene
immune_suppression <- c("IDO1")#, "IDO2", "ALOX") # p53 excluded because p53 loss is ubiquituous in HGSC; others are absent
ifng_genes <- c("IFNG")

mutation_genes <- c(APM_genes, ifng_genes)
```

```{r}
plot_cluster_expression <- function(expr, genes, ihc_table) {
  expr_subset <- subset(expr, Name %in% genes) %>% as.data.frame %>% column_to_rownames(var = "Name")
  
  sample_filter <- intersect(ihc_table$voa, colnames(expr_subset))
  expr_subset <- expr_subset[,sample_filter]
  
  expr_df <- melt(as.matrix(expr_subset)) %>% plyr::rename(c('Var1'='Name', 'Var2'='voa'))
  expr_ihc <- plyr::join(expr_df, ihc_table)
  expr_ihc$patient_id <- factor(as.character(expr_ihc$patient_id))
  
  
  pvals <- setNames(ddply(expr_ihc, .(Name), function(x) {
    df <- as.data.frame(x)
    corres <- with(df, cor.test(value, E_CD8_density, method = "spearman"))
    
    pval <- corres$p.value
    return(pval)
  }), c("Name", "p.value"))
  pvals$p.adj <- p.adjust(pvals$p.value, method = "fdr")
  
  pvals$p.adj.text <- sapply(pvals$p.adj, function(x) as.character(as.expression(substitute(italic(P)==p, list(p=format(x, digits=3))))))
  
  p <- ggplot(expr_ihc, aes(x=E_CD8_density, y=value)) + geom_point(aes(colour=patient_id)) + facet_wrap(~ Name, scales = "free") + scale_x_continuous(trans = "log10") + theme_bw() + ithi.utils::theme_Publication() + ithi.utils::theme_nature() + scale_colour_manual(values = annotation_colours$patient_id) + geom_text(data=pvals, aes(x=Inf, y=Inf, label=p.adj.text), hjust=1.1, vjust=1.5,size=2.5,parse=TRUE) 
  
  return(p)
}
```

```{r, cache=TRUE}
plot_cluster_expression(exprs, APM_genes, ihc_table)
plot_cluster_expression(exprs, checkpoint_genes, ihc_table)
plot_cluster_expression(exprs, ifng_genes, ihc_table)
plot_cluster_expression(exprs, immune_suppression, ihc_table)
```

So the expression of these genes is positively correlated with epithelial CD8+ TIL density. 

What does this mean? 

* APM genes: Antigen processing machinery is not underexpressed in samples with high epithelial CD8+ TIL, and therefore does not likely prevent immunoediting. 
* IFNG and checkpoint genes: Interferon-gamma is known to stimulate PD1 (Benci et al. 2016). Higher levels of checkpoint inhibitors, e.g. PD-1, would theoretically interfere with immunoediting. However, contrary to many other settings, in HGSC PD1+CD8+ TIL (which typically coexpress CD103+ according to Webb et al. 2014 and Komdeur et al. 2016) are associated with superior outcomes and retain functional competence, i.e. produce robust amounts of TNF-alpha and IFN-gamma (Webb et al. 2014, Webb et al. 2015). 
* Immune suppression genes: From Rooney -- we found that CYT was best correlated with immunosuppressive factors (Spranger et al., 2013), such as PDCD1LG2 (PDL2), IDO1/2, DOK3 (Lemay et al., 2000), GMCSF receptor (CSF2RA, CSF2RB) and the C1Q complex (Figure 1B).

### Mutation-level

Here, our goal is just to look for the presence of mutations in those immune-associated genes. For example, if there are mutations in antigen presenting machinery genes, these may interfere with antigen recognition and therefore immunoediting. 

```{r, results='hide', cache=TRUE}
titan_bed <- convert_to_bed(titan_cnv)
refseq_bed <- read_bed(refseq_gene_file)

gene_names <- getBM(attributes = c("refseq_mrna", "hgnc_symbol"), values = refseq_bed$refseq_id, mart = mart)

refseq_bed_annotated <- merge(refseq_bed, gene_names %>% subset(refseq_mrna != "") %>% plyr::rename(c('refseq_mrna'='refseq_id')), by = c('refseq_id'))

refseq_bed_annotated <- subset(refseq_bed_annotated, select=c("chr", "start", "end", "hgnc_symbol", "refseq_id"))
refseq_bed_annotated <- bedr.sort.region(refseq_bed_annotated)

titan_merged <- bedr(engine = "bedtools", input = list(a=titan_bed, b=refseq_bed_annotated), method = "intersect", params = "-loj")
```

```{r}
titan_merged <- data.frame(titan_merged)
titan_merged$median_logR <- as.numeric(titan_merged$median_logR)
titan_merged$total <- as.numeric(titan_merged$total)
titan_merged$major <- as.numeric(titan_merged$major)
titan_merged$minor <- as.numeric(titan_merged$minor)

titan_merged_filtered <- subset(titan_merged, (median_logR < -1) | (median_logR > 1))
```


```{r}
somatic_snvs_filtered <- plyr::join(somatic_snvs, master_variant_table %>% subset(is_present == 1), type = 'inner')
somatic_indels_filtered <- subset(somatic_indels, mappability == 1)
```

```{r}
titan_merged_filtered_immune <- subset(titan_merged_filtered, hgnc_symbol %in% mutation_genes)
somatic_snvs_immune <- subset(somatic_snvs_filtered, gene_name %in% mutation_genes)
somatic_indels_immune <- subset(somatic_indels_filtered, gene_name %in% mutation_genes)

print(somatic_snvs_immune)
print(somatic_indels_immune)
```

```{r}
## Because we have LOHHLA and that's more accurate in polymorphic regions
titan_merged_filtered_immune_nohla <- subset(titan_merged_filtered_immune, !hgnc_symbol %in% c("HLA-A", "HLA-B", "HLA-C"))

print(titan_merged_filtered_immune_nohla)
```

The reason there are no SNVs left is because of the mappability threshold. 

There's one indel -- in TAP, for sample `r somatic_indels_immune$condensed_id`. This sample is TIL subtype `r subset(til_clusters, condensed_id == somatic_indels_immune$condensed_id)$cluster`. 

There are CN high-level amplifications in IFNG/PSMB9/TAP1/TAP2/TAPBP. No deletions that might be interfering with antigen processing machinery. 
