Packages

I attach only the packages I use the most, while trying to avoid namespace conflicts. Functions from any other package are called with the `package::function()` syntax.

# Load packages in order of increasing importance
library(magrittr)
library(matrixStats)
library(DESeq2)
library(tidyverse)

File Paths

Below are the paths to the files and directories that will be used in this report.

# `paths` is a named list holding every location this report reads from.
# All entries are rooted at PROJHOME, which must be defined before this runs.
paths <- list(
  # Metadata
  patient_md = file.path(PROJHOME, "metadata", "patient_metadata.tsv"),
  mrna_md    = file.path(PROJHOME, "metadata", "mrna_metadata.bams.tsv"),

  # Reference
  mbl_genes    = file.path(PROJHOME, "etc", "mbl_signature.genes.txt"),
  morgan_genes = file.path(PROJHOME, "etc", "morgan.genes.txt"),
  wright_genes = file.path(PROJHOME, "etc", "wright.genes.txt"),
  grch38       = file.path(PROJHOME, "reference", "GRCh38")
)

# tx2gene is built from the grch38 entry, so it can only be added once the
# list above exists.
paths$tx2gene <- file.path(
  paths$grch38, "Sequence", "GencodeTranscriptome", "25",
  "gencode_v25_and_NC_007605.tx2gene.tsv"
)

# Results (appended last to keep the entries in their original order)
paths <- c(
  paths,
  list(
    ebv_status = file.path(PROJHOME, "results", "ebv_status", "ebv_status.txt"),
    sex_status = file.path(PROJHOME, "results", "sex_status", "sex_status.txt"),
    salmon     = file.path(PROJHOME, "results", "salmon")
  )
)

Global Variables

Below, I define variables that are generally useful.

# Set RNG seed for reproducibility
global_seed <- 87510475  # Generated by runif(1, 0, 10^8)
set.seed(global_seed)

# Use all available cores when multithreading.
# parallel::detectCores() returns NA when the number of cores cannot be
# determined; fall back to a single core so that registering the parallel
# backends below does not receive NA as the worker count.
num_cores <- parallel::detectCores()
if (is.na(num_cores)) {
  num_cores <- 1L
}
doMC::registerDoMC(cores = num_cores)
BiocParallel::register(BiocParallel::MulticoreParam(workers = num_cores))

# Parameters for multiple test correction
# p_adjust() is p.adjust() with the method pre-set to "BH"
# (Benjamini & Hochberg).
p_adjust <- partial(p.adjust, method = "BH")
# NOTE(review): presumably the adjusted p-value cutoff -- confirm against usage
qmin     <- 0.1

# Effective genome size
genome_size <- 2934876451

# Number of most variably expressed genes
ntop <- 1000