---
title: "Make VCF of gambl SSMs from all lymphoma genes"
output: html_document
editor_options:
  chunk_output_type: console
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# intro

For each BL, DLBCL and FL pathology, create a VCF file with SSMs from the
bundled data, from all lymphoma genes, genome seq type, and grch37 projection.

# fetch ssms from all lymphoma genes

```{r}
# load libraries
library(GAMBLR.data)
library(dplyr)

# regions: one entry per gene symbol listed in lymphoma_genes_comprehensive
all_lymphome_gene_regions <- GAMBLR.utils::gene_to_region(
  lymphoma_genes_comprehensive$Gene
)

# metadata: genome samples only, split into one data frame per pathology
meta_all <- get_gambl_metadata(seq_type_filter = "genome")
meta_by_pathology <- filter(meta_all, pathology %in% c("BL", "DLBCL", "FL")) %>%
  split(.$pathology)

# retrieve ssms: one maf table per pathology, restricted to the gene regions
maf_by_patology <- lapply(meta_by_pathology, function(meta_by_pathology_i) {
  get_ssm_by_regions(
    these_samples_metadata = meta_by_pathology_i,
    regions_list = all_lymphome_gene_regions,
    basic_columns = TRUE,
    streamlined = FALSE
  )
})

# save maf tables to file, one <pathology>.maf per list element
maf_dir <- "/projects/rmorin/projects/gambl-repos/gambl-vsouza/vlad_results/bundled_allLymphomaGenes_genome_grch37"
# recursive: create missing parents; showWarnings = FALSE: silent on re-runs
dir.create(maf_dir, recursive = TRUE, showWarnings = FALSE)
maf_files <- file.path(maf_dir, paste0(names(maf_by_patology), ".maf"))
# invisible(): write.table() returns NULL, no need to print a list of NULLs
invisible(mapply(
  write.table,
  x = maf_by_patology,
  file = maf_files,
  MoreArgs = list(quote = FALSE, sep = "\t", row.names = FALSE)
))
```

# convert maf file to vcf files

```{bash}
# define variables
INPUT_MAF_DIR=/projects/rmorin/projects/gambl-repos/gambl-vsouza/vlad_results/bundled_allLymphomaGenes_genome_grch37

# activate conda env
# NOTE(review): `conda activate` normally needs the conda shell hook to be
# loaded in non-interactive shells -- confirm this works when knitting.
conda activate /home/vsouza/software/miniconda3/envs/vlad_env

# from here on, stop at the first failing command so that a failed conversion
# or sort never reaches the `rm` of the unsorted vcf below
set -eo pipefail

# get maf files; only *.maf so that the vcf/ sub-directory (present on
# re-runs) and any other files are not passed to maf2vcf
shopt -s nullglob
maf_files=("${INPUT_MAF_DIR}"/*.maf)
if [ "${#maf_files[@]}" -eq 0 ]; then
  echo "No .maf files found in ${INPUT_MAF_DIR}" >&2
  exit 1
fi

# convert maf to vcf
vcf_dir="${INPUT_MAF_DIR}/vcf"
mkdir -p "${vcf_dir}"
for maf_file in "${maf_files[@]}"; do

  # maf2vcf
  perl /home/vsouza/repos/lcr-modules/modules/vcf2maf/1.3/src/maf2vcf.pl \
    --input-maf "${maf_file}" \
    --output-dir "${vcf_dir}"

  # get vcf file path: <vcf_dir>/<maf basename without .maf>.vcf
  vcf_file="${vcf_dir}/$(basename "${maf_file}" .maf).vcf"

  # sorted vcf file path
  sorted_vcf_file="${vcf_file%.vcf}_sorted.vcf"
  bcftools sort \
    "${vcf_file}" \
    -o "${sorted_vcf_file}"
  rm "${vcf_file}"

  # compress (-f: overwrite the .gz left by a previous run)
  bgzip -f "${sorted_vcf_file}"

  # index (-f: overwrite the .tbi left by a previous run)
  tabix -f -p vcf "${sorted_vcf_file}.gz"
done
```

# copy vcf files to gsc download dir

```{bash}
# stop at the first failing command
set -eo pipefail

VCF_DIR=/projects/rmorin/projects/gambl-repos/gambl-vsouza/vlad_results/bundled_allLymphomaGenes_genome_grch37/vcf
DL_DIR=/gsc/www/bcgsc.ca/downloads/morinlab/vsouza/bundled_allLymphomaGenes_genome_grch37/vcf

# -p: create missing parent directories and do not fail if DL_DIR exists
mkdir -p "${DL_DIR}"
cp "${VCF_DIR}"/*.vcf.gz "${DL_DIR}/"
cp "${VCF_DIR}"/*.vcf.gz.tbi "${DL_DIR}/"
```