# 12 December 2010, 16h50 #R CMD BATCH '--args tissue="islet" marker="H3K4me1" nomap=T' Brad_PICS_MM9.R Brad_H3K4me1_res_islet.Rout& ##Read in the arguments listed at the command line rm(list=ls(all=TRUE)) args=(commandArgs(TRUE)) if (length(args)>0) for(i in 1:length(args)) eval(parse(text=args[[i]])) #if (!exists("tissue")) tissue="islet" if (!exists("tissue")) tissue="3T3L1_t2_GSM535750" if (!exists("minReads")) minReads=NULL if (!exists("marker")) marker="H3K4me1" if (!exists("nomap")) nomap=T print(paste("Analyze the",tissue,"data")) library(PICS) library(multicore) options(cores=6) #load the data and mappablility profile #indir <- "/nfs/data/xzhang/Brad/MM9/Data/" indir <- "/projects/remc_bigdata/Brad/3T3L1_t2_GSM535750_H3K4me1/" #infile <- "islet_H3K4me1_input_MM0368_MM0468_MM9.RData" infile <- "3T3L1_t2_GSM535750_H3K4me1_ChIP-seq.mm9.RData" load(file=paste(indir, infile, sep="")) IP=unique(dataIP) #CTL=unique(dataCTL) print(Reads.counts(IP) / Reads.counts(dataIP)) #proportion of retained reads rm(dataIP) #rm(dataIP,dataCTL) #IP["chrY"]=NULL # this chr only have tens of reads, segmentation step gave error message for "no candidate regions found" nChr=length(IP) chr.names=names(IP) #CTL=CTL[chr.names] #load(file="./convertedData/mappability.RData") #load(file="/projects/remc_bigdata/Brad/mappability/mm9.mappability-36.RData") #map.profile=map.profile[chr.names] # Error: object 'map.profile' not found (12 Dec 2010) if (marker=="H3K4me1") {rho1=1.2; sigmaB2=6400;rho=15;alpha1=10; alpha2=98; beta1=20000; beta2=200000} # fit the model setParaSeg(step=2,width=150,dataType="H") setParaEM(minK=0,maxK=0,tol=1e-4,B=100,mSelect="AIC3",mergePeaks=T,mapCorrect=T,dataType="H") setParaPrior(xi=150, rho=rho1, alpha=alpha1, beta=beta1, lambda=-0.000064, dMu=200, dataType="H") # add PICS-N result in a new chromosome to the existing PICS-N result add.res <- function(result=NULL, seg.tmp) { system.time(pics.tmp<-PICS(seg.tmp,dataType="H")) if(is.null(result)) return(list(pics=pics.tmp, seg=seg.tmp)) pics = result$pics seg = result$seg pics@N = pics@N + pics.tmp@N pics@Nc = pics@Nc + pics.tmp@Nc pics@List = c(pics@List, pics.tmp@List) seg@N = seg@N + seg.tmp@N seg@Nc = seg@Nc + seg.tmp@Nc seg@List = c(seg@List, seg.tmp@List) return(list(pics=pics, seg=seg)) } # Post-process PICS results postit <- function(result) { PS <- postPICS(result$pics, result$seg, rho=rho, makePlot=F, datname=datname, DupBound=DupBound, IP=IP, FragmentLenth=100, mart=NULL, sigmaB2=sigmaB2, rho1=rho1, alpha1=alpha1, alpha2=alpha2, beta2=beta2) PS$rank=1:nrow(PS) PS$center=round(PS$mu) PS$start=PS$mu-PS$delta/2 PS$end=PS$mu+PS$delta/2 temp=FilterPICS(PS,detail=F,deltaB=c(80,250),sigmaB1=25000,sigmaB2=20000,seB=Inf, score=0) return(temp$pics.df) } # 12 Dec 2010 - how do we configure this for no CTL and no mappability? Or for no CTL but with mappability? #result <- result2 <- NULL #for(idx.chr in 1:nChr) #{ # if(nomap) maps=NULL else maps=map.profile[idx.chr] # seg.tmp <- segmentReads(IP[idx.chr], CTL[idx.chr], map=maps, minReads=minReads, minReadsInRegion=5, dataType="H", maxLregion=1200, minLregion=80, jitter=T) # #set Nc=0 to ask PICS-N ignore control data # seg.tmp2 <- new("segReadsList", List=seg.tmp@List, paraSW=seg.tmp@paraSW, N=seg.tmp@N, Nc=as.integer(0)) # result <- add.res(result, seg.tmp) # result2 <- add.res(result2, seg.tmp2) #} #rm(seg.tmp, seg.tmp2) # 12 Dec 2010: for no-CTL/no-MAP (above: if (!exists("nomap")) nomap=T) result <- result2 <- NULL for(idx.chr in 1:nChr) { if(nomap) maps=NULL else maps=map.profile[idx.chr] seg.tmp <- segmentReads(IP[idx.chr], NULL, map=maps, minReads=minReads, minReadsInRegion=5, dataType="H", maxLregion=1200, minLregion=80, jitter=T) #set Nc=0 to ask PICS-N ignore control data seg.tmp2 <- new("segReadsList", List=seg.tmp@List, paraSW=seg.tmp@paraSW, N=seg.tmp@N, Nc=as.integer(0)) result <- add.res(result, seg.tmp) result2 <- add.res(result2, seg.tmp2) } rm(seg.tmp, seg.tmp2) predictions0 <- postit(result) #predictions2 <- postit(result2) # ignore this when you don't have a control #outdir <- "/nfs/data/xzhang/Brad/MM9/PICS_result/" outdir <- "/projects/remc_bigdata/Brad/3T3L1_t1_GSM535743_H3K4me1/PICS_result/" save.image(paste(outdir, tissue,"_",marker,"_",nomap,"_PICS_temp.rda",sep="")) if(tissue=="3T3L1_t1_GSM535743") { pdf(file="/projects/remc_bigdata/Brad/3T3L1_t1_GSM535743_H3K4me1/PICS_result/sorted.predictions0.score.pdf") plot(sort(predictions0$score),type="l",ylim=c(0,10), main="3T3L1_t1_GSM535743_H3K4me1", ylab="relative enrichment IP, no input", xlab="rank", lwd=3) abline(h=c(1.5,5), col=3, lty=2) abline(h=c(1.5), col=2, lty=3) dev.off() } if(tissue=="3T3L1_t2_GSM535750") { pdf(file="/projects/remc_bigdata/Brad/3T3L1_t2_GSM535750_H3K4me1/PICS_result/sorted.predictions0.score.pdf") plot(sort(predictions0$score),type="l",ylim=c(0,10), main="3T3L1_t2_GSM535750_H3K4me1", ylab="relative enrichment IP, no input", xlab="rank", lwd=3) abline(h=c(1.5,5), col=3, lty=2) abline(h=c(1.5), col=2, lty=3) dev.off() } if(tissue=="islet") { plot(sort(predictions0$score),type="l",ylim=c(0,10), main="islet_H3K4me1", ylab="relative enrichment IP vs. input", xlab="rank", lwd=3) abline(h=c(1.5,5), col=3, lty=2) abline(h=c(1.5), col=2, lty=3) } if(tissue=="liver") { plot(sort(predictions0$score),type="l",ylim=c(0,5), main="liver_H3K4me1", ylab="relative enrichment IP vs. input", xlab="rank", lwd=3) abline(h=c(1,3.5), col=3, lty=2) } table(predictions0$score>1.5) predictions3 <- subset(predictions0, predictions0$score>=1.5) idx <- paste(predictions2$chr,predictions2$mu) %in% paste(predictions3$chr,predictions3$mu) predictions <- predictions2[idx,] rm(predictions3) readsN=sum(Reads.counts(IP)) #outdir <- "/nfs/data/xzhang/Brad/MM9/PICS_result/" outdir <- "/projects/remc_bigdata/Brad/3T3L1_t1_GSM535743_H3K4me1/PICS_result/" save.image(paste(outdir, tissue,"_",marker,"_",nomap,"_PICS.rda",sep="")) predictions <- predictions0 #prediction0 is when there is no CTL sample save(predictions, readsN, file=paste(outdir, tissue,"_",marker,"_",nomap,"_PICS_predictions.rda",sep="")) q(save="no") ############################################################################## #filter the valley nucleosomes who have significantly higher nucleosomes within 500bp #load("PICS_result/islet_H3K4me1_TRUE_PICS_predictions.rda") load("/projects/remc_bigdata/Brad/3T3L1_t1_GSM535743_H3K4me1/PICS_result/3T3L1_t1_GSM535743_H3K4me1_TRUE_PICS_predictions.rda") library(PICS) library(multicore) predictions <- predictions0 #prediction0 is when there is no CTL sample system.time( predictions <- rm.valley.nuc(predictions) ) #export PICS result #exportPICS <- function(allow.cls=0, tissue="islet", marker="H3K4me1", outdir="/nfs/data/xzhang/Brad/MM9/PICS_result/") exportPICS <- function(allow.cls=0, tissue="3T3L1_t1_GSM535743", marker="H3K4me1", outdir="/projects/remc_bigdata/Brad/3T3L1_t1_GSM535743_H3K4me1/PICS_result/") { predictions <- predictions[predictions$class %in% allow.cls,] #chr.info.file="/nfs/data/xzhang/chromInfo.mm9.txt" chr.info.file="/projects/remc_bigdata/Brad/chromInfo_simpleChrs.mm9.txt" opsci=getOption("scipen") options(scipen=100) temp1=make.thickthin(predictions) temp1$thickthin <- truncate.result(temp1$thickthin, chr.info.name=chr.info.file, positions=c("start","end","thickstart","thickend")) temp1$scores <- truncate.result(temp1$scores, chr.info.name=chr.info.file, positions=c("start","end")) outfile1 <- paste(outdir, tissue, "_", marker, "_thickthin_filtered_mm9.txt", sep="") write.table(temp1$thickthin, file=outfile1, quote =F, row.names=F) outfile2 <- paste(outdir, tissue, "_", marker, "_scores_filtered_mm9.txt", sep="") write.table(temp1$scores, file=outfile2, quote =F, row.names=F) options(scipen=opsci) } exportPICS() save.image("/projects/remc_bigdata/Brad/3T3L1_t1_GSM535743_H3K4me1/PICS_result/3T3L1_t1_GSM535743_H3K4me1_FALSE_PICS_predictions_valleyNucRemoved.rda")