#!/usr/bin/env bash
# 24 September 2010, map2bed_20100918.sh
#
# Purpose: build a single gzipped 4-column 'reads' BED file
# (chr, start, end, strand) from the per-file BED output that is present
# in the current working directory.
#
# Usage: run from the directory that holds the *.bed / *.bed.gz files.
#
# Files written to the current directory:
#   ${BASENAME}.reads.txt           all input BED files concatenated
#   ${BASENAME}.notrack.reads.bed   only the lines that start with "chr"
#   ${BASENAME}.4cols.reads.bed.gz  columns 1,2,3,6 of the above, gzipped

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# ---------------------------------------------------------------------------
# Note received about the alignments:
#
# The bwa alignments have completed using UCSC SacCer2 which was downloaded
# from UCSC yesterday. The bam alignment files can be found here:
#   /archive/solexa1_4/analysis/SC0002/FC6255_5/bwa/FC6255_5.fastq.bam
#   /archive/solexa1_4/analysis/SC0007/FC6360_2/bwa/FC6360_2.fastq.bam
#   /archive/solexa1_4/analysis/SC0008/FC6360_3/bwa/FC6360_3.fastq.bam
# Note that since these are older runs (single read - 36 bases), they only
# had the seq.txt and prb.txt files available. Chastity filtering was not
# applied at that time which means that alignments are not marked in the
# resulting bam files.
# Hope that helps.
# Cheers
# Richard
# ---------------------------------------------------------------------------

# Location of the FindPeaks conversion utilities (ConvertToBed.jar).
FPCU=/home/grobertson/FindPeaks/Code/v4.0.15/conversion_util

# ConvertToBed options:
#  -help           | Displays this text
#  -input          | Provide a list of file(s) to read and process.
#  -output         | Path where the output files should be deposited.
#  -name           | name of file(s).
#  -aligner        | name of the aligner.
#  -nochr          | drop the name of the chromosome/genome.
#  -noprepend      | do not prepend chr to the chromosome name.
#  -qualityfilter  | the minimum quality to accept, when using maq files.
# Junction specific flags:
#  -junctionmap    | the file containing the map to the junction annotation.
#  -junctionsize   | the size of the junction map in use.
#                    Expected values 36,42,50

# Active library. Only the commented-out ConvertToBed step below reads
# BAM, OUTDIR, NAME and FPCU.
BAM=/archive/solexa1_4/analysis/SC0002/FC6255_5/bwa/FC6255_5.fastq.bam
OUTDIR=/projects/remc_bigdata/S_cere/SC0002
NAME=SC0002-wt_sacCer2

# Alternative libraries (swap with the block above to process them).
# NOTE(review): the NAME values below end in sacCer7 / sacCer8 and the last
# one starts with SC0002 although its paths are SC0008 - possibly typos;
# confirm before enabling.
#BAM=/archive/solexa1_4/analysis/SC0007/FC6360_2/bwa/FC6360_2.fastq.bam
#OUTDIR=/projects/remc_bigdata/S_cere/SC0007
#NAME=SC0007-Fpr3-ko_sacCer7

#BAM=/archive/solexa1_4/analysis/SC0008/FC6360_3/bwa/FC6360_3.fastq.bam
#OUTDIR=/projects/remc_bigdata/S_cere/SC0008
#NAME=SC0002-Fpr4-ko_sacCer8

# BAM -> BED conversion step, currently disabled.
#echo "java16 -jar $FPCU/ConvertToBed.jar -input $BAM -output $OUTDIR -name $NAME -aligner sam -qualityfilter 10 -noprepend"
#java16 -jar $FPCU/ConvertToBed.jar -input $BAM -output $OUTDIR -name $NAME -aligner sam -qualityfilter 10 -noprepend

# Prepare a 'reads' BED file for Xuekui
BASENAME=SC0002-wt_sacCer2_all

# Every file this script writes starts with "${BASENAME}."; that prefix is
# used below to keep earlier outputs out of the *.gz / *.bed input globs so
# the script can be re-run without duplicating reads.
reads_txt="${BASENAME}.reads.txt"
notrack_bed="${BASENAME}.notrack.reads.bed"
fourcol_bed="${BASENAME}.4cols.reads.bed"

echo "gUNzip"
for gz_file in *.gz; do
  [ -e "$gz_file" ] || continue      # glob matched nothing
  case "$gz_file" in
    "$BASENAME".*) continue ;;       # result of an earlier run
  esac
  gunzip -- "$gz_file" || die "gunzip failed for $gz_file"
done

echo "cat to one file"
: > "$reads_txt"
input_count=0
for bed_file in *.bed; do
  [ -e "$bed_file" ] || continue     # glob matched nothing
  case "$bed_file" in
    "$BASENAME".*) continue ;;       # result of an earlier run
  esac
  cat -- "$bed_file" >> "$reads_txt" || die "cannot append $bed_file"
  input_count=$((input_count + 1))
done
[ "$input_count" -gt 0 ] || die "no input *.bed files found in $(pwd)"

echo "remove track lines"
# Keep only records whose first field starts with "chr".
awk '/^chr/' "$reads_txt" > "$notrack_bed" \
  || die "failed to write $notrack_bed"

echo "extract cols for {chr,start,end,strand}"
awk '{ print $1 "\t" $2 "\t" $3 "\t" $6 }' "$notrack_bed" > "$fourcol_bed" \
  || die "failed to write $fourcol_bed"

echo "gzip"
# -f: replace the .gz left behind by an earlier run instead of failing.
gzip -f -- "$fourcol_bed" || die "gzip failed for $fourcol_bed"

# rename the file
# (manual step, not automated here)

# The result
# NOTE(review): the path below is from a different (mouse mm9) data set,
# presumably kept as an example of the target naming scheme - confirm.
#/projects/remc_bigdata/MORGEN/MM0386_H3K4me1_adult_mouse_islets/mm9/MM0386_H3K4me1_mouse_adult_islets.MAQ.allreads.mm9.bed.gz