# 24 September 2010, map2bed_20100918.sh

#The bwa alignments have completed using UCSC SacCer2 which was downloaded from UCSC yesterday. The bam alignment files can be found here:
# Selected alignment file (sample SC0002); the other samples' files are kept
# below, commented out, as alternatives.
BAM='/archive/solexa1_4/analysis/SC0002/FC6255_5/bwa/FC6255_5.fastq.bam'
#BAM=/archive/solexa1_4/analysis/SC0007/FC6360_2/bwa/FC6360_2.fastq.bam
#BAM=/archive/solexa1_4/analysis/SC0008/FC6360_3/bwa/FC6360_3.fastq.bam
#Note that since these are older runs (single read - 36 bases), they only had the seq.txt and prb.txt files available. Chastity filtering was not applied at that time which means that alignments are not marked in the resulting bam files.
#Hope that helps.
#Cheers
#Richard


# Directory of the FindPeaks v4.0.15 conversion utilities; the ConvertToBed.jar
# invocation further down is resolved relative to it.
FPCU='/home/grobertson/FindPeaks/Code/v4.0.15/conversion_util'
# -help            | Displays this text
# -input           | Provide a list of file(s) to read and process.
# -output          | Path where the output files should be deposited.
# -name            | name of file(s).
# -aligner         | name of the aligner.
# -nochr           | drop the name of the chromosome/genome.
# -noprepend       | do not prepend chr to the chromosome name.
# -qualityfilter   | the minimum quality to accept, when using maq files.
#Junction specific flags:
# -junctionmap     | the file containing the map to the junction annotation.
# -junctionsize    | the size of the junction map in use.  Expected values 36,42,50

# Active sample: SC0002. These three values are the -input, -output and -name
# arguments of the (currently commented-out) ConvertToBed call below.
BAM='/archive/solexa1_4/analysis/SC0002/FC6255_5/bwa/FC6255_5.fastq.bam'
OUTDIR='/projects/remc_bigdata/S_cere/SC0002'
NAME='SC0002-wt_sacCer2'

#BAM=/archive/solexa1_4/analysis/SC0007/FC6360_2/bwa/FC6360_2.fastq.bam
#OUTDIR=/projects/remc_bigdata/S_cere/SC0007
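#NAME=SC0007-Fpr3-ko_sacCer7
# NOTE(review): "sacCer7" looks like a typo for sacCer2 (the assembly named in the note at the top of this file and in the active NAME) -- confirm before enabling.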

#BAM=/archive/solexa1_4/analysis/SC0008/FC6360_3/bwa/FC6360_3.fastq.bam
#OUTDIR=/projects/remc_bigdata/S_cere/SC0008
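#NAME=SC0002-Fpr4-ko_sacCer8
# NOTE(review): the BAM and OUTDIR of this group are for SC0008, so "SC0002" here is probably a copy-paste slip for SC0008, and "sacCer8" for sacCer2 -- confirm before enabling.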

#echo "java16 -jar $FPCU/ConvertToBed.jar -input $BAM -output $OUTDIR -name $NAME -aligner sam -qualityfilter 10 -noprepend"
#java16 -jar $FPCU/ConvertToBed.jar -input $BAM -output $OUTDIR -name $NAME -aligner sam -qualityfilter 10 -noprepend

# Prepare a 'reads' BED file for Xuekui.
#
# Operates on the current working directory (the script never changes
# directory; presumably it is run from the directory holding the ConvertToBed
# output -- confirm). Files written, all named after $BASENAME:
#   $BASENAME.reads.txt           every *.bed file concatenated
#   $BASENAME.notrack.reads.bed   only the lines starting with "chr"
#   $BASENAME.4cols.reads.bed.gz  tab-separated fields 1, 2, 3 and 6
#                                 (chr, start, end, strand), gzipped
BASENAME=SC0002-wt_sacCer2_all
ALL_READS="$BASENAME.reads.txt"
NOTRACK_BED="$BASENAME.notrack.reads.bed"
FOURCOL_BED="$BASENAME.4cols.reads.bed"

# Print a message on stderr and stop, so that a failed step cannot feed stale
# or missing data into the steps after it.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Drop the outputs of a previous run first: they match the *.gz / *.bed globs
# used below and would otherwise be decompressed and concatenated back into the
# input, duplicating reads.
rm -f -- "$NOTRACK_BED" "$FOURCOL_BED" "$FOURCOL_BED.gz" \
  || die "cannot remove stale output files"

echo "gUNzip"
# The inputs may already be decompressed, so finding no *.gz file is not an
# error; an unmatched glob stays literal and is skipped by the -e test.
for gz in *.gz; do
  [ -e "$gz" ] || continue
  gunzip -- "$gz" || die "gunzip failed for $gz"
done

echo "cat to one file"
# The combined file ends in .txt so that it is never matched by *.bed itself.
cat -- *.bed > "$ALL_READS" || die "could not concatenate *.bed files"

echo "remove track lines"
# Shell redirection (instead of awk's own 'print > file') creates the output
# even when no line matches, so the next step always has a file to read.
awk '/^chr/' "$ALL_READS" > "$NOTRACK_BED" \
  || die "filtering track lines failed"

echo "extract cols for {chr,start,end,strand}"
awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $6 }' "$NOTRACK_BED" \
  > "$FOURCOL_BED" || die "column extraction failed"

echo "gzip"
gzip -- "$FOURCOL_BED" || die "gzip failed for $FOURCOL_BED"
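# TODO: rename the gzipped file -- no rename step is implemented in this script.
# The result
# NOTE(review): the path below is a mouse (mm9) dataset, not SC0002; presumably it is a naming example carried over from another script -- confirm.
#/projects/remc_bigdata/MORGEN/MM0386_H3K4me1_adult_mouse_islets/mm9/MM0386_H3K4me1_mouse_adult_islets.MAQ.allreads.mm9.bed.gz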