# concatenate per-chromosome wigs

import sys, os, gzip

# Usage text printed when the script is called with the wrong number of
# arguments.
help_message = (
    "\nSpecify:\n"
    "(1) input dir containing per-chrom wigs,\n"
    "(2) prefix of target files in input dir (e.g. HS0995),\n"
    "(3) output dir where to write single merged gzipped wig,\n"
    "(4) output file name (must end in .gz)\n\n"
    "python catWigs.py input_dir prefix output_dir output_filename\n"
)

# Number of (decompressed) bytes requested per read while copying a wig body.
COPY_BLOCK_SIZE = 1000000


def _copy_wig_body(fh_in, fh_out):
    """Copy one wig file to fh_out, dropping everything before its data.

    Lines are skipped until the first one that starts with "fixed" or
    "variable"; that line and all remaining content are written to fh_out.

    Both handles must be opened in binary mode.

    Returns True if such a line was found and the content was copied, and
    False if the end of the file was reached first (nothing is written).
    """
    # iter(callable, sentinel) stops at EOF, where readline() returns an
    # empty string; without that stop a header-only file would loop forever.
    for line in iter(fh_in.readline, b""):
        if line.startswith((b"fixed", b"variable")):
            break
    else:
        # Loop ended without a break: no declaration line in this file.
        return False

    fh_out.write(line)
    # Copy the rest in large blocks rather than line by line.
    for chunk in iter(lambda: fh_in.read(COPY_BLOCK_SIZE), b""):
        fh_out.write(chunk)
    return True


def concatenate_wigs(in_dir, in_prefix, out_dir, out_fn):
    """Merge per-chromosome gzipped wigs into one trackless gzipped wig.

    Every file in in_dir whose name starts with in_prefix and ends with
    "wig.gz" is appended, in sorted file-name order, to the gzip file
    out_fn inside out_dir. For each input, the lines before the first
    "fixed..."/"variable..." line are dropped. The name of each processed
    file is printed to stdout.

    Files without such a line are skipped with a warning on stderr. The
    output file itself is never used as an input, even if it lives in
    in_dir and matches the prefix.
    """
    out_path = os.path.join(out_dir, out_fn)
    out_abs = os.path.abspath(out_path)

    # List the inputs first so that a bad in_dir fails before an empty
    # output file is created.
    fnames = sorted(os.listdir(in_dir))

    with gzip.open(out_path, 'wb') as fh_out:
        for f in fnames:
            if not (f.startswith(in_prefix) and f.endswith("wig.gz")):
                continue
            in_path = os.path.join(in_dir, f)
            if os.path.abspath(in_path) == out_abs:
                # Do not read the file we are currently writing.
                continue
            print(f)
            with gzip.open(in_path, 'rb') as fh_in:
                if not _copy_wig_body(fh_in, fh_out):
                    sys.stderr.write(
                        "Skipping %s: no fixedStep/variableStep line found\n"
                        % f)


def main(argv):
    """Command-line entry point; argv has the same layout as sys.argv.

    Expects exactly four arguments after the program name: input dir,
    file prefix, output dir and output file name. On a wrong argument
    count or an output name not ending in ".gz", prints a message and
    exits with status 1.
    """
    if len(argv) != 5:
        print(help_message)
        sys.exit(1)

    in_dir, in_prefix, out_dir, out_fn = argv[1:5]
    if not out_fn.endswith(".gz"):
        print("Output file name must end in .gz")
        sys.exit(1)

    concatenate_wigs(in_dir, in_prefix, out_dir, out_fn)


if __name__ == "__main__":
    main(sys.argv)