# concatenate per-chromosome wigs
#
# Usage: python catWigs.py input_dir prefix output_dir output_filename
#
# Every file in input_dir whose name starts with `prefix` and ends with
# "wig.gz" is appended, in sorted file-name order, to one gzipped output
# file. Lines that precede the first line starting with "fixed" or
# "variable" in each input (e.g. track header lines) are dropped, so the
# merged file is "trackless".
import gzip
import os
import sys

help_message = (
    "\nSpecify:\n"
    "(1) input dir containing per-chrom wigs,\n"
    "(2) prefix of target files in input dir (e.g. HS0995),\n"
    "(3) output dir where to write single merged gzipped wig,\n"
    "(4) output file name (must end in .gz)\n\n"
    "python catWigs.py input_dir prefix output_dir output_filename\n"
)

# Files are opened in binary mode, so every comparison is done on bytes.
# A line starting with one of these prefixes marks the start of the data
# (presumably the wig fixedStep / variableStep declaration lines).
DECLARATION_PREFIXES = (b"fixed", b"variable")

# Number of decompressed bytes copied per read once the header is skipped.
CHUNK_SIZE = 1000000


def _copy_wig_data(fh_in, fh_out, chunk_size=CHUNK_SIZE):
    """Copy fh_in to fh_out, dropping everything before the data start.

    The data start is the first line beginning with one of
    DECLARATION_PREFIXES; that line and everything after it is copied.

    Args:
        fh_in: binary file-like object to read from.
        fh_out: binary file-like object to write to.
        chunk_size: number of bytes to read per block after the header.

    Returns:
        True if a start line was found and the data copied; False if the
        end of fh_in was reached first (nothing is written in that case).
    """
    # readline() returns an empty string at EOF; using it as the sentinel
    # guarantees termination on inputs that have no start line at all.
    for line in iter(fh_in.readline, b""):
        if line.startswith(DECLARATION_PREFIXES):
            break
    else:
        return False

    fh_out.write(line)
    # Write out the rest of the file in blocks.
    chunk = fh_in.read(chunk_size)
    while chunk:
        fh_out.write(chunk)
        chunk = fh_in.read(chunk_size)
    return True


def cat_wigs(in_dir, in_prefix, out_path):
    """Create a trackless concatenated file of the per-chrom wig files.

    Args:
        in_dir: directory holding the per-chromosome "*wig.gz" files.
        in_prefix: only files whose name starts with this are merged.
        out_path: path of the gzipped file to write (overwritten).

    Inputs are processed in sorted file-name order and each processed
    name is printed. An input without any start line is skipped with a
    warning on stderr.
    """
    # List the inputs before the output is created, and exclude the output
    # path itself, so an output located in in_dir is never fed back in.
    fnames = sorted(os.listdir(in_dir))
    out_abs = os.path.abspath(out_path)

    with gzip.open(out_path, "wb") as fh_out:
        for fname in fnames:
            if not (fname.startswith(in_prefix) and fname.endswith("wig.gz")):
                continue
            in_path = os.path.join(in_dir, fname)
            if os.path.abspath(in_path) == out_abs:
                continue
            print(fname)
            with gzip.open(in_path, "rb") as fh_in:
                copied = _copy_wig_data(fh_in, fh_out)
            if not copied:
                sys.stderr.write(
                    "Warning: no line starting with 'fixed' or 'variable' "
                    "found in %s; file skipped\n" % fname
                )


def main(argv):
    """Parse the command line and run the merge.

    Args:
        argv: full argument vector, including the program name.

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) != 5:
        print(help_message)
        return 1

    in_dir, in_prefix, out_dir, out_fn = argv[1:5]
    if not out_fn.endswith(".gz"):
        print("Output file name must end in .gz")
        return 1

    # os.path.join works whether or not out_dir has a trailing separator.
    cat_wigs(in_dir, in_prefix, os.path.join(out_dir, out_fn))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))