"""Increase the step size of a fixedStep WIG file without regenerating it.

Consecutive data values are averaged in bins of a fixed number of lines and
the rounded mean of each bin is written under a rewritten ``fixedStep``
declaration.  Bins whose rounded mean is zero are not written; the next
declaration that is written carries a ``start=`` moved past them.

Usage: <script> IN.wig[.gz] OUT.wig BIN_SIZE NEW_STEP NEW_SPAN

The code avoids syntax that is specific to either Python 2 or Python 3.
"""
import gzip
import math
import sys


class WigFormatError(ValueError):
    """Raised when the input is not a usable fixedStep WIG stream."""


def _round_half_away(value):
    """Round *value* to the nearest int, with ties going away from zero.

    This is the rounding Python 2's ``round`` performs.  Python 3's ``round``
    sends ties to the nearest even integer, which would change bin means
    (0.5 would become 0 and the bin would be skipped as a zero bin).
    """
    return int(math.copysign(math.floor(abs(value) + 0.5), value))


def _parse_fixed_step(line):
    """Split a fixedStep declaration into ``(prefix, start, step)``.

    *prefix* is the first two fields (``"fixedStep chrom=..."``) joined by a
    space; *start* and *step* are the integer values of the declaration.

    Raises:
        WigFormatError: if the field count, field order, or the integer
            values of ``start=``/``step=`` are not as expected.
    """
    pieces = line.split()
    if len(pieces) < 4 or len(pieces) > 5:
        raise WigFormatError("Unexpected fixedStep header: " + line)

    expected = ("chrom=", "start=", "step=", "span=")
    for piece, field in zip(pieces[1:], expected):
        if not piece.startswith(field):
            example = "fixedStep chrom=chr1 start=1 step=1"
            if len(pieces) == 5:
                example += " span=1"
            raise WigFormatError(
                "Unexpected fixedStep header format.\n"
                "Expect: '" + example + "'")

    try:
        start = int(pieces[2].split("=", 1)[1])
        step = int(pieces[3].split("=", 1)[1])
    except ValueError:
        raise WigFormatError("Unexpected fixedStep header: " + line)
    return pieces[0] + " " + pieces[1], start, step


class _Section(object):
    """Binning state for the data lines that follow one fixedStep line."""

    def __init__(self, prefix, start, old_step, suffix):
        self.prefix = prefix      # "fixedStep chrom=..."
        self.start = start        # start= for the next header to be written
        self.old_step = old_step  # step= of the input declaration
        self.suffix = suffix      # " step=N span=M\n" with the new values
        self.need_header = True   # next written mean must follow a header
        self.since_start = 0      # data lines consumed since self.start
        self.count = 0            # values in the currently open bin
        self.total = 0.0          # sum of the values in the open bin

    def add(self, value):
        """Add one data value to the open bin."""
        self.total += value
        self.count += 1
        self.since_start += 1

    def close_bin(self):
        """Close the open bin and return the output lines it produces.

        Returns an empty list when the bin is empty or its rounded mean is
        zero; otherwise the mean line, preceded by a header line when one is
        due.  The mean is taken over the values actually in the bin, so a
        short trailing bin is not diluted by missing values.
        """
        if self.count == 0:
            return []
        mean = _round_half_away(self.total / self.count)
        self.count = 0
        self.total = 0.0

        if mean == 0:
            # Skip the bin: move the pending start past every data line
            # consumed so far.  Each input line covers old_step bases, so the
            # line count has to be scaled by the input step.
            self.start += self.since_start * self.old_step
            self.since_start = 0
            self.need_header = True
            return []

        out = []
        if self.need_header:
            out.append(self.prefix + " start=" + str(self.start) + self.suffix)
            self.need_header = False
        out.append(str(mean) + "\n")
        return out


def rebin_wig(lines, binSize, newStep, newSpan, counts=None):
    """Yield the output lines of a re-binned fixedStep WIG stream.

    Args:
        lines: iterable of text lines of the input WIG.
        binSize: number of consecutive data lines averaged into one value.
        newStep: value written as ``step=`` in every emitted declaration.
        newSpan: value written as ``span=`` in every emitted declaration.
        counts: optional dict; on return ``counts["old"]`` holds the number
            of data lines read and ``counts["new"]`` the number of mean
            lines yielded.

    Yields:
        Output lines, each terminated by a newline.  ``browser`` and
        ``track`` lines are passed through; ``#`` comments and blank lines
        are dropped.

    Raises:
        ValueError: if *binSize* is smaller than 1.
        WigFormatError: on a malformed declaration, a non-numeric data line,
            or a data line that precedes the first declaration.
    """
    if binSize < 1:
        raise ValueError("number of steps to bin must be at least 1")
    if counts is None:
        counts = {}
    counts["old"] = 0
    counts["new"] = 0
    suffix = " step=" + str(newStep) + " span=" + str(newSpan) + "\n"

    def close(current):
        # Close the open bin of *current* (if any) and count a written mean.
        if current is None:
            return []
        out = current.close_bin()
        if out:
            counts["new"] += 1
        return out

    section = None
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith("browser") or line.startswith("track"):
            # A run of data ends here; emit its partial bin before the
            # passthrough line so the output stays in input order.
            for out in close(section):
                yield out
            yield line + "\n"
            continue

        if line.startswith("fixedStep"):
            # Keep the tail of the previous section instead of dropping it.
            for out in close(section):
                yield out
            prefix, start, old_step = _parse_fixed_step(line)
            section = _Section(prefix, start, old_step, suffix)
            continue

        if section is None:
            raise WigFormatError(
                "Data line found before any fixedStep header: " + line)
        try:
            value = float(line)
        except ValueError:
            raise WigFormatError("Unexpected data line: " + line)

        counts["old"] += 1
        section.add(value)
        if section.count == binSize:
            for out in close(section):
                yield out

    # Final (possibly partial) bin of the last section.
    for out in close(section):
        yield out


def _open_input(path):
    """Open *path* for reading text, decompressing it when it ends in .gz."""
    if path.endswith(".gz"):
        if sys.version_info[0] >= 3:
            # Text mode, so lines are str rather than bytes on Python 3.
            return gzip.open(path, "rt")
        return gzip.open(path, "rb")
    return open(path, "r")


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv``.

    Exits with status 1 on wrong usage or malformed input.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 6:
        print("\nSpecify:\n")
        print("(1) a fixedStep WIG file (can be gzipped),")
        print("(2) an output file name,")
        print("(3) number of steps to bin,")
        print("(4) new step size (not necessarily the same as (3) "
              "if fixedStep != 1)")
        print("(5) new span size\n")
        sys.exit(1)

    in_fn = argv[1]
    out_fn = argv[2]
    binSize = int(argv[3])
    newStep = int(argv[4])
    newSpan = int(argv[5])
    if binSize < 1:
        # Checked before the output file is opened (and truncated).
        print("number of steps to bin must be at least 1")
        sys.exit(1)

    counts = {}
    try:
        with _open_input(in_fn) as in_fh, open(out_fn, "w") as out_fh:
            for out in rebin_wig(in_fh, binSize, newStep, newSpan, counts):
                out_fh.write(out)
    except WigFormatError as err:
        print(str(err))
        sys.exit(1)

    print("initial data line count: " + str(counts["old"]))
    print("new data line count: " + str(counts["new"]))


if __name__ == "__main__":
    main()