# Corrects overlaps between wig features which are not tolerated
# by UCSC's wigToBigWig converter. If two fixedStep features on the
# same chromosome overlap, the overlapping values in the second
# feature are deleted. When the surviving values continue the first
# feature exactly (same step, next coordinate), the second header is
# removed (i.e. the features are merged); otherwise a new header is
# written at the first surviving coordinate.
#
# Usage: <script> IN.wig[.gz] OUT.wig[.gz]

import gzip
import sys


class WigFormatError(ValueError):
    """Raised when a fixedStep header does not have exactly 4 fields."""


def _print_report(message):
    """Default reporter: print an overlap notice to stdout."""
    print(message)


def _open_text(path, mode):
    """Open *path* for text I/O, through gzip when it ends with '.gz'.

    *mode* is 'r' or 'w'. Gzip files are opened in text mode ('rt'/'wt')
    so that lines are str objects, like those of a plain file.
    """
    if path.endswith(".gz"):
        return gzip.open(path, mode + "t")
    return open(path, mode)


def _parse_fixed_step_header(line):
    """Split a header into (chrom_field, start, step).

    The header must look like 'fixedStep chrom=X start=N step=M' with
    single spaces. The chrom field is returned verbatim ('chrom=X'),
    which is how it is compared and reported.

    Raises WigFormatError when the field count is not 4.
    """
    pieces = line.split(" ")
    if len(pieces) != 4:
        raise WigFormatError(
            "Expect at 4 fields in a fixedStep header, found "
            + str(len(pieces)) + "\n" + line)
    chrom = pieces[1]
    start = int(pieces[2].split("=")[1])
    step = int(pieces[3].split("=")[1])
    return chrom, start, step


def merge_overlapping_features(lines, report=None):
    """Yield the lines of a fixedStep WIG stream with overlaps removed.

    lines  -- iterable of input lines (trailing newlines are stripped).
    report -- callable taking one message string, called once per
              overlapping header; defaults to printing to stdout.

    Yielded lines carry no trailing newline. 'track' and '#' lines and
    blank lines are passed through. Data lines seen before the first
    fixedStep header are passed through untouched.

    Raises WigFormatError on a fixedStep header without 4 fields.
    """
    if report is None:
        report = _print_report

    # State of the input feature currently being read.
    chrom = None
    pos = None        # coordinate of the next input data line
    step = None
    # State of the feature currently open in the output.
    out_chrom = None
    out_next = None   # coordinate the next appended value would get
    out_step = None
    # True while the current feature's header is withheld and its
    # leading values are being dropped.
    trimming = False

    for raw in lines:
        line = raw.strip()

        if line.startswith(("track", "#")):
            yield line
            continue

        if line.startswith("fixedStep"):
            chrom, pos, step = _parse_fixed_step_header(line)
            # Compare against what was actually written, not against
            # the end of the previous input feature: a feature that was
            # dropped entirely must not shrink the covered region.
            # As in the original check, each value is treated as
            # occupying a full step.
            if chrom == out_chrom and pos < out_next:
                trimming = True
                report("found overlap error: " + chrom + " " + str(pos)
                       + " " + str(out_next - pos))
            else:
                # A fresh, non-overlapping feature always clears any
                # pending trim state.
                trimming = False
                yield line
                out_chrom, out_next, out_step = chrom, pos, step
            continue

        if not line:
            # Blank lines are not data values; pass them through
            # without advancing the coordinate.
            yield line
            continue

        if pos is None:
            # Data before any fixedStep header: nothing to track.
            yield line
            continue

        if trimming:
            if pos < out_next:
                # Coordinate already covered by written data.
                pos += step
                continue
            # First value past the overlap. It can be appended to the
            # open output feature only if it lands exactly on that
            # feature's grid; otherwise it needs its own header.
            if pos != out_next or step != out_step:
                yield ("fixedStep " + chrom + " start=" + str(pos)
                       + " step=" + str(step))
                out_step = step
            trimming = False

        yield line
        pos += step
        out_next = pos


def main(argv):
    """Run the script; return the process exit status."""
    if len(argv) != 3:
        print("\nSpecify:\n")
        print("(1) a fixedStep WIG file,")
        print("(2) an output file name\n")
        return 1

    in_fn = argv[1]
    out_fn = argv[2]

    # 'with' closes both files on every path, including errors.
    with _open_text(in_fn, "r") as in_fh, _open_text(out_fn, "w") as out_fh:
        try:
            for out_line in merge_overlapping_features(in_fh):
                out_fh.write(out_line + "\n")
        except WigFormatError as err:
            print(err)
            return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))