Source code for mavis.interval



[docs]class Interval: """ """ def __init__(self, start, end=None, freq=1, number_type=None): """ Args: start (int): the start of the interval (inclusive) end (int): the end of the interval (inclusive) freq (int): the frequency or weight of the interval """ self.start = start self.end = end if end is not None else start if number_type is None: if int(self.start) != float(self.start) or int(self.end) != float(self.end) \ or isinstance(self.start, float) or isinstance(self.end, float): number_type = float else: number_type = int self.number_type = number_type self.start = self.number_type(self.start) self.end = self.number_type(self.end) if self.start > self.end: raise AttributeError('interval start > end is not allowed', self.start, self.end) self.freq = int(freq) if self.freq <= 0: raise AttributeError('Interval frequency must be a natural number')
[docs] def __sub__(self, other): # difference """the difference of two intervals Example: >>> Interval(1, 10) - Interval(5, 50) [Interval(1, 4)] >>> Interval(1, 2) - Interval(10, 11) [Interval(1, 2)] >>> Interval(1, 2) - Interval(-1, 10) [] """ if Interval.overlaps(self, other): if other[0] <= self[0]: if other[1] >= self[1]: return [] else: return [Interval(other[1] + 1, self[1])] elif other[1] >= self[1]: return [Interval(self[0], other[0] - 1)] else: return [Interval(self[0], other[0] - 1), Interval(other[1] + 1, self[1])] else: return [Interval(self[0], self[1])]
[docs] def __and__(self, other): # intersection """the intersection of two intervals Example: >>> Interval(1, 10) & Interval(5, 50) Interval(5, 10) >>> Interval(1, 2) & Interval(10, 11) None """ return Interval.intersection(self, other)
[docs] def __or__(self, other): # union """the union of two intervals Example: >>> Interval(1, 10) | Interval(5, 50) Interval(1, 50) >>> Interval(1, 2) | Interval(10, 11) Interval(1, 11) """ return Interval.union(self, other)
[docs] def __xor__(self, other): return (self - other) + (other - self)
def __getitem__(self, index): try: index = int(index) except ValueError: raise IndexError('index input accessor must be an integer', index) if index == 0: return self.start elif index == 1: return self.end raise IndexError( 'index input accessor is out of bounds: 1 or 2 only', index)
[docs] @classmethod def overlaps(cls, first, other): """ checks if two intervals have any portion of their given ranges in common Args: first (Interval): an interval to be compared other (Interval): an interval to be compared Example: >>> Interval.overlaps(Interval(1, 4), Interval(5, 7)) False >>> Interval.overlaps(Interval(1, 10), Interval(10, 11)) True >>> Interval.overlaps((1, 10), (10, 11)) True """ if first[1] < other[0]: return False elif first[0] > other[1]: return False else: return True
[docs] def __len__(self): """ the length of the interval Example: >>> len(Interval(1, 11)) 12 Warning: only works for integer intervals """ return Interval.length(self)
[docs] def length(self): try: if self.number_type == float: return self[1] - self[0] except AttributeError: pass return self[1] - self[0] + 1
def __lt__(self, other): if self[0] < other[0]: return True elif self[0] == other[0] and self[1] < other[1]: return True return False def __gt__(self, other): if self[1] > other[1]: return True elif self[1] == other[1] and self[0] > other[0]: return True return False def __repr__(self): cls = self.__class__.__name__ number_type = '' if self.number_type != int: number_type = ', type={}'.format(self.number_type) if self.freq != 1: return '{}({}, {}, freq={}{})'.format(cls, self.start, self.end, self.freq, number_type) else: return '{}({}, {}{})'.format(cls, self.start, self.end, number_type) @property def center(self): """ the middle of the interval Example: >>> Interval(1, 10).center 5.5 >>> Interval(1, 11).center 6 """ return (self[1] + self[0]) / 2 def __eq__(self, other): if self[0] != other[0] or self[1] != other[1]: return False return True def __contains__(self, other): try: if other[0] >= self[0] and other[1] <= self[1]: return True except TypeError: if other >= self[0] and other <= self[1]: return True return False
[docs] @classmethod def dist(cls, first, other): """returns the minimum distance between intervals Example: >>> Interval.dist((1, 4), (5, 7)) -1 >>> Interval.dist((5, 7), (1, 4)) 1 >>> Interval.dist((5, 8), (7, 9)) 0 """ if first[1] < other[0]: return first[1] - other[0] elif first[0] > other[1]: return first[0] - other[1] else: return 0
def __hash__(self): return hash((self[0], self[1], self.freq)) # @classmethod # def weighted_mean_ci(cls, *intervals): # """ # Calculates the weighted mean of a set of intervals. The weighting is inversely proportional to # the size of the input interval. The length of the final interval is the weight mean length of # the input intervals also weighted by length # Args: # intervals (Interval): a list of intervals # Returns: # Interval: the weighted mean interval of the input intervals # Raises: # AttributeError: if the input list is empty # Example: # >>> Interval.weighted_mean((1, 2), (1, 9), (2, 10)) # Interval(1, 4) # >>> Interval.weighted_mean((1, 1), (10, 10)) # Interval(6) # """ # centers = [] # weights = [] # lengths = [] # number_type = int # if len(intervals) == 0: # raise AttributeError('cannot compute the weighted mean interval of an empty set of intervals') # for i in intervals: # try: # if i.number_type == float: # number_type = float # except AttributeError: # pass # if not isinstance(i, Interval): # i = Interval(i[0], i[1]) # for temp in range(0, i.freq): # centers.append(i.center) # weights.append(1 / i.length()) # lengths.append(i.length()) # center = np.average(centers, weights=weights) # size = max([np.average(lengths) - 1, 1]) if number_type == int else np.average(lengths) # return Interval(round(center - size / 2, 0), round(center + size / 2, 0))
[docs] @classmethod def position_in_range(cls, segments, pos): if len(segments) == 0: raise AttributeError('cannot compute on an empty list') num = 0 found_inbetween_segment = False segments = sorted(segments, key=lambda x: (x[0], x[1])) while num < len(segments): current = segments[num] if pos[0] >= current[0] \ and pos[1] <= current[1]: # pos range is fully contained in the current segment break elif num == 0: # first segment if pos[1] < current[0]: # before the first segment found_inbetween_segment = True break else: # check the previous segment previous = segments[num - 1] if pos[0] > previous[1] and pos[1] < current[0]: found_inbetween_segment = True break num += 1 return num, found_inbetween_segment
[docs] @classmethod def convert_pos(cls, mapping, pos, forward_to_reverse=None): i = cls.convert_ratioed_pos(mapping, pos, forward_to_reverse) if i.forward_to_reverse: return i.end else: return i.start
[docs] @classmethod def convert_ratioed_pos(cls, mapping, pos, forward_to_reverse=None): """ convert any given position given a mapping of intervals to another range Args: mapping (:class:`dict` of :class:`Interval` and :class:`Interval`): a mapping of a set of continuous intervals pos (int): a position in the first coordinate system Returns: int: the position in the alternate coordinate system given the input mapping Raises: AttributeError: if the input position is outside the set of input segments IndexError: if the input position cannot be converted to the output system Example: >>> mapping = {(1, 10): (101, 110), (11, 20): (555, 564)} >>> Interval.convert_pos(mapping, 5) 5 >>> Interval.convert_pos(mapping, 15) 559 """ if len(mapping.keys()) < 2 and forward_to_reverse is None: raise AttributeError( 'mapping is insufficient to determine orientation') # order the input intervals input_intervals = sorted(mapping.keys()) mapped_to_intervals = [mapping[i] for i in input_intervals] # input check interval ranges are non-overlapping and increasing/decreasing for i in range(0, len(input_intervals)): if i > 0: if Interval.overlaps(input_intervals[i - 1], input_intervals[i]): raise AttributeError( 'input intervals cannot be overlapping', input_intervals[i], input_intervals[i - 1] ) """if Interval.overlaps(mapped_to_intervals[i - 1], mapped_to_intervals[i]): raise AttributeError( 'mapped_to intervals cannot be overlapping', mapped_to_intervals[i], mapped_to_intervals[i - 1] )""" if mapped_to_intervals[i][0] > mapped_to_intervals[i - 1][1]: if forward_to_reverse is None: forward_to_reverse = False elif forward_to_reverse: raise AttributeError('direction of mapped intervals is not consistent') elif mapped_to_intervals[i][1] < mapped_to_intervals[i - 1][0]: if forward_to_reverse is None: forward_to_reverse = True elif not forward_to_reverse: raise AttributeError('direction of mapped intervals is not consistent') i, previous_flag = Interval.position_in_range( input_intervals, (pos, pos)) # get the input position if i == len(input_intervals) or previous_flag: raise IndexError(pos, 'is outside mapped range', mapping) else: # fell into a mapped region curr = input_intervals[i] nexxt = mapping[curr] if curr[1] - curr[0] == 0: i = Interval(nexxt[0], nexxt[1]) else: ratio = (nexxt[1] - nexxt[0]) / (curr[1] - curr[0]) shift = round((pos - curr[0]) * ratio, 0) shift2 = round((pos - curr[0]) * ratio + ratio, 0) number_type = int if ratio == 1 else float if forward_to_reverse: i = Interval(nexxt[1] - shift2, nexxt[1] - shift, number_type=number_type) else: i = Interval(nexxt[0] + shift, nexxt[0] + shift2, number_type=number_type) setattr(i, 'forward_to_reverse', forward_to_reverse) return i
[docs] @classmethod def union(cls, *intervals): """ returns the union of the set of input intervals Example: >>> Interval.union((1, 2), (4, 6), (4, 9), (20, 21)) Interval(1, 21) """ if len(intervals) < 1: raise AttributeError('cannot compute the union of an empty set of intervals') return Interval(min([i[0] for i in intervals]), max([i[1] for i in intervals]))
[docs] @classmethod def intersection(cls, *intervals): """ returns None if there is no intersection Example: >>> Interval.intersection((1, 10), (2, 8), (7, 15)) Interval(7, 8) >>> Interval.intersection((1, 2), (5, 9)) None """ if len(intervals) < 1: raise AttributeError('cannot compute the intersection of an empty set of intervals') low = max([i[0] for i in intervals]) high = min([i[1] for i in intervals]) if low > high: return None return Interval(low, high)
[docs] @classmethod def min_nonoverlapping(cls, *intervals): """ for a list of intervals, orders them and merges any overlap to return a list of non-overlapping intervals O(nlogn) Example: >>> Interval.min_nonoverlapping((1, 10), (7, 8), (6, 14), (17, 20)) [Interval(1, 14), Interval(17, 20)] """ if len(intervals) == 0: return [] intervals = sorted(list(intervals), key=lambda x: (x[0], x[1])) new_intervals = [Interval(intervals[0][0], intervals[0][1])] for i in intervals[1:]: if Interval.overlaps(new_intervals[-1], i): new_intervals[-1] = new_intervals[-1] | i else: new_intervals.append(Interval(i[0], i[1])) return new_intervals
[docs] @classmethod def split_overlap(cls, *intervals, weight_mapping={}): """ for a given set of intervals splits any overlap so that the result is a new list of intervals with a single bp overlap Warning: this method is meant for integer intervals only """ intervals = sorted(intervals) split_intervals = {} set_start = 0 while set_start < len(intervals): merge_itvl = intervals[set_start] breaks = {intervals[set_start].start, intervals[set_start].end} set_end = set_start for j in range(set_start + 1, len(intervals)): if Interval.overlaps(intervals[j], merge_itvl): merge_itvl = merge_itvl | intervals[j] breaks.add(intervals[j].start) breaks.add(intervals[j].end) set_end = j else: break breaks = sorted(breaks) new_intervals = [Interval(x[0], x[1] - 1) for x in zip(breaks[0:], breaks[1:])] new_intervals[-1] = Interval(breaks[-2], breaks[-1]) if not weight_mapping: split_intervals.update({Interval(*x): None for x in new_intervals}) elif set_start == set_end: split_intervals[merge_itvl] = weight_mapping[merge_itvl] else: for itvl in new_intervals: weight = 0 for input_itvl in intervals[set_start:set_end + 1]: intersection = input_itvl & itvl if intersection: l = (len(intersection) / len(input_itvl)) * weight_mapping[input_itvl] weight = max([l, weight]) split_intervals[itvl] = int(round(weight, 0)) set_start = set_end + 1 return split_intervals
[docs]class IntervalMapping: """ mapping between coordinate systems using intervals. source intervals cannot overlap but no such assertion is enforced on the target intervals """ def __init__(self, mapping=None, opposing=None): if mapping is None: mapping = dict() if opposing is None: opposing = [] self.mapping = {Interval(k[0], k[1]): Interval(v[0], v[1]) for k, v in mapping.items()} self.opposing_directions = {Interval(k[0], k[1]): True for k in opposing} for i in self.opposing_directions: if i not in self.mapping: raise ValueError('cannot defined an opposing direction for an interval that in not mapped', i) for i in self.mapping: self.opposing_directions.setdefault(i, False)
[docs] def add(self, src_interval, tgt_interval, opposing_directions=True): src_interval = Interval(src_interval[0], src_interval[1]) tgt_interval = Interval(tgt_interval[0], tgt_interval[1]) for curr in self.mapping: if Interval.overlaps(curr, src_interval): raise ValueError('source intervals in mapping must not overlap') self.mapping[src_interval] = tgt_interval self.opposing_directions[src_interval] = opposing_directions
[docs] def convert_pos(self, pos, simplify=True): """ convert any given position given a mapping of intervals to another range Args: pos (int): a position in the first coordinate system Returns: the position in the alternate coordinate system given the input mapping - int: if simplify is True - Interval: if simplify is False Raises: IndexError: if the input position is not in any of the mapped intervals Example: >>> mapping = IntervalMapping(mapping={(1, 10): (101, 110), (11, 20): (555, 564)}) >>> mapping.convert_pos(5) 5 >>> mapping.convert_pos(15) 559 """ for src_interval, tgt_interval in self.mapping.items(): if pos in src_interval: forward_to_reverse = self.opposing_directions[src_interval] # minus 1 because we start at the start pos result = tgt_interval.start if src_interval.length() > 0: ratio = tgt_interval.length() / src_interval.length() shift = (pos - src_interval.start) * ratio shift = (pos - src_interval.start) * ratio result = tgt_interval[1] - shift if forward_to_reverse else tgt_interval[0] + shift elif tgt_interval.length() > 0: shift = tgt_interval.length() / 2 result = tgt_interval[1] - shift if forward_to_reverse else tgt_interval[0] + shift return int(round(result, 0)) if simplify else result raise IndexError(pos, 'position not found in mapping', self.mapping.keys())