Source code for biofx.variants.features

"""
Created: July, 2014

@author: Carolyn Ch\'ng

.. todo::
    think about whether logging should be part of module.

- Variant
  SNV, Indel, CNV inherits Variant
"""
import collections
import logging
import re
import subprocess
import warnings

from biofx.variants.attributes import Zygosity

DEFAULT_SAMTOOLS = "/gsc/software/linux-x86_64-centos6/samtools-0.1.19/samtools"

DEFAULT_SNV_MPILEUP_PARAMS = ["--ff", "1540", "-BQ0", "-d", "1000000", "-A"]
DEFAULT_INDEL_MPILEUP_PARAMS = [
    "--ff",
    "1540",
    "-BQ0",
    "-d",
    "1000000",
    "-L",
    "1000000"]


[docs]class Variant(object): """ A base class for describing variants. Currently supports small mutations. Attributes: chromosome (string) start (int) end (int) ref (string): reference base alt (string): alternative base ref_count (int): reference base count alt_count (int): alternative base count coverage (int): total coverage state (string): insertion, deletion, duplication etc. misc (list): effects (string) snpeff_effects identifier (string): external IDs associated with variant. Usually column 3 in vcf provenance (string): if applicable, tool used to call variant cosmic (string): cosmic ID dbsnp (string): dbSNP ID bamfile (string): path to bamfile where this variant came from source (string): source ID where this variant came from is_special (bool): True if special characters (not ATCG) in ref/alt base homopolymer (bool): True if homopolymer clnsig_values (string): bar delimited integers cgl_pathogenicity (string): PATHOGENIC, VUS, BENIGN info (?): vcf info """ def __init__(self, chromosome, start, end=None): self.chromosome = chromosome self.start = int(start) self.end = int(end) if end is not None else end self.ref = "*" self.alt = "*" self.ref_count = None self.alt_count = None self.coverage = None self.state = None self.comments = [] self.annotations = None self.misc = None self.effects = None self.snpeff_effects = None self.identifier = 'NA' self.provenance = None self.cosmic = 'NA' self.dbsnp = 'NA' self.bamfile = None self.source = None self.is_special = False self.homopolymer = False self.clnsig_values = False self.gmaf = None self.allele_model = None self.zygosity = None self.genotype = None self.clinvar_pathogenicity = None self.clnsig_values = None self.cgl_pathogenicity = None self.info = None self.base_frequencies = None # snpeff attributes self.effect_impact = None self.functional_class = None self.codon_change = None self.gene_id = None self.gene_symbol = None self.gene_biotype = None self.aa_length = None self.transcript_id = None self.exon = None self.hgvs_aa_change = None self.hgvs_cds_change = None self.hgvs_effect_type = None self.classic_aa_change = None self.classic_effect_type = None self.alternative_transcripts = None self.alternative_genes = None self.transcript_info = None self.protein_id = None
[docs] def set_start(self, pos): self.start = int(pos)
[docs] def set_end(self, pos): self.end = int(pos)
[docs] def is_snp(self): return False
[docs] def get_chromosome_index(self): """1-based""" chr_ind = {'X': 23, 'Y': 24, 'MT': 25} if self.chromosome in ['X', 'Y', 'MT']: return chr_ind[self.chromosome] else: return int(self.chromosome)
[docs] def set_effects(self, effects): """ Args: effects (str) : snpeff effect string """ self.effects = effects.strip()
[docs] def set_id(self, identifier): self.identifier = identifier
[docs] def get_key(self): return "_".join( map(str, (self.chromosome, self.start, self.ref, self.alt)))
[docs] def get_coordinates(self): return "{v.chromosome}:{v.start}{v.ref}>{v.alt}".format(v=self)
[docs] def get_pathogenicity(self): if self.cgl_pathogenicity in ["PATHOGENIC", "PROBABLY_PATHOGENIC"] or \ self.clinvar_pathogenicity == "PATHOGENIC": pathogenicity = "PATHOGENIC" elif self.cgl_pathogenicity in ["BENIGN", "PROBABLY_BENIGN"] and self.clinvar_pathogenicity == "BENIGN": pathogenicity = "BENIGN" else: pathogenicity = "VUS" return pathogenicity
[docs] def set_provenance(self, provenance): self.provenance = provenance
[docs] def set_cosmic(self, cosmic): """ Args: cosmic (string): cosmic ID Raises: ValueError: not a cosmic ID """ if cosmic is None: cosmic = "NA" if not ((cosmic in ['', 'NA']) or ( all([cid.startswith("COSM") for cid in cosmic.split(",")]))): raise ValueError("{} is not a valid cosmic ID".format(cosmic)) self.cosmic = cosmic
[docs] def set_dbsnp(self, dbsnp): """ Args: dbsnp (string): dbSNP ID Raises: ValueError: not a dbsnp ID """ if dbsnp is None: dbsnp = "NA" if not ((dbsnp in ['', 'NA']) or ( all([did.startswith("rs") for did in dbsnp.split(",")]))): raise ValueError("{} is not a valid dbsnp ID".format(dbsnp)) self.dbsnp = dbsnp
[docs] def set_zygosity(self, genotype): """ Assign zygosity to variant. Args: v (Variant): an instance of :class`biofx.variants.Variants` genotype (string): genotype from vcf record Raises: AssertionError: assume no existing zygosity for variant AssertionError: assume 1/1 or 0/1 for genotype """ self.genotype = genotype if self.zygosity is not None: raise AssertionError( ("Variant {0.chromosome}:{0.start}{0.ref}>{0.alt} has an existing zygosity {0.zygosity}. " "Assumed no existing zygosity, will not set new zygosity.").format(self)) if genotype == "1/1": self.zygosity = Zygosity.HOMOZYGOUS elif genotype == "0/1": self.zygosity = Zygosity.HETEROZYGOUS else: raise AssertionError( ("Assumed genotype to be 1/1 or 0/1. " "Got unexpected value {}. ").format(genotype))
[docs] def set_eff(self, eff_map, hgvs=True, classic=False): """ Args: eff_map (dict): a dictionary with snpeff annotation key values (See output of parse_effect) hgvs (bool): is hgvs eff_map (parse_effect with hgvs=True) classic (bool): is classic eff_map Note: Set both hgvs and classic to True if it is a merged eff map from merge_eff_maps """ if eff_map is None: self.effect_impact = "." self.functional_class = "." self.codon_change = "." self.aa_length = "." self.gene_symbol = "." self.gene_biotype = "." self.transcript_id = "." self.exon = "." if hgvs: self.hgvs_effect_type = "." self.hgvs_aa_change = "." self.hgvs_cds_change = "." if classic: self.classic_aa_change = "." self.classic_effect_type = "." else: self.effect_impact = eff_map["effect_impact"] self.functional_class = eff_map["functional_class"] self.codon_change = eff_map["codon_change"] self.aa_length = eff_map["amino_acid_length"] self.gene_symbol = eff_map["gene_symbol"] self.gene_biotype = eff_map["gene_biotype"] self.transcript_id = eff_map["transcript"] self.exon = eff_map["exon"] if hgvs: self.hgvs_effect_type = eff_map["hgvs_effect_type"] self.hgvs_aa_change = eff_map["hgvs_protein_sequence_change"] self.hgvs_cds_change = eff_map["cds_change"] if classic: self.classic_aa_change = eff_map["classic_protein_sequence_change"] self.classic_effect_type = eff_map["classic_effect_type"]
[docs] def add_comment(self, comment): """ Adds a new comment to a list of comments Args: comment (string): comment string """ if comment not in self.comments: self.comments.append(comment)
[docs] def get_comments(self): """ Returns: string: semi-colon delimited comments """ return ";".join(self.comments)
[docs] def add_misc(self, **kwargs): """ add miscellaneous info Args: **kwargs: key word args for additional info. """ if self.misc is None: self.misc = kwargs else: for k, v in list(kwargs.items()): if k in self.misc: raise KeyError("{} previously added!") self.misc[k] = v
[docs] def normalize_splice_site(self): """ Checks hgvs effect type attributes and change value to a normalized splice site string if effect is a splice site. """ ss = "splice site" if self.hgvs_effect_type is None: raise TypeError("hgvs_effect_type is not set!") elif "splice" in self.hgvs_effect_type.lower(): self.hgvs_cds_change = ss self.hgvs_aa_change = ss
[docs] def set_alternative(self, eff_maps, model, to_string=True): """ Set alternative (not best/canonical) effect descriptions. Args: eff_maps (list): a list of dictionaries model (string): transcript or gene to_string: Raises: ValueError: invalid model """ if to_string: # make eff like string. # bar-delimited for fields, comma-delimited for each annotation data = ",".join(["|".join([x["transcript"], x["gene_symbol"], x["hgvs_protein_sequence_change"], x["classic_protein_sequence_change"], x["effect_impact"], x["functional_class"], x["hgvs_effect_type"]]) for x in eff_maps]) else: data = eff_maps if model == "transcript": self.alternative_transcripts = data elif model == "gene": self.alternative_genes = data else: raise ValueError("Invalid model {} provided. " "Select one of transcript|gene.".format(model))
[docs] def set_alternative_transcripts(self, eff_maps, to_string=True): # pragma: no cover """ .. todo:: merge set alternative transcripts and set alternative genes Args: eff_maps (list): a list of dictionaries to_string: Returns: """ warnings.warn("use set_alternative", PendingDeprecationWarning) return self.set_alternative(eff_maps, "transcript", to_string)
[docs] def set_alternative_genes(self, eff_maps, to_string=True): # pragma: no cover """ .. todo:: merge set alternative transcripts and set alternative genes Args: eff_maps: to_string: Returns: """ warnings.warn("use set_alternative", PendingDeprecationWarning) return self.set_alternative(eff_maps, "gene", to_string)
[docs] def get_variant_mpileup( self, bamfile, samtools=DEFAULT_SAMTOOLS, reference="/projects/alignment_references/9606/hg19a/genome/bwa_32/hg19a.fa", mpileup_params=[ "--ff", "1540", "-BQ0"]): """ Get mpileup record for variant. .. todo:: add functionality for region? move executables to a configs module/file or something like that Args: bamfile (string): path to bamfile samtools (string): samtools exectutable. Default: /gsc/software/linux-x86_64-centos6/samtools-0.1.19 reference (string): path to reference fasta file. Default: /projects/alignment_references/9606/hg19a/genome/bwa_32/GRCh37-lite.fa mpileup_params (list): list of mpileup parameters """ self.bamfile = bamfile samtools_mpileup = ([samtools, "mpileup", "-f", reference] + mpileup_params + ["-r", "{}:{}-{}".format(self.chromosome, self.start, self.end if self.end else self.start), self.bamfile]) logging.info( "Executing command:\n{}".format( " ".join(samtools_mpileup))) record = subprocess.check_output(samtools_mpileup) if not isinstance(record, str): record = record.decode("utf-8") logging.debug(record) if record == "": logging.warning( "No coverage at {}:{}-{} in {}".format( self.chromosome, self.start, self.end, self.bamfile)) return record
[docs] def get_variant_info(self, data, samtools, fasta_file): """ Get read support and total coverage for variant. Args: self (Variant): an instance of :class:`biofx.variants.Variants.Variant` data (dict): dictionary with library ID as keys. Required nested keys: - sample_prefix - tumour_content - bam samtools (basestring): path to samtools executable fasta_file (basestring): path to fasta file \ Returns: variant_info (dict): a dictionary with library IDs as keys and attribute keywords Raises: AssertionError: each bam file in bams is assumed to come from a different library """ variant_info = {} ref = self.ref alt = self.alt if self.is_snp(): mpileup_params = DEFAULT_SNV_MPILEUP_PARAMS else: # assume indel self.vt_normalize() # see variant class for more info mpileup_params = DEFAULT_INDEL_MPILEUP_PARAMS for l in data: sample_prefix = data[l]["sample_prefix"] if sample_prefix in variant_info: raise AssertionError( "One or more bam files is from " "the same sample {}!".format(sample_prefix)) variant_info[sample_prefix] = {} mpileup_record = self.get_variant_mpileup( data[l]["bam"], samtools=samtools, mpileup_params=mpileup_params, reference=fasta_file) variant_info[sample_prefix]["record"] = mpileup_record if mpileup_record == "": logging.warning( "No mpileup record for {} in {}".format( self.get_key(), sample_prefix)) if "tumour_content" in data[l]: variant_info[sample_prefix]["tumour_content"] = data[l]["tumour_content"] allele_frequencies = self.get_allele_frequencies(mpileup_record) # use original alt as output allele_frequencies[alt] = allele_frequencies.pop(self.alt) variant_info[sample_prefix]["allele_frequencies"] = allele_frequencies # initialize Support = collections.namedtuple('Support', ['supported', 'total']) allele_support = Support( supported=allele_frequencies[alt], total=self.coverage) # use pre-normalized alt as output key variant_info[sample_prefix]["alt_allele_support"] = { alt: allele_support} # set normalized ref and alt back to original ref alt for indels self.ref = ref self.alt = alt return variant_info
[docs] @staticmethod def has_pathogenicity_conflict(p1, p2): """ Check for pathogenicity conflict between two sources. Args: p1 (string): clinvar pathogenicity p2 (string): cgl pathogenicity Returns: boolean: True if has conflict """ return (p1 == "BENIGN" and p2 == "PATHOGENIC") or ( p1 == "PATHOGENIC" and p2 == "BENIGN")
[docs] def set_allele_model(self, gmaf, maf_cutoff): """ Get allele model string from gmaf. TODO: enumerate this? Args: gmaf (float): allele model gmaf maf_cutoff (float): Cut off for rare/common Returns: allele_model (string): allele model string """ allele_model = "COMMON" if gmaf < 0: allele_model = "UNKNOWN" elif gmaf <= maf_cutoff: allele_model = "RARE" self.allele_model = allele_model
[docs]class Indel(Variant): """ Generic Indel class with basic chromosome, start, end attributes, as well as optional ref/alt alleles, annotation attributes. Inherits methods from Variant. """
[docs] def get_length(self): """ Get indel length. .. todo:: think about definitions of start and end Raises: ValueError: if reference and/or alternate allele not set """ if self.end is None: if self.alt != "*" and self.ref != "*": length = abs(len(self.ref) - len(self.alt)) self.end = int(self.start) + length return length else: raise ValueError( "At least one of ref or alt not set for variant. " "Cannot compute length with variant end or ref alt alleles") return int(self.end) - int(self.start)
[docs] def set_state(self, state): """ Args: state (string): indel type, usually from genome validator Raises: ValueError: unrecognized state """ valid_states = ["ins", "del", "ITD", "dup"] if state not in valid_states: raise ValueError( "{} is not a recognized state (case sensitive).\n" "Choose one of {} or request a review.".format( state, ",".join(valid_states))) self.state = state
[docs] def set_alt(self, alt): """ Set alternative allele. White spaces are stripped off. Args: alt (string): alt string """ self.end = None alt = alt.strip() if any(base not in ['A', 'T', 'C', 'G'] for base in alt): logging.warning("Special characters seen in {}.".format(alt)) self.is_special = True self.alt = alt
[docs] def set_ref(self, ref): self.end = None ref = ref.strip() if any(base not in ['A', 'T', 'C', 'G'] for base in ref): logging.warning("Special characters seen in {}.".format(ref)) self.is_special = True self.ref = ref
[docs] def vt_normalize(self): """ Implementation of algorithm in http://genome.sph.umich.edu/wiki/Variant_Normalization. Modified for samtools/bcftools output - left aligned but not parsimonous. Raises: AssertionError: normalization cannot be done if ref alt bases are not set AssertionError: algorithm failed. shouldn't get here. never observed. Reference: http://genome.sph.umich.edu/wiki/Variant_Normalization See also: http://bioinformatics.oxfordjournals.org/content/31/13/2202.full. """ if self.alt == "*" or self.ref == "*": raise AssertionError( "Bases not set. Ref: {0.ref} Alt: {0.alt}".format(self)) normalizing = True while normalizing: if len(self.alt) == 1 or len(self.ref) == 1: normalizing = False elif self.alt[-1] == self.ref[-1]: self.alt = self.alt[:-1] self.ref = self.ref[:-1] else: normalizing = False # assume samtools VCF reports one base before event if len(self.alt) == 0 or len(self.ref) == 0: # pragma no cover raise AssertionError( ("Normalization algorithm failed. " "This can only be used for left aligned, non parsimonous records. " "Assumed variants are reported one base before event (left).")) logging.debug("ref: " + self.ref + "alt: " + self.alt) if (self.ref[0] == self.alt[0]) and ( len(self.ref) >= 2 and len(self.alt) >= 2): warnings.warn("Removing left most nucleotide") self.ref = self.ref[1:] self.alt = self.alt[1:] if len(set(self.alt[1:] + self.ref[1:])) == 1: self.homopolymer = True
[docs] def get_allele_frequencies(self, mpileup_record): """ Args: mpileup_record (string): output from :func:`Variants.get_variant_mpileup` or single mpileup record Returns: dict: dictionary containing: ref_count (int): reference allele count. 0 if empty record alt_count (int): alternate allele count. 0 if empty record Raises: ValueError: more than one record provided ValueError: if reference and/or alternate allele not set `pileup format specifications <http://samtools.sourceforge.net/pileup.shtml>`_ Note: Reference counts for indels are currently obtained by subtracting alternative allele counts from total coverage. Usage of total coverage (self.coverage) is preferred over reference counts. """ if mpileup_record == "": self.alt_count = 0 self.ref_count = 0 self.coverage = 0 else: if len(mpileup_record.splitlines()) > 1: raise ValueError( ("More than one record provided:\n{}\n" "This function parses one record at a time").format(mpileup_record)) if self.ref == "*" or self.alt == "*": raise ValueError( ("At least one of ref or alt not set for variant." "Allele frequencies can only be obtained for indels with specified ref and alts")) sign = '-' if len(self.ref) > len(self.alt) else '+' if sign == '-': base_change = self.ref[len(self.alt):] else: base_change = self.alt[len(self.ref):] entry = mpileup_record if not isinstance(entry, str): entry = entry.decode('UTF-8') entry = entry.strip().split('\t') chrom = entry[0] pos = entry[1] ref = entry[2] cov = entry[3] seq = entry[4] qual = entry[5] pattern = "\{}{}{}".format(sign, len(base_change), base_change) matches = re.findall(pattern, seq, flags=re.I) logging.debug(matches) if self.alt == ".": self.alt_count = "NA" self.ref_count = int(cov) else: self.alt_count = len(matches) self.ref_count = int(cov) - self.alt_count self.coverage = int(cov) self.base_frequencies = { self.ref: self.ref_count, self.alt: self.alt_count} return self.base_frequencies
[docs]class SNV(Variant): """ Generic SNV class with basic chromosome, start, end attributes, as well as optional ref/alt alleles, annotation attributes. Inherits from Variant """ def __init__(self, chromosome, start, end=None): if end is not None and end != start: raise ValueError( "start and end should be on the same position for {}".format( self.__class__.__name__)) else: end = start Variant.__init__(self, chromosome, start, end) self.base_frequencies = {}
[docs] def set_alt(self, alt): """ Args: alt (string): alt base Raises: ValueError: alt length must be 1 and one of ATCG. """ if "," in alt: if not all(len(x) == 1 for x in alt.split(",")): raise ValueError( "alt length must be 1 for {}. {} given.".format( self.__class__.__name__, alt)) elif not all(x in ['A', 'T', 'C', 'G', "."] for x in alt.split(",")): raise ValueError( "alt base must be one of ATCG for {}. {} given.".format( self.__class__.__name__, alt)) else: if len(alt) != 1: raise ValueError( "alt length must be 1 for {}. {} given.".format( self.__class__.__name__, alt)) elif alt not in ['A', 'T', 'C', 'G', "."]: raise ValueError( "alt base must be one of ATCG for {}. {} given.".format( self.__class__.__name__, alt)) self.alt = alt
[docs] def set_ref(self, ref): """ Args: ref (string): reference base Raises: ValueError: reference length must be 1 and one of ATCG. """ if len(ref) != 1: raise ValueError( "reference length must be 1 for {}. {} given.".format( self.__class__.__name__, ref)) elif ref not in ['A', 'T', 'C', 'G']: raise ValueError("reference base must be one of ATCG for {}. " "{} given.".format(self.__class__.__name__, ref)) self.ref = ref
[docs] def is_snp(self): """ True if is an SNV instance. Returns: bool: True on an SNV instance. """ return True
[docs] def get_allele_frequencies(self, mpileup_record): """ Get allele frequencies for each base at a given position of this SNV. Currently supports samtools 0.1.18 Args: mpileup_record (string): output from :func:`Variants.get_variant_mpileup` or single mpileup record Returns: base_frequencies (dict): frequency of each base """ logging.debug(mpileup_record) if mpileup_record == "": self.base_frequencies = { 'A': 0, 'C': 0, 'G': 0, 'T': 0, 'N': 0, ".": "NA"} self.coverage = 0 return self.base_frequencies # chunk below from /home/cmayoh/code/PileupParser_CountSnpBases.py, # with modifications. numA = 0 numC = 0 numG = 0 numT = 0 numN = 0 entry = mpileup_record.strip().split('\t') chrom = entry[0] pos = entry[1] ref = entry[2] cov = entry[3] seq = entry[4] qual = entry[5] info = chrom + ':' + pos + ':' + ref logging.debug(seq) bases = SNV._parseBases(seq) logging.debug(bases) # check to ensure length of parsed bases is correct if((not(len(bases) == int(cov))) or (not(len(bases) == len(qual)))): logging.error( "Error in parsing sequence: {}\n".format(mpileup_record)) return self.base_frequencies for b in bases: if(b == '.' or b == ','): if(ref == 'A'): numA += 1 if(ref == 'T'): numT += 1 if(ref == 'C'): numC += 1 if(ref == 'G'): numG += 1 if(ref == 'N'): numN += 1 elif(b == 'A' or b == 'a'): numA += 1 elif(b == 'C' or b == 'c'): numC += 1 elif(b == 'G' or b == 'g'): numG += 1 elif(b == 'T' or b == 't'): numT += 1 elif(b == 'N' or b == 'n'): numN += 1 self.coverage = int(cov) self.base_frequencies = { 'A': numA, 'C': numC, 'G': numG, 'T': numT, 'N': numN, '.': "NA"} return self.base_frequencies
[docs] def get_ref_count(self, **kwargs): """ Get reference base count. Args: **kwargs: used kwargs: mpileup_params Returns: int: ref base count """ if self.base_frequencies == {}: if self.bamfile is None: raise IOError( "Please get allele frequencies or provide a bam file") else: if "mpileup_params" in kwargs: record = self.get_variant_mpileup( self.bamfile, mpileup_params=kwargs["mpileup_params"]) else: record = self.get_variant_mpileup(self.bamfile) self.get_allele_frequencies(record) return self.base_frequencies[self.ref]
[docs] def get_alt_count(self, **kwargs): """ Args: **kwargs: used kwargs: mpileup_params Returns: int: alt base count """ if self.base_frequencies == {}: if self.bamfile is None: raise IOError( "Please get allele frequencies or provide a bam file") else: if "mpileup_params" in kwargs: record = self.get_variant_mpileup( self.bamfile, mpileup_params=kwargs["mpileup_params"]) else: record = self.get_variant_mpileup(self.bamfile) self.get_allele_frequencies(record) if "," in self.alt: # return single allele with high frequencies max_count = max( self.alt.split(","), key=lambda x: self.base_frequencies[x]) self.alt = max_count return self.base_frequencies[self.alt] return self.base_frequencies[self.alt]
@classmethod def _parseBases(cls, bases): # pragma: no cover """Copied verbatim from /home/cmayoh/code/PileupParser_CountSnpBases.py Updated with version from https://svn01.bcgsc.ca/svn/omics_custom_oneoffs/trunk/bin/PileupParser_countSnpBases_getBaseQs.py """ returnSeq = [] bs = list(bases) baseList = list('actgnACTGN.,') count = 0 # used to remove character following ^ and insertions for i in range(len(bs)): if(count > 0): # enters if preceding character is ^ or if an insertion occurred count -= 1 continue if(bs[i] == '$'): continue elif(bs[i] == '>'): # for handling large gapped alignments in RNAseq returnSeq.append(bs[i]) continue elif(bs[i] == '<'): # for handling large gapped alignments in RNAseq returnSeq.append(bs[i]) continue elif(bs[i] == '*'): returnSeq.append(bs[i]) continue elif(bs[i] == '+'): # insertion # determines the length of insertion n_skip = int(re.search(r'([0-9]+)', ''.join(bs[i:])).group()) count = n_skip + len(str(n_skip)) continue elif(bs[i] == '-'): # deletion # determines the length of deletion n_skip = int(re.search(r'([0-9]+)', ''.join(bs[i:])).group()) count = n_skip + len(str(n_skip)) continue elif(bs[i] == '^'): count = 1 continue elif bs[i] in baseList: returnSeq.append(bs[i]) else: returnSeq.append(bs[i]) return returnSeq
[docs]class CNV(Variant): """ Generic CNV class with basic chromosome, start, end attributes, as well as annotation attributes. Inherits from Variant """ NEUTRAL = "copy neutral" GAIN = "copy gain" LOSS = "copy loss" def __init__(self, chromosome, start, end): Variant.__init__(self, chromosome, start, end) self.copy_number = None self.overlap = None self.frequency = None
[docs] def get_type(self, ploidy=2): """ Infer copy number type Args: ploidy (int): ploidy or ploidy model for assessment. Default is 2 (human). Raise: ValueError: """ if self.copy_number is None: raise ValueError("Copy number not set") elif self.copy_number == ploidy: return CNV.NEUTRAL elif self.copy_number > ploidy: return CNV.GAIN elif self.copy_number < ploidy: return CNV.LOSS