fasta.py 1.79 KB
Newer Older
Mathieu Giraud's avatar
Mathieu Giraud committed
1 2 3



Mathieu Giraud's avatar
Mathieu Giraud committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
COMPLEMENT_NUCLEOTIDE = {
    'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C',
    'Y': 'R', 'R': 'Y', # pyrimidine (CT) / purine (AG)
    'W': 'S', 'S': 'W', # weak (AT) / strong (GC)
    'K': 'M', 'M': 'K', #  keto (TG) / amino (AC)
    'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
    'N': 'N'
}

def revcomp(seq):
    '''Returns the reverse complement of a sequence

    >>> revcomp('ACGNTT')
    'AANCGT'
    '''
    rc = ''
    for nucl in seq[::-1]:
        rc += COMPLEMENT_NUCLEOTIDE[nucl.upper()]
    return rc

Mathieu Giraud's avatar
Mathieu Giraud committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
def parse(fasta, endline=''):
    '''Iterates over sequences in a fasta files, yielding (header, sequence) pairs'''

    header = ''
    sequence = ''
    
    for l in fasta:
        l = l.strip()

        if not l:
            continue
    
        if l[0] == '>':
            if header or sequence:
                yield (header, sequence)
            header = l[1:]
            sequence = ''

        else:
            sequence += l + endline
            
    if header or sequence:
        yield (header, sequence)
47 48 49 50 51 52 53

def extract_field_if_exists(str, separator, field_number):
    fields = str.split(separator)
    if len(fields) > field_number:
        return fields[field_number]
    return str

Mathieu Giraud's avatar
Mathieu Giraud committed
54 55 56 57 58 59 60 61 62 63 64
def parse_as_Fasta(fasta):
    for (header, sequence) in parse(fasta):
        yield Fasta(header, sequence)


class Fasta():

    def __init__(self, header, sequence):
        self.header = header
        self.seq = sequence

Mathieu Giraud's avatar
Mathieu Giraud committed
65 66 67
    def revcomp(self):
        self.seq = revcomp(self.seq)

Mathieu Giraud's avatar
Mathieu Giraud committed
68 69
    @property
    def name(self):
70
        return extract_field_if_exists(self.header, '|', 1)
Mathieu Giraud's avatar
Mathieu Giraud committed
71 72 73

    @property
    def species(self):
74
        return extract_field_if_exists(self.header, '|', 2)
Mathieu Giraud's avatar
Mathieu Giraud committed
75 76 77 78 79 80 81

    def __len__(self):
        return len(self.seq)
    
    def __str__(self):
        return '>%s\n%s\n' % (self.header, self.seq)