fasta.py 1.23 KB
Newer Older
Mathieu Giraud's avatar
Mathieu Giraud committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26



def parse(fasta, endline=''):
    '''Iterate over the sequences of a fasta file, yielding (header, sequence) pairs.

    fasta:   an iterable of text lines (an open file, a list of strings, ...)
    endline: string appended after each sequence line of a record
             (default '': the lines are concatenated directly)

    Each line is stripped of surrounding whitespace and blank lines are ignored.
    The header is yielded without its leading '>'. Sequence lines found before
    the first header are yielded with an empty header. A record whose header
    and sequence are both empty is not yielded.
    '''

    header = ''
    # Sequence lines of the current record; joined once per record rather
    # than growing a string with repeated '+='.
    chunks = []

    for line in fasta:
        line = line.strip()

        if not line:
            continue

        if line[0] == '>':
            # A new header closes the previous record, if there was one
            if header or chunks:
                yield (header, ''.join(chunks))
            header = line[1:]
            chunks = []

        else:
            chunks.append(line + endline)

    # Last record of the input
    if header or chunks:
        yield (header, ''.join(chunks))
27 28 29 30 31 32 33

def extract_field_if_exists(str, separator, field_number):
    '''Return the field at index field_number of str split on separator.

    When the split yields no more than field_number fields, the whole
    string is returned unchanged.
    '''
    parts = str.split(separator)
    return parts[field_number] if field_number < len(parts) else str

Mathieu Giraud's avatar
Mathieu Giraud committed
34 35 36 37 38 39 40 41 42 43 44 45 46
def parse_as_Fasta(fasta):
    '''Iterate over the records of a fasta input, yielding a Fasta object
    for each (header, sequence) pair produced by parse().'''
    for record in parse(fasta):
        title, residues = record
        yield Fasta(title, residues)


class Fasta():
    '''A fasta record: a header (stored without the leading '>') and a sequence.'''

    def __init__(self, header, sequence):
        self.header = header
        self.seq = sequence

    @property
    def name(self):
        '''Second '|'-separated field of the header, or the whole header
        when the header has no such field.'''
        return extract_field_if_exists(self.header, '|', 1)

    @property
    def species(self):
        '''Third '|'-separated field of the header, or the whole header
        when the header has no such field.'''
        return extract_field_if_exists(self.header, '|', 2)

    def __len__(self):
        '''Length of the sequence.'''
        return len(self.seq)

    def __str__(self):
        '''The record as text: a '>header' line followed by the sequence line.'''
        return '>%s\n%s\n' % (self.header, self.seq)