fasta.py 1.83 KB
Newer Older
Mathieu Giraud's avatar
Mathieu Giraud committed
1 2 3



Mathieu Giraud's avatar
Mathieu Giraud committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
COMPLEMENT_NUCLEOTIDE = {
    'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C',
    'Y': 'R', 'R': 'Y', # pyrimidine (CT) / purine (AG)
    'W': 'S', 'S': 'W', # weak (AT) / strong (GC)
    'K': 'M', 'M': 'K', #  keto (TG) / amino (AC)
    'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
    'N': 'N'
}

def revcomp(seq):
    '''Returns the reverse complement of a sequence

    >>> revcomp('ACGNTT')
    'AANCGT'
    '''
    rc = ''
    for nucl in seq[::-1]:
        rc += COMPLEMENT_NUCLEOTIDE[nucl.upper()]
    return rc

Mathieu Giraud's avatar
Mathieu Giraud committed
24 25 26 27 28 29 30 31 32 33 34
def parse(fasta, endline=''):
    '''Iterates over sequences in a fasta files, yielding (header, sequence) pairs'''

    header = ''
    sequence = ''
    
    for l in fasta:
        l = l.strip()

        if not l:
            continue
35 36 37

        if l[0] == '#':
            continue
Mathieu Giraud's avatar
Mathieu Giraud committed
38 39 40 41 42 43 44 45 46 47 48 49
    
        if l[0] == '>':
            if header or sequence:
                yield (header, sequence)
            header = l[1:]
            sequence = ''

        else:
            sequence += l + endline
            
    if header or sequence:
        yield (header, sequence)
50 51 52 53 54 55 56

def extract_field_if_exists(str, separator, field_number):
    fields = str.split(separator)
    if len(fields) > field_number:
        return fields[field_number]
    return str

Mathieu Giraud's avatar
Mathieu Giraud committed
57 58 59 60 61 62 63 64 65 66 67
def parse_as_Fasta(fasta):
    for (header, sequence) in parse(fasta):
        yield Fasta(header, sequence)


class Fasta():

    def __init__(self, header, sequence):
        self.header = header
        self.seq = sequence

Mathieu Giraud's avatar
Mathieu Giraud committed
68 69 70
    def revcomp(self):
        self.seq = revcomp(self.seq)

Mathieu Giraud's avatar
Mathieu Giraud committed
71 72
    @property
    def name(self):
73
        return extract_field_if_exists(self.header, '|', 1)
Mathieu Giraud's avatar
Mathieu Giraud committed
74 75 76

    @property
    def species(self):
77
        return extract_field_if_exists(self.header, '|', 2)
Mathieu Giraud's avatar
Mathieu Giraud committed
78 79 80 81 82 83 84

    def __len__(self):
        return len(self.seq)
    
    def __str__(self):
        return '>%s\n%s\n' % (self.header, self.seq)