[BioPython] RecordFile.py

Chunlei Wu chunlei.wu at uth.tmc.edu
Fri Apr 23 01:03:33 EDT 2004


Hi, group,
             I just tried "RecordFile.py", but it failed for both the FASTA 
file and the GenBank file I tested.

 >>> rec_h=RecordFile.RecordFile(open(r"gb_test.txt" ),'LOCUS','\\')
or
 >>> rec_h=RecordFile.RecordFile(open(r"gb_test.txt" ),'>','')

both returned the same error:

 >>> rec_h.read()
Traceback (most recent call last):
  File "<interactive input>", line 1, in ?
  File "C:\Python23\Lib\site-packages\Bio\RecordFile.py", line 83, in read
    text = self._in_record_state( args, keywds )
  File "C:\Python23\Lib\site-packages\Bio\RecordFile.py", line 120, in 
_in_record_state
    requested_text = text
UnboundLocalError: local variable 'text' referenced before assignment

I checked the code, but it is not obvious to me how to fix it.

Actually, I wrote a simple script a while ago using Bio.File's UndoHandle 
for the same purpose. It looks much simpler and may not be as powerful as 
RecordFile.py, but it does work for me.  I am posting it here in the hope 
that it is worth sharing with you.

Best,

Chunlei Wu
-------------- next part --------------
#Chunlei Wu 07/30/2003
'''
FlatRecHandle is a class that simulates a file handle for flat-file format
record files, using a record instead of a line as the reading unit.
FlatRecHandle.readrecord() returns one record every time it is called.
'''


from Bio import File,Fasta

class FlatRecHandle:
    '''File-like handle that reads a flat-file record file one record at a time.

       The wrapped handle is read line by line and the lines are grouped into
       records delimited by marker prefixes.

       start_marker is the prefix of the first line of each record:
           ">" for Fasta format record,
           "LOCUS" for GenBank format record, etc.
         Lines read while no record is open and that do not begin with
         start_marker are skipped.  If None, every line is accepted as the
         start of a record.
       stop_marker is the prefix of the last line of each record. If None, the
         record runs until the next start_marker line or the end of the file.
         e.g.:
           None for Fasta format record,
           "//" for GenBank format record, etc.

       readrecord() returns '' once the end of the file is reached.'''

    def __init__(self,handle,start_marker=None,stop_marker=None):
        '''Wrap handle for record-wise reading.

           handle is wrapped in a Bio.File.UndoHandle; readrecord() uses its
           peekline() to look one line ahead when no stop_marker is set.'''
        self._handle = File.UndoHandle(handle)
        self.start_marker=start_marker
        self.stop_marker=stop_marker

    def _starts_record(self,line):
        '''return true if line begins a record.

           With no start_marker configured every line qualifies, so the
           default start_marker=None no longer fails on len(None).'''
        marker=self.start_marker
        return marker is None or line[:len(marker)] == marker

    def readrecord(self):
        '''return one record at one time,just like readline().
           return '' if reaching eof.

           If a stop_marker is set and the file ends inside a record, a
           warning is printed and the partial record is returned.'''
        # Lines of the record being assembled; non-empty means "inside a record".
        record_lines=[]
        while True:
            line=self._handle.readline()

            if line == '': ##reach eof.
                if record_lines and self.stop_marker is not None:
                    # Parenthesised single-argument print behaves the same on
                    # Python 2 and Python 3.
                    print('Warning: This record may be incomplete. No stop marker("%s") found,but reach EOF!' % self.stop_marker)
                break

            if not record_lines and not self._starts_record(line):
                # Still outside any record: skip lines until a start marker.
                continue

            record_lines.append(line)
            if self.stop_marker is None:
                # No explicit terminator: the record ends just before the
                # next start marker, or at the end of the file.
                next_line=self._handle.peekline()
                if next_line == '' or self._starts_record(next_line):
                    break
            elif line[:len(self.stop_marker)] == self.stop_marker:
                break
        return ''.join(record_lines)

    def rewind(self):
        '''rewind the handle pointer to the beginning.'''
        return self._handle.seek(0)

    def tell(self):
        '''return the current position as reported by the wrapped handle.'''
        return self._handle.tell()

    def close(self):
        '''close the wrapped handle.'''
        return self._handle.close()

    def closed(self):
        '''return true if the wrapped handle has been closed.

           File objects expose "closed" as an attribute rather than a method,
           so it is read here instead of being called.'''
        return self._handle.closed

    def readrecords(self):
        '''return list of records,just like readlines()'''
        # iter(callable, sentinel) keeps calling readrecord() until it returns ''.
        return list(iter(self.readrecord,''))

    

def fasta_handle(in_f_handle):
    '''Build a record-wise reader for a Fasta file.

       in_f_handle is an open Fasta format file handle; the returned
       FlatRecHandle starts a record at each line beginning with ">"
       and uses no stop marker.'''
    fasta_start = ">"
    return FlatRecHandle(in_f_handle, start_marker=fasta_start)

def fasta_iterator(fastafile_handle):
    '''Build a Bio.Fasta iterator over a Fasta file.

       fastafile_handle is an open Fasta format file handle; the
       iterator is constructed with a Fasta.RecordParser.'''
    return Fasta.Iterator(fastafile_handle, Fasta.RecordParser())

def gb_handle(in_f_handle):
    '''Build a record-wise reader for a GenBank file.

       in_f_handle is an open GenBank format file handle; the returned
       FlatRecHandle starts a record at "LOCUS" and ends it at "//".'''
    return FlatRecHandle(in_f_handle, start_marker="LOCUS", stop_marker="//")



More information about the BioPython mailing list