[BioPython] SwissProt.SProt parser improvement
Andreas Kuntzagk
andreas.kuntzagk at mdc-berlin.de
Fri Jun 20 15:53:54 EDT 2003
Am Fre, 2003-06-20 um 16.26 schrieb Andreas Kuntzagk:
> Hi,
>
> I have added code to the parser to get the FeatureId (FTId) where it
> exists. Also replaced all occurrences of the string module with the
> methods of the string type. Makes for more readable code.
>
> Feel free to use or throw away
Forgot to attach the diff.
Andreas
-------------- next part --------------
Index: Bio/SwissProt/SProt.py
===================================================================
RCS file: /home/repository/biopython/biopython/Bio/SwissProt/SProt.py,v
retrieving revision 1.24
diff -r1.24 SProt.py
36d35
< import string
166c165
< data = string.join(lines, '')
---
> data = ''.join(lines)
506c505
< cols = string.split(line)
---
> cols = line.split()
523c522
< cols = string.split(self._chomp(string.rstrip(line[5:])), ';')
---
> cols= self._chomp(line[5:].rstrip()).split(';')
525c524
< self.data.accessions.append(string.lstrip(ac))
---
> self.data.accessions.append(ac.lstrip())
529,530c528,529
< if string.find(uprline, 'CREATED') >= 0:
< cols = string.split(line)
---
> cols = line.split()
> if uprline.find(uprline) >= 0:
537,538c536
< elif string.find(uprline, 'LAST SEQUENCE UPDATE') >= 0:
< cols = string.split(line)
---
> elif uprline.find('LAST SEQUENCE UPDATE') >= 0:
545,546c543
< elif string.find(uprline, 'LAST ANNOTATION UPDATE') >= 0:
< cols = string.split(line)
---
> elif uprline.find( 'LAST ANNOTATION UPDATE') >= 0:
570,571c567,568
< line = self._chomp(string.rstrip(line[5:]))
< cols = string.split(line, ';')
---
> line = self._chomp(line[5:].rstrip())
> cols = line.split(';')
573c570
< self.data.organism_classification.append(string.lstrip(col))
---
> self.data.organism_classification.append(col.lstrip())
587,588c584,585
< line = self._chomp(string.rstrip(line[5:]))
< index = string.find(line, '=')
---
> line = self._chomp(line[5:].rstrip())
> index = line.find('=')
592c589
< ids = string.split(line[index+1:], ',')
---
> ids = line[index+1:].split(',')
594,595c591,592
< ids = string.split(line, ',')
< self.data.taxonomy_id.extend(map(string.strip, ids))
---
> ids = line.split(',')
> self.data.taxonomy_id.extend([id.strip for id in ids])
598c595
< rn = string.rstrip(line[5:])
---
> rn = line[5:].rstrip()
606c603
< self.data.references[-1].positions.append(string.rstrip(line[5:]))
---
> self.data.references[-1].positions.append(line[5:].rstrip())
610c607
< cols = string.split(string.rstrip(line[5:]), ';')
---
> cols = line[5:].rstrip().split( ';')
616c613
< index = string.find(col, '=')
---
> index = col.find('=')
634c631
< ref.comments.append((string.lstrip(token), text))
---
> ref.comments.append((token.lstrip(), text))
646c643
< ind = string.find(line, '[NCBI, ExPASy, Israel, Japan]')
---
> ind = line.find('[NCBI, ExPASy, Israel, Japan]')
656,657c653,654
< if string.find(line, "=") != -1:
< cols = string.split(line)
---
> if line.find( "=") != -1:
> cols = line.split()
661c658
< id_cols = string.split(info_col, "=")
---
> id_cols = info_col.split("=")
670c667
< cols = string.split(line)
---
> cols = line.split()
719c716
< i = string.find(line, '[')
---
> i = line.find('[')
722,724c719,720
< cols = string.split(self._chomp(string.rstrip(line)), ';')
< for i in range(len(cols)):
< cols[i] = string.lstrip(cols[i])
---
> cols = self._chomp(line.rstrip()).split(';')
> cols = [col.lstrip() for col in cols]
728,731c724,726
< cols = string.split(self._chomp(string.rstrip(line[5:])), ';')
< for col in cols:
< self.data.keywords.append(string.lstrip(col))
<
---
> cols = self._chomp(line[5:].rstrip()).split(';')
> self.data.keywords.extend([c.lstrip for c in cols])
>
734c729
< name = string.rstrip(line[0:8])
---
> name = line[0:8].rstrip()
738c733
< from_res = string.lstrip(line[9:15])
---
> from_res = line[9:15].lstrip()
742,744c737,743
< to_res = string.lstrip(line[16:22])
< description = string.rstrip(line[29:70])
<
---
> to_res = line[16:22].lstrip()
> description = line[29:70].rstrip()
> #if there is a feature_id (FTId), store it away
> if line[29:35]==r"/FTId=":
> ft_id = line[35:70].rstrip()[:-1]
> else:
> ft_id =""
747c746
< name, from_res, to_res, old_description = self.data.features[-1]
---
> name, from_res, to_res, old_description,old_ft_id = self.data.features[-1]
754c753
< self.data.features.append((name, from_res, to_res, description))
---
> self.data.features.append((name, from_res, to_res, description,ft_id))
764c763
< descr_cols = string.split(description, " -> ")
---
> descr_cols = description.split(" -> ")
771c770
< extra_info_pos = string.find(second_seq, " (")
---
> extra_info_pos = second_seq.find(" (")
777,778c776,777
< first_seq = string.replace(first_seq, " ", "")
< second_seq = string.replace(second_seq, " ", "")
---
> first_seq = first_seq.replace(" ", "")
> second_seq = second_seq.replace(" ", "")
786c785
< cols = string.split(line)
---
> cols = line.split()
792c791
< seq = string.rstrip(string.replace(line, " ", ""))
---
> seq = line.replace(" ", "").rstrip()
797a797,798
> # from Python 2.2.2 could be replaced with word.rstrip(".,;")
> # if there is always only one punctuation character
814c815
< setattr(rec, m, string.rstrip(attr))
---
> setattr(rec, m, attr.rstrip())
823c824
< setattr(ref, m, string.rstrip(attr))
---
> setattr(ref, m, attr.rstrip())
849c850
< self.data.description = string.rstrip(self.data.description)
---
> self.data.description = self.data.description.rstrip()
852c853
< cols = string.split(line)
---
> cols = line.split()
856c857
< ids = string.split(string.rstrip(line[5:]), ';')
---
> ids = line[5:].rstrip().split(';')
861c862
< string.strip(line[5:]) + "\n"
---
> line[5:].strip() + "\n"
864c865
< seq = Seq.Seq(string.rstrip(string.replace(line, " ", "")),
---
> seq = Seq.Seq(line.replace(" ", "").rstrip(),
More information about the BioPython
mailing list