[Biojava-l] problem in format.writesequence for EmblLikeFormat

saerts@mailserv.esat.kuleuven.ac.be saerts@mailserv.esat.kuleuven.ac.be
Fri, 12 Jul 2002 16:05:32 +0200 (CEST)


Hello,
There appears to be a problem when writing a sequence to a file in EMBL format, 
but only if its length is an exact multiple of 60 nucleotides (there are 60 nt 
on each line): the last line of nucleotides is not written, and the nucleotide 
count shown on the last line is incorrect. 

Example code and example sequence files are below. In the output, the line 
covering nucleotides 1141 to 1200 disappears.

Best regards,
Stein Aerts.

________________________________________

import org.biojava.bio.seq.io.*;
import org.biojava.bio.seq.*;
import java.io.*;

/**
 * Small reproduction program: reads the first entry of an EMBL file and
 * writes it back out in EMBL format using {@code EmblLikeFormat}.
 *
 * <p>The input and output paths are hard-coded below.
 */
public class TestEmbl {

  /**
   * Reads the first sequence from the input file and writes it to the
   * output file.
   *
   * @param args ignored
   * @throws Exception if reading or writing fails, or if the input file
   *         contains no sequence
   */
  public static void main(String[] args) throws Exception {
    BufferedReader br = new BufferedReader(
        new FileReader("D:\\SAE\\temp\\test_in.embl"));
    try {
      Sequence seq = null;
      SequenceIterator stream = SeqIOTools.readEmbl(br);
      if (stream.hasNext()) {
        seq = stream.nextSequence();
      }
      // Fail with a clear message instead of handing null to the writer.
      if (seq == null) {
        throw new IllegalStateException(
            "No sequence found in D:\\SAE\\temp\\test_in.embl");
      }

      SequenceFormat format = new EmblLikeFormat();
      PrintStream out = new PrintStream(
          new FileOutputStream("D:\\SAE\\temp\\test_out.embl"));
      try {
        format.writeSequence(seq, out);
      } finally {
        // Closing flushes any buffered output and releases the file handle.
        out.close();
      }
    } finally {
      // The reader is kept open until writing is done, then always released.
      br.close();
    }
  }
}

___________________________________________
SEQUENCE IN:

AC   ENSG00000105974;
FT   exon            1100..1199
FT                   /end_phase="2"
FT                   /exon_id="ENSE00001085899"
FT                   /start_phase="0"
XX
SQ   Sequence 1200 BP; 328 A; 283 C; 231 G; 358 T; 0 other;
     tcctttatag ttcttttata cttttgtgtc ttctctctaa ctaaataatc aactctttca        60
     gcattccatc catttccctt tctcctccct cttactccca acccacattc ccctctccat       120
     tttaatttta acctgtgccc cttcaagtgt actccagctt tttttttaaa ataatttcaa       180
     gtgatacttt gacttttgac tgcatatgga agcataagta acatgtcctt tcatttttgg       240
     ataatgagtt tcctgattaa ttacagctca agagtaaaat gactgattac tatttaattc       300
     attttgtgct tctttacaat aaagtaaaga cagaagcccc agattcagga acagacaaaa       360
     tactttaatc gctatcacat tttttttaag tctagtcaat tagaaaagtc aaatctttcc       420
     tcacagccaa gcacattaaa aaaaaatctt ctctggtaat aaacttgaag ctttaaataa       480
     ttctacaatt ataaacattt tgtgtatttt gcaaatatgg cataacctgt tggcataaaa       540
     ttccattgtt ccagaaaata tcggtaataa aattatagaa aagttaaaga tcttcatttc       600
     ttatttcgaa gcgtttggga gacatttcag aaacggatgg gaaatgttaa attctgcatg       660
     cctgcttaag tttccatcca caccgactag atgtaaacga gtgtcaccaa aagtacacca       720
     caggcaccca cacagattcc ttccataagg gatccacaaa gtttagatgt gaaatgtacc       780
     taaaggttcc tagccgtctt tcatccctcc ctctgtgaaa cagggagaca catgtgtttt       840
     aaggcagaga tggaacttgg gcgatgggcg gggggtgggg gaggtgggaa gggacggctt       900
     aggacagggc aggattgtgg attgtttctg ccgccttggt tgcccatact gggcatctct       960
     gcaggcgcgt cggctccctc cacccctgct gagatgatgc actgcgaaaa cattcgctct      1020
     ccccgggacg cctctcggtg gttcagagca gggaaaatgt tgcctcaggt ttaaaataat      1080
     ctgcccaagc accccagcgc gggagaaacg ttctcactcg ctctctgctc gctgcgggcg      1140
     ctgcccaagc accccagcgc gggagaaacg ttctcactcg ctctctgctc gctgcgggcg      1200
//

SEQUENCE OUT:

AC   ENSG00000105974;
FT   exon            1100..1199
FT                   /end_phase="2"
FT                   /exon_id="ENSE00001085899"
FT                   /start_phase="0"
XX
SQ   Sequence 1200 BP; 328 A; 282 C; 232 G; 358 T; 0 other;
     tcctttatag ttcttttata cttttgtgtc ttctctctaa ctaaataatc aactctttca        60
     gcattccatc catttccctt tctcctccct cttactccca acccacattc ccctctccat       120
     tttaatttta acctgtgccc cttcaagtgt actccagctt tttttttaaa ataatttcaa       180
     gtgatacttt gacttttgac tgcatatgga agcataagta acatgtcctt tcatttttgg       240
     ataatgagtt tcctgattaa ttacagctca agagtaaaat gactgattac tatttaattc       300
     attttgtgct tctttacaat aaagtaaaga cagaagcccc agattcagga acagacaaaa       360
     tactttaatc gctatcacat tttttttaag tctagtcaat tagaaaagtc aaatctttcc       420
     tcacagccaa gcacattaaa aaaaaatctt ctctggtaat aaacttgaag ctttaaataa       480
     ttctacaatt ataaacattt tgtgtatttt gcaaatatgg cataacctgt tggcataaaa       540
     ttccattgtt ccagaaaata tcggtaataa aattatagaa aagttaaaga tcttcatttc       600
     ttatttcgaa gcgtttggga gacatttcag aaacggatgg gaaatgttaa attctgcatg       660
     cctgcttaag tttccatcca caccgactag atgtaaacga gtgtcaccaa aagtacacca       720
     caggcaccca cacagattcc ttccataagg gatccacaaa gtttagatgt gaaatgtacc       780
     taaaggttcc tagccgtctt tcatccctcc ctctgtgaaa cagggagaca catgtgtttt       840
     aaggcagaga tggaacttgg gcgatgggcg gggggtgggg gaggtgggaa gggacggctt       900
     aggacagggc aggattgtgg attgtttctg ccgccttggt tgcccatact gggcatctct       960
     gcaggcgcgt cggctccctc cacccctgct gagatgatgc actgcgaaaa cattcgctct      1020
     ccccgggacg cctctcggtg gttcagagca gggaaaatgt tgcctcaggt ttaaaataat      1080
     ctgcccaagc accccagcgc gggagaaacg ttctcactcg ctctctgctc gctgcgggcg      1140
                                                                            1140
//