[Biojava-l] writesequence problem
Stein Aerts
stein.aerts@esat.kuleuven.ac.be
Fri, 12 Jul 2002 14:41:40 +0200
This is a multi-part message in MIME format.
------=_NextPart_000_0034_01C229B2.3BB8F5D0
Content-Type: text/plain;
charset="iso-8859-1"
Content-Transfer-Encoding: 7bit
Hello,
There appears to be a problem when writing a sequence to a file in EMBL
format, but only if the sequence length is an exact multiple of 60 nucleotides
(there are 60 nt on each line): the last line of nucleotides is not written,
and the nucleotide count shown on the last line is incorrect.
Example code is below, and example sequence files are attached.
Best regards,
Stein Aerts.
________________________________________
import org.biojava.bio.seq.io.*;
import org.biojava.bio.seq.*;
import java.io.*;
/**
 * Minimal reproduction program: reads the first sequence from an EMBL file
 * and writes it back out in EMBL format using {@code EmblLikeFormat}.
 */
public class TestEmbl {
    /**
     * Reads the first sequence from {@code test_in.embl} and writes it to
     * {@code test_out.embl}.
     *
     * @param args unused
     * @throws Exception if reading or writing fails, or if the input file
     *         contains no sequence
     */
    public static void main(String[] args) throws Exception {
        Sequence seq = null;
        BufferedReader br = new BufferedReader(new
                FileReader("D:\\SAE\\temp\\test_in.embl"));
        try {
            SequenceIterator stream = SeqIOTools.readEmbl(br);
            if (stream.hasNext()) {
                seq = stream.nextSequence();
            }
            // Fail with a clear message instead of handing null to the writer.
            if (seq == null) {
                throw new IllegalStateException(
                        "No sequence found in D:\\SAE\\temp\\test_in.embl");
            }
            SequenceFormat format = new EmblLikeFormat();
            OutputStream out = new FileOutputStream("D:\\SAE\\temp\\test_out.embl");
            try {
                PrintStream ps = new PrintStream(out);
                format.writeSequence(seq, ps);
                // PrintStream never throws IOException; checkError() flushes
                // and reports whether any write failed.
                if (ps.checkError()) {
                    throw new IOException(
                            "Error while writing D:\\SAE\\temp\\test_out.embl");
                }
            } finally {
                // Always release the output file handle, even on failure.
                out.close();
            }
        } finally {
            // The reader is kept open until writing has finished, then released.
            br.close();
        }
    }
}
------=_NextPart_000_0034_01C229B2.3BB8F5D0
Content-Type: application/octet-stream;
name="test_out.embl"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="test_out.embl"
AC ENSG00000105974;
FT exon 1100..1199
FT /end_phase=3D"2"
FT /exon_id=3D"ENSE00001085899"
FT /start_phase=3D"0"
XX
SQ Sequence 1200 BP; 328 A; 282 C; 232 G; 358 T; 0 other;
tcctttatag ttcttttata cttttgtgtc ttctctctaa ctaaataatc aactctttca =
60
gcattccatc catttccctt tctcctccct cttactccca acccacattc ccctctccat =
120
tttaatttta acctgtgccc cttcaagtgt actccagctt tttttttaaa ataatttcaa =
180
gtgatacttt gacttttgac tgcatatgga agcataagta acatgtcctt tcatttttgg =
240
ataatgagtt tcctgattaa ttacagctca agagtaaaat gactgattac tatttaattc =
300
attttgtgct tctttacaat aaagtaaaga cagaagcccc agattcagga acagacaaaa =
360
tactttaatc gctatcacat tttttttaag tctagtcaat tagaaaagtc aaatctttcc =
420
tcacagccaa gcacattaaa aaaaaatctt ctctggtaat aaacttgaag ctttaaataa =
480
ttctacaatt ataaacattt tgtgtatttt gcaaatatgg cataacctgt tggcataaaa =
540
ttccattgtt ccagaaaata tcggtaataa aattatagaa aagttaaaga tcttcatttc =
600
ttatttcgaa gcgtttggga gacatttcag aaacggatgg gaaatgttaa attctgcatg =
660
cctgcttaag tttccatcca caccgactag atgtaaacga gtgtcaccaa aagtacacca =
720
caggcaccca cacagattcc ttccataagg gatccacaaa gtttagatgt gaaatgtacc =
780
taaaggttcc tagccgtctt tcatccctcc ctctgtgaaa cagggagaca catgtgtttt =
840
aaggcagaga tggaacttgg gcgatgggcg gggggtgggg gaggtgggaa gggacggctt =
900
aggacagggc aggattgtgg attgtttctg ccgccttggt tgcccatact gggcatctct =
960
gcaggcgcgt cggctccctc cacccctgct gagatgatgc actgcgaaaa cattcgctct =
1020
ccccgggacg cctctcggtg gttcagagca gggaaaatgt tgcctcaggt ttaaaataat =
1080
ctgcccaagc accccagcgc gggagaaacg ttctcactcg ctctctgctc gctgcgggcg =
1140
=
1140
//
------=_NextPart_000_0034_01C229B2.3BB8F5D0
Content-Type: application/octet-stream;
name="test_in.embl"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="test_in.embl"
AC ENSG00000105974;
FT exon 1100..1199
FT /end_phase=3D"2"
FT /exon_id=3D"ENSE00001085899"
FT /start_phase=3D"0"
XX
SQ Sequence 1200 BP; 328 A; 283 C; 231 G; 358 T; 0 other;
tcctttatag ttcttttata cttttgtgtc ttctctctaa ctaaataatc aactctttca =
60
gcattccatc catttccctt tctcctccct cttactccca acccacattc ccctctccat =
120
tttaatttta acctgtgccc cttcaagtgt actccagctt tttttttaaa ataatttcaa =
180
gtgatacttt gacttttgac tgcatatgga agcataagta acatgtcctt tcatttttgg =
240
ataatgagtt tcctgattaa ttacagctca agagtaaaat gactgattac tatttaattc =
300
attttgtgct tctttacaat aaagtaaaga cagaagcccc agattcagga acagacaaaa =
360
tactttaatc gctatcacat tttttttaag tctagtcaat tagaaaagtc aaatctttcc =
420
tcacagccaa gcacattaaa aaaaaatctt ctctggtaat aaacttgaag ctttaaataa =
480
ttctacaatt ataaacattt tgtgtatttt gcaaatatgg cataacctgt tggcataaaa =
540
ttccattgtt ccagaaaata tcggtaataa aattatagaa aagttaaaga tcttcatttc =
600
ttatttcgaa gcgtttggga gacatttcag aaacggatgg gaaatgttaa attctgcatg =
660
cctgcttaag tttccatcca caccgactag atgtaaacga gtgtcaccaa aagtacacca =
720
caggcaccca cacagattcc ttccataagg gatccacaaa gtttagatgt gaaatgtacc =
780
taaaggttcc tagccgtctt tcatccctcc ctctgtgaaa cagggagaca catgtgtttt =
840
aaggcagaga tggaacttgg gcgatgggcg gggggtgggg gaggtgggaa gggacggctt =
900
aggacagggc aggattgtgg attgtttctg ccgccttggt tgcccatact gggcatctct =
960
gcaggcgcgt cggctccctc cacccctgct gagatgatgc actgcgaaaa cattcgctct =
1020
ccccgggacg cctctcggtg gttcagagca gggaaaatgt tgcctcaggt ttaaaataat =
1080
ctgcccaagc accccagcgc gggagaaacg ttctcactcg ctctctgctc gctgcgggcg =
1140
ctgcccaagc accccagcgc gggagaaacg ttctcactcg ctctctgctc gctgcgggcg =
1200
//
------=_NextPart_000_0034_01C229B2.3BB8F5D0--