[Biopython] [Samtools-help] Segmentation fault
Martin Mokrejs
mmokrejs at fold.natur.cuni.cz
Tue Oct 18 11:44:54 UTC 2011
Before running your python code, do (under bash):
$ ulimit -c unlimited
$ python mypython.py
$ file core
$ gdb /usr/bin/python ./core
gdb> where
gdb> bt full
gdb> quit
$
Martin
Mic wrote:
> Hello,
> Thank you for your email. I updated the code and find out that
> print reads['chr1'] #works fine
> but
> print reads['chr1'][0] #caused Segmentation fault
>
> Please find below the updated code:
>
> from Bio import SeqIO
> import pysam
> from optparse import OptionParser
> import subprocess, os, sys
> from multiprocessing import Pool
> import functools
>
>
> def GetReferenceInfo(referenceFastaPath):
> referencenames = []
> referencelengths = []
> referenceFastaFile = open(referenceFastaPath)
> for record in SeqIO.parse(referenceFastaFile, "fasta"):
> referencenames.append(record.name)
> referencelengths.append(len(record.seq))
> referenceFastaFile.close()
> return (referencenames, referencelengths)
>
>
> def GenerateSubsetBAM(bam_filename, ref_name):
> reads = []
> bam_fh = pysam.Samfile(bam_filename, "rb")
>
> for read in bam_fh.fetch(ref_name):
> reads.append(read)
>
> print ref_name + ' Done ' + str(len(reads))
> return (ref_name, reads)
>
>
> if __name__ == '__main__':
> parser = OptionParser("Usage: %prog -b BAMfile -f new_reference_fasta -o
> outputBAM")
> parser.add_option("-b", "--BAM", type="string", dest="inputBAMFilepath",
> help="Specify a BAM file")
> parser.add_option("-f", "--fasta", type="string", dest="fastaFilepath",
> help="Specify a reference fasta file.")
> parser.add_option("-o", "--output", type="string",
> dest="outputBAMFilepath", help="Specify an output BAM file.")
>
> (opts, args) = parser.parse_args()
>
> if (opts.inputBAMFilepath is None):
> print ("\nSpecify a BAM file. eg. -b large.bam\n")
> parser.print_help()
> elif not(os.path.exists(opts.inputBAMFilepath)):
> print ("\nReference BAM file does not exists: " + opts.inputBAMFilepath
> +"\n")
> elif (opts.fastaFilepath is None):
> print ("\nSpecify a reference fasta file. eg. -f Subset.fasta\n")
> parser.print_help()
> elif not(os.path.exists(opts.fastaFilepath)):
> print ("\nReference fasta file does not exists: " + opts.fastaFilepath
> +"\n")
> elif os.path.exists(opts.outputBAMFilepath) and
> not(raw_input(opts.outputBAMFilepath + " exists. Overwrite? (Y/n): ")=='Y'):
> print ("\nOutput BAM exists. Please specify alternative output file.
> eg. -o Subset.bam\n")
> else:
> print "Read fasta ..."
> (ref_names, ref_lengths) = GetReferenceInfo(opts.fastaFilepath)
> print 'Done!'
>
> print "creating subset...."
> pool = Pool()
> GenerateSubsetBAM_wrapper = functools.partial(GenerateSubsetBAM,
> opts.inputBAMFilepath)
> reads = dict(pool.imap_unordered(GenerateSubsetBAM_wrapper, ref_names))
> pool.close()
> print "Done!"
>
> print reads['chr1'] #works fine
> print "xxxxx"
>
> print reads['chr1'][0] #caused Segmentation fault
>
> I run the code with the pysam-0.5 examples (pysam-0.5/tests) in the
> following way:
>
> python SubsetBAM-P.py --BAM ex1.bam -f ex1.fa -o new.bam
>
> Read fasta ...
> Done!
> creating subset....
> chr1 Done 1464
> chr2 Done 1806
> Done!
> [<csamtools.AlignedRead object at 0x2b975635d168>, ...,
> <csamtools.AlignedRead object at 0x2b35d89b6ca8>]
> xxxxx
> Segmentation fault
>
> Thank you in advance.
>
>
> On Tue, Oct 18, 2011 at 7:00 PM, Peter Cock <p.j.a.cock at googlemail.com>wrote:
>
>> On Tue, Oct 18, 2011 at 4:44 AM, Mic <mictadlo at gmail.com> wrote:
>>> Hello,
>>> I have tried to generate a subset BAM, but I get a 'Segmentation fault'
>> with
>>> the following code:
>>> from Bio import SeqIO
>>> import pysam
>>> from optparse import OptionParser
>>> import subprocess, os, sys
>>> from multiprocessing import Pool
>>> import functools
>>> ...
>>
>> I tried this and it seemed to get stuck much earlier. Could you
>> cut down the example a bit by removing the multiprocessing?
>>
>> Peter
>>
>> P.S. Also you can remove the unused "import argparse" line.
>>
> _______________________________________________
> Biopython mailing list - Biopython at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/biopython
>
>
More information about the Biopython
mailing list