[Biopython] Segmentation fault
Mic
mictadlo at gmail.com
Tue Oct 18 03:44:14 UTC 2011
Hello,
I have tried to generate a subset BAM, but I get a 'Segmentation fault' with
the following code:
from Bio import SeqIO
import pysam
from optparse import OptionParser
import subprocess, os, sys
from multiprocessing import Pool
import functools
import argparse
def GetReferenceInfo(referenceFastaPath):
referencenames = []
referencelengths = []
referenceFastaFile = open(referenceFastaPath)
for record in SeqIO.parse(referenceFastaFile, "fasta"):
referencenames.append(record.name)
referencelengths.append(len(record.seq))
referenceFastaFile.close()
return (referencenames, referencelengths)
def GenerateSubsetBAM(bam_filename, ref_name):
reads = []
bam_fh = pysam.Samfile(bam_filename, "rb")
for read in bam_fh.fetch(ref_name):
reads.append(read)
print ref_name + ' Done ' + str(len(reads))
return (ref_name, reads)
def writeBAM(reads, ref_names, ref_lengths, output_BAM):
#print ref_names
#print ref_lengths
#print output_BAM
#with pysam.Samfile(output_BAM, "wb", referencenames = ref_names,
referencelengths = ref_lengths) as bh:
bh = pysam.Samfile(output_BAM, "wb", referencenames = ref_names,
referencelengths = ref_lengths)
print reads.keys()
for ref_name in ref_names:
print ref_name
for read in reads[ref_name]:
print read
#bh.write(read)
print ref_name + 'Done'
if __name__ == '__main__':
parser = OptionParser("Usage: %prog -b BAMfile -f new_reference_fasta -o
outputBAM")
parser.add_option("-b", "--BAM", type="string", dest="inputBAMFilepath",
help="Specify a BAM file")
parser.add_option("-f", "--fasta", type="string", dest="fastaFilepath",
help="Specify a reference fasta file.")
parser.add_option("-o", "--output", type="string",
dest="outputBAMFilepath", help="Specify an output BAM file.")
(opts, args) = parser.parse_args()
if (opts.inputBAMFilepath is None):
print ("\nSpecify a BAM file. eg. -b large.bam\n")
parser.print_help()
elif not(os.path.exists(opts.inputBAMFilepath)):
print ("\nReference BAM file does not exists: " + opts.inputBAMFilepath
+"\n")
elif (opts.fastaFilepath is None):
print ("\nSpecify a reference fasta file. eg. -f Subset.fasta\n")
parser.print_help()
elif not(os.path.exists(opts.fastaFilepath)):
print ("\nReference fasta file does not exists: " + opts.fastaFilepath
+"\n")
elif os.path.exists(opts.outputBAMFilepath) and
not(raw_input(opts.outputBAMFilepath + " exists. Overwrite? (Y/n): ")=='Y'):
print ("\nOutput BAM exists. Please specify alternative output file.
eg. -o Subset.bam\n")
else:
print "Read fasta ..."
(ref_names, ref_lengths) = GetReferenceInfo(opts.fastaFilepath)
print 'Done!'
print "creating subset...."
pool = Pool()
GenerateSubsetBAM_wrapper = functools.partial(GenerateSubsetBAM,
opts.inputBAMFilepath)
reads = dict(pool.imap_unordered(GenerateSubsetBAM_wrapper, ref_names))
pool.close()
print "Done!"
print "Writting results to subset BAM file..."
writeBAM(reads, ref_names, ref_lengths, opts.outputBAMFilepath)
print "Done!"
I run the code in the following way:
python SubsetBAM-P.py --BAM ex1.bam -f ex1.fa -o new.bamRead fasta ...
Done!
creating subset....
chr1 Done 1464
chr2 Done 1806
Done!
Writting results to subset BAM file...
['chr2', 'chr1']
chr1
Segmentation fault
Thank you in advance.
More information about the Biopython
mailing list