[Biopython] Segmentation fault

Mic mictadlo at gmail.com
Tue Oct 18 03:44:14 UTC 2011


Hello,
I have tried to generate a subset BAM, but I get a 'Segmentation fault' with
the following code:

from Bio import SeqIO
import pysam
from optparse import OptionParser
import subprocess, os, sys
from multiprocessing import Pool
import functools
import argparse


def GetReferenceInfo(referenceFastaPath):
  referencenames = []
  referencelengths = []
  referenceFastaFile = open(referenceFastaPath)
  for record in SeqIO.parse(referenceFastaFile, "fasta"):
    referencenames.append(record.name)
    referencelengths.append(len(record.seq))
  referenceFastaFile.close()
  return (referencenames, referencelengths)


def GenerateSubsetBAM(bam_filename, ref_name):
    reads = []
    bam_fh = pysam.Samfile(bam_filename, "rb")

    for read in bam_fh.fetch(ref_name):
        reads.append(read)

    print ref_name + ' Done ' + str(len(reads))
    return (ref_name, reads)


def writeBAM(reads, ref_names, ref_lengths, output_BAM):
    #print ref_names
    #print ref_lengths
    #print output_BAM
    #with pysam.Samfile(output_BAM, "wb", referencenames = ref_names,
referencelengths = ref_lengths) as bh:
    bh = pysam.Samfile(output_BAM, "wb", referencenames = ref_names,
referencelengths = ref_lengths)

    print reads.keys()
    for ref_name in ref_names:
        print ref_name
        for read in reads[ref_name]:
            print read
            #bh.write(read)
        print ref_name + 'Done'


if __name__ == '__main__':
  parser = OptionParser("Usage: %prog -b BAMfile -f new_reference_fasta -o
outputBAM")
  parser.add_option("-b", "--BAM", type="string", dest="inputBAMFilepath",
help="Specify a BAM file")
  parser.add_option("-f", "--fasta", type="string", dest="fastaFilepath",
help="Specify a reference fasta file.")
  parser.add_option("-o", "--output", type="string",
dest="outputBAMFilepath", help="Specify an output BAM file.")

  (opts, args) = parser.parse_args()

  if (opts.inputBAMFilepath is None):
    print ("\nSpecify a BAM file. eg. -b large.bam\n")
    parser.print_help()
  elif not(os.path.exists(opts.inputBAMFilepath)):
    print ("\nReference BAM file does not exists: " + opts.inputBAMFilepath
+"\n")
  elif (opts.fastaFilepath is None):
    print ("\nSpecify a reference fasta file.  eg. -f Subset.fasta\n")
    parser.print_help()
  elif not(os.path.exists(opts.fastaFilepath)):
    print ("\nReference fasta file does not exists: " + opts.fastaFilepath
+"\n")
  elif os.path.exists(opts.outputBAMFilepath) and
not(raw_input(opts.outputBAMFilepath + " exists. Overwrite? (Y/n): ")=='Y'):
    print ("\nOutput BAM exists. Please specify alternative output file.
 eg. -o Subset.bam\n")
  else:
    print "Read fasta ..."
    (ref_names, ref_lengths) = GetReferenceInfo(opts.fastaFilepath)
    print 'Done!'

    print "creating subset...."
    pool = Pool()
    GenerateSubsetBAM_wrapper = functools.partial(GenerateSubsetBAM,
opts.inputBAMFilepath)
    reads = dict(pool.imap_unordered(GenerateSubsetBAM_wrapper, ref_names))
    pool.close()
    print "Done!"

    print "Writting results to subset BAM file..."
    writeBAM(reads, ref_names, ref_lengths, opts.outputBAMFilepath)
    print "Done!"


I run the code in the following way:

python SubsetBAM-P.py --BAM ex1.bam -f ex1.fa -o new.bamRead fasta ...
Done!
creating subset....
chr1 Done 1464
chr2 Done 1806
Done!
Writting results to subset BAM file...
['chr2', 'chr1']
chr1
Segmentation fault

Thank you in advance.



More information about the Biopython mailing list