[Biopython] multiprocessing problem with pysam

Michal mictadlo at gmail.com
Sun Apr 10 02:18:38 UTC 2011


Hello,
I have tried to rewrite the following code from 
http://wwwfgu.anat.ox.ac.uk/~andreas/documentation/samtools/api.html
----------------------------
# Reference example: print, for each pileup column of 'chr1' in the region
# 100..120 of ex1.bam, its coverage and the base every read contributes.
# Written for Python 2 (print statements).
import pysam
samfile = pysam.Samfile("ex1.bam", "rb" )
# Each item yielded by pileup() is read below via .pos, .n and .pileups.
for pileupcolumn in samfile.pileup( 'chr1', 100, 120):
     # Bare print statement: emits an empty separator line under Python 2.
     print
     print 'coverage at base %s = %s' % (pileupcolumn.pos , pileupcolumn.n)
     for pileupread in pileupcolumn.pileups:
         # qpos indexes into the read's own sequence to pick the base shown.
         print '\tbase in read %s = %s' % (pileupread.alignment.qname, 
pileupread.alignment.seq[pileupread.qpos])

samfile.close()
----------------------------

with the following multiprocessing code:

----------------------------
import pysam
import os
from multiprocessing import Pool
from pprint import pprint

class Pileup_info(object):
    """Coverage record for a single pileup column.

    Attributes:
        pileup_pos: position of the column, exactly as given by the caller.
        coverage: coverage value of the column, exactly as given by the caller.
        reads: list the caller fills with one entry per read; starts empty.
    """

    def __init__(self, pileup_pos, coverage):
        # `self` was missing from the signature, so Pileup_info(pos, n)
        # raised TypeError before any attribute could be stored.
        self.pileup_pos = pileup_pos
        self.coverage = coverage
        # Per-instance list: a class-level `reads = []` is a single list
        # shared by all instances, which would mix the reads of every column.
        self.reads = []

class Reads_info(object):
    """Name of a read together with the base it shows at one column.

    Attributes:
        read_name: read identifier, exactly as given by the caller.
        read_base: base value, exactly as given by the caller.
    """

    def __init__(self, read_name, read_base):
        # `self` was missing from the signature, so Reads_info(name, base)
        # raised TypeError instead of building the record.
        self.read_name = read_name
        self.read_base = read_base

def calc_pileup(samfile, reference_name, start_pos, end_pos):
    """Collect coverage and per-read bases for one region of one reference.

    Args:
        samfile: object providing pileup(reference, start, end); the caller
            in this script passes an open pysam.Samfile.
        reference_name: name of the reference to pile up.
        start_pos: region start, handed unchanged to samfile.pileup().
        end_pos: region end, handed unchanged to samfile.pileup().

    Returns:
        A (reference_name, coverages) tuple, where coverages holds one
        Pileup_info per pileup column and each Pileup_info.reads holds one
        Reads_info per read in that column.
    """
    coverages = []
    # Shows which process handles which reference; the output is the same
    # as the Python 2 statement `print reference_name, os.getpid()`.
    print("%s %s" % (reference_name, os.getpid()))
    for pileupcolumn in samfile.pileup(reference_name, start_pos, end_pos):
        pileup_inf = Pileup_info(pileupcolumn.pos, pileupcolumn.n)
        # Assign a fresh list per column so columns never share read entries.
        pileup_inf.reads = [
            Reads_info(pileupread.alignment.qname,
                       pileupread.alignment.seq[pileupread.qpos])
            for pileupread in pileupcolumn.pileups
        ]
        # Must happen inside the column loop: appending after the loop would
        # record only the last column (and fail when there are no columns).
        coverages.append(pileup_inf)

    return (reference_name, coverages)

def output(coverage):
     """Placeholder for reporting results; `coverage` is not used yet."""
     #for
     # Under Python 2 each bare `print` statement writes one empty line.
     print
     print

if __name__ == '__main__':
    # Submit one calc_pileup task per reference in ex1.bam to a process pool
    # and pretty-print the collected (reference_name, coverages) results.
    pool = Pool()

    samfile = pysam.Samfile("ex1.bam", "rb")
    try:
        references = samfile.references

        # NOTE(review): the reported traceback fails in the worker's
        # `task = get()` / `recv()` inside Samfile.__cinit__, i.e. while the
        # task arguments are being unpickled, so the open Samfile handle
        # apparently cannot be sent to a worker process. Passing the file
        # name and opening the file inside the worker would avoid that; it is
        # left unchanged here because calc_pileup expects an open handle.
        pending = []
        for reference in references:
            print("> %s" % reference)
            # Keep the AsyncResult: it is the only way to fetch the outcome.
            pending.append(
                pool.apply_async(calc_pileup, [samfile, reference, 100, 120]))
        pool.close()
        pool.join()
        # Pool has no get(); results (or the worker's exception) come from
        # each AsyncResult returned by apply_async().
        pprint([result.get() for result in pending])
    finally:
        # Close the BAM file even if submitting or collecting tasks fails.
        samfile.close()
----------------------------

However, I got the following output:

----------------------------
$ python multi.py
 > chr1
 > chr2
Process PoolWorker-1:
Traceback (most recent call last):
Process PoolWorker-2:
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/process.py", 
line 232, in _bootstrap
Traceback (most recent call last):
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/process.py", 
line 232, in _bootstrap
     self.run()
     self.run()
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/process.py", 
line 88, in run
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/process.py", 
line 88, in run
     self._target(*self._args, **self._kwargs)
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/pool.py", line 
59, in worker
     self._target(*self._args, **self._kwargs)
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/pool.py", line 
59, in worker
     task = get()
     task = get()
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/queues.py", 
line 352, in get
   File 
"/home/mictadlo/apps/python/lib/python2.7/multiprocessing/queues.py", 
line 352, in get
     return recv()
   File "csamtools.pyx", line 446, in csamtools.Samfile.__cinit__ 
(pysam/csamtools.c:4791)
     return recv()
   File "csamtools.pyx", line 446, in csamtools.Samfile.__cinit__ 
(pysam/csamtools.c:4791)
   File "csamtools.pyx", line 459, in csamtools.Samfile._open 
(pysam/csamtools.c:5148)
   File "csamtools.pyx", line 459, in csamtools.Samfile._open 
(pysam/csamtools.c:5148)
TypeError: _open() takes at least 1 positional argument (0 given)
TypeError: _open() takes at least 1 positional argument (0 given)
----------------------------

Where did I make a mistake?

Thank you in advance.

Michal





More information about the Biopython mailing list