import csv

from Bio import Entrez
from Bio import SeqIO
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML


# BLAST the protein in a FASTA file against NCBI "nr", look up every hit in
# Entrez, and write one CSV row per hit:
#     record id, source, BLAST title, hit length, HSP score, hit sequence
#
# Side effects: one remote BLAST search, one Entrez request per hit, and two
# files written next to the input ("<base>123.xml" and "<base>123.csv").

# Input FASTA file; SeqIO.read requires it to hold exactly one record.
file = "ryr2fasta.txt"

# Output names reuse the input name minus its 4-character extension (".txt").
output_base = file[:-4] + "123"
xml_path = output_base + ".xml"
csv_path = output_base + ".csv"

# NOTE(review): NCBI asks Entrez clients to identify themselves. Set
# Entrez.email to a real contact address before running this script.

# Blast, save copy
with open(file) as fasta_handle:
    record = SeqIO.read(fasta_handle, format="fasta")

# str(seq) is the supported way to get the raw sequence text; the older
# Seq.tostring() method is no longer available in current Biopython.
result_handle = NCBIWWW.qblast("blastp", "nr", str(record.seq), hitlist_size=200)
try:
    blast_results = result_handle.read()
finally:
    result_handle.close()

with open(xml_path, "w") as save_file:
    save_file.write(blast_results)

# Load the blast record. The qblast handle can only be read once, so the
# results are parsed from the copy that was just saved.
with open(xml_path) as result_handle:
    blast_records = NCBIXML.parse(result_handle)
    # Only one query was submitted, so the first record is the only one.
    blast_record = next(blast_records)

# accession -> [title, length, score]; extended below with Entrez data.
output = {}

for alignment in blast_record.alignments:
    if not alignment.hsps:
        # A hit without HSPs has no score to report, so it gets no row.
        continue
    # The score stored is that of the last HSP listed for the hit.
    # NOTE(review): confirm whether the best-scoring HSP was intended instead.
    last_hsp = alignment.hsps[-1]
    output[alignment.accession] = [alignment.title, alignment.length, last_hsp.score]

for accession, row in output.items():
    # "gp" (GenPept) is the official EFetch return type for protein records,
    # and retmode="text" requests the flat-file form that the "genbank"
    # parser reads.
    handle = Entrez.efetch(db="protein", id=accession, rettype="gp", retmode="text")
    try:
        entry = next(SeqIO.parse(handle, "genbank"))
    finally:
        # Up to 200 requests are made; close each one instead of leaking it.
        handle.close()
    row.insert(0, entry.id)
    row.insert(1, entry.annotations["source"])
    row.append(str(entry.seq))

# Generate CSV
# csv.writer quotes fields that contain commas or quotes (BLAST titles and
# source strings often do), which a plain "%s,%s,..." join would corrupt.
# lineterminator="\n" keeps one "\n" per row, as a text-mode file expects.
with open(csv_path, "w") as save_file:
    writer = csv.writer(save_file, lineterminator="\n")
    writer.writerows(output.values())