[Biopython-dev] hmmpfam parser
Wagied
Wagied.Davids at ebc.uu.se
Fri Jan 30 15:48:10 EST 2004
Hi,
I have some code which is able to parse hmmer output,
as well as code donated by Joanne Adamkewicz from Exelixis.
If you guys/gals find it useful, updates and modifications will be done!
Wagied Davids
Dept. of Molecular Evolution
Uppsala University
Sweden
-------------- next part --------------
###########################################################################
# Copyright (c) 1997-2004 Exelixis Pharmaceuticals, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY CLAIM, DAMAGES OR OTHER LIABILITY WHATSOEVER,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
# USE, PERFORMANCE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# Module Notes :
#
# hmmpfam related routines
#
# Original Authors : Joanne Adamkewicz, Darren Platt
#
###########################################################################
import re
# Base URLs for the domain databases.  A hyperlink for a domain is formed by
# appending its accession number to the URL string stored here.
# NOTE(review): the keys look like the first two letters of an accession
# number (e.g. 'PF' for PF00130) -- confirm against the calling code.
accno2url = dict(
    PF='http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?',
    SM='http://smart.embl-heidelberg.de/smart/do_annotation.pl?BLAST=DUMMY&ACC=',
    CO='http://www.ncbi.nlm.nih.gov/cgi-bin/COG/COG_info.plx?',
    TI='http://www.tigr.org/tigr-scripts/CMR2/hmm_report.spl?user=access&password=access&acc=',
)
class ParseError(ValueError):
    """Raised when hmmpfam output does not follow the expected layout.

    Derives from ValueError so that callers which already catch the
    ValueError produced by malformed numeric columns keep working.
    """


def _readRequiredLine(handle):
    """Return the next line of handle; raise ParseError at end of file."""
    line = handle.readline()
    if not line:
        # readline() signals EOF with '' (never None).
        raise ParseError('unexpected end of PFam result')
    return line


def _isRfLine(line):
    """Tell whether line is one of the optional 'RF' annotation lines."""
    cols = line.split()
    return bool(cols) and cols[0] == 'RF'


def _parseHitRow(cols):
    """Turn the columns of one 'Scores for sequence family' row into a hit tuple."""
    try:
        # The description may contain blanks, so the numeric columns are
        # addressed from the right-hand side.
        return (cols[0], float(cols[-3]), float(cols[-2]), int(cols[-1]),
                ' '.join(cols[1:-3]))
    except (IndexError, ValueError):
        raise ParseError('Stage 1: malformed hit line: %r' % ' '.join(cols))


def _parseDomainRow(cols):
    """Turn the columns of one 'Parsed for domains' row into an 8-tuple."""
    try:
        # cols[4] ('..') and cols[7] ('[]') are boundary markers, not data.
        return (cols[0], cols[1], int(cols[2]), int(cols[3]),
                int(cols[5]), int(cols[6]),
                float(cols[-2]), float(cols[-1]))
    except (IndexError, ValueError):
        raise ParseError('Stage 3: malformed domain line: %r' % ' '.join(cols))


def _readAlignment(handle):
    """Read one domain alignment and return (modelAlign, matchString, subjectAlign).

    The info line of the alignment must already have been consumed.  The
    alignment consists of groups of [optional RF line], model line, match
    line, sequence line and a blank line; the model starts with '*->' and
    its end is recognised by a trailing '*'.
    """
    modelParts = []
    matchParts = []
    subjectParts = []
    while True:
        modelLine = _readRequiredLine(handle)
        if _isRfLine(modelLine):
            # The RF annotation is skipped, it is not returned.
            modelLine = _readRequiredLine(handle)
        segment = modelLine.strip()
        if not segment:
            raise ParseError('Stage 5: expected model line')
        if not modelParts and not segment.startswith('*'):
            raise ParseError("Stage 5: model line does not start with '*'")
        modelParts.append(segment)

        # The match line is cut by column so that it stays aligned with the
        # model segment even when it starts with blanks or has lost its
        # trailing blanks.
        unterminated = modelLine.rstrip('\r\n')
        offset = len(unterminated) - len(unterminated.lstrip())
        width = len(segment)
        matchLine = _readRequiredLine(handle).rstrip('\r\n')
        matchParts.append(matchLine[offset:offset + width].ljust(width))

        # Sequence line: name, start, residues, stop.  When only the closing
        # '*' of the model wrapped onto this group there are no residues and
        # the line is just 'name - -'.
        cols = _readRequiredLine(handle).split()
        if len(cols) < 3:
            raise ParseError('Stage 5: malformed sequence line')
        if not (len(cols) == 3 and cols[1] == '-' and cols[2] == '-'):
            subjectParts.append(cols[2])

        # A missing separator at the very end of the file is tolerated.
        if handle.readline().strip() != '':
            raise ParseError('Stage 5: expected blank line')
        if segment.endswith('*'):
            break

    # Drop the '*->' / '<-*' decoration and the match columns beneath it.
    model = ''.join(modelParts)[3:-3]
    match = ''.join(matchParts)[3:-3]
    return (model, match, ''.join(subjectParts))


def parseHmmpfam(outputfile, debug=0):
    """
    Purpose:
        hmmpfam is one of the algorithms in Sean Eddy's HMMER software. This
        function parses hmmpfam output that was generated either WITH or
        WITHOUT the --acc option.
        If with, the first column of every hit and domain contains the accno
        (PF00595). If without, the 1st column contains the domain name
        (pkinase). You can't get both in the same output. Whichever one is
        present is returned as the first element in each tuple. In the code
        below, the variable is 'accno' for simplicity.
        **NOTE THAT NAMES DO NOT NECESSARILY UNIQUELY DETERMINE ACCNOS, if you
        are running against an HMM library with models from more than one
        source, i.e. names are unique within Pfam, but not when you merge both
        Pfam and Smart models. Example: SM00542 and PF05965 both have the name
        'FYRC'. Therefore, if both are in your library, you can't determine
        which model gave the hit unless you use --acc.
        Also, Pfam itself changes the names of its models occasionally, but
        accnos are stable. You are STRONGLY RECOMMENDED to get in the habit of
        running hmmpfam with the --acc option!
        Only the first record of the file (up to the first '//' line) is read.
    Arguments:
        outputfile - (string) - path to text file containing raw hmmpfam output
        debug - 0 or 1 - whether to print debugging statements to stdout
    Returns:
        (A, B) tuple, where A and B are each lists of tuples as follows:
        A = hit results - one 5 element tuple per hit:
            accno of hmm - string
            score - float
            evalue - float
            N (number of occurrences of hmm domain in the seq) - int
            description of hmm - string (truncated in the raw output)
        B = domain results - one 11 element tuple per aligned domain:
            accno of hmm - string
            x/y - string (e.g. 1/2 = '1 of 2' -- identifies which occurrence
                  of the domain)
            seq-from - int
            seq-to - int
            model-from - int
            model-to - int
            score - float
            eval - float
            modelAlign - seq string of the model in the alignment
            matchString - consensus string for model<->seq alignment; it has
                          the same length as modelAlign, blanks included
            subjectAlign - seq string of sequence in the alignment
        Note that proteins with no domain hits will return ( [], [] )
    Raises:
        ParseError (a ValueError subclass), with an error message to indicate
        the problem
    """
    if debug:
        print("Now opening file '%s'" % outputfile)
    header = re.compile("Description.*Score.*E-value.*N")  # header for first data chart
    hits = []        # will be returned
    domainRows = []  # rows of the domain table, still without alignments
    aligned = []     # will be returned
    stage = 0        # parser has 5 stages, from top to bottom
    # The with block closes the file on every exit path, errors included.
    with open(outputfile, 'r') as handle:
        while True:
            line = handle.readline()
            if debug:
                print(line)
                print("stage = %s" % stage)
            if not line:
                # EOF
                break
            if stage == 0:
                # Passing over all the header stuff until we get to the first
                # data table; the line after its header is skipped.
                if header.search(line):
                    handle.readline()
                    if debug:
                        print("**Change to stage 1**")
                    stage = 1
            elif stage == 1:
                # Reading hits from first data table
                if line.find('no hits') != -1:
                    # This protein was not hit by any models
                    if debug:
                        print("No hits found!")
                    return (hits, aligned)
                cols = line.split()
                if not cols:
                    # Blank line = end of table
                    if debug:
                        print("**Change to stage 2**")
                    stage = 2
                else:
                    hits.append(_parseHitRow(cols))
            elif stage == 2:
                # Waiting for the header of the domain table; the line after
                # it is skipped.
                if line.find('Domain') != -1:
                    stage = 3
                    if debug:
                        print("**Change to stage 3**")
                    handle.readline()
            elif stage == 3:
                # Parsing domains; the alignment strings are added in stage 5.
                cols = line.split()
                if not cols:
                    stage = 4
                    if debug:
                        print("**Change to stage 4**")
                else:
                    domainRows.append(_parseDomainRow(cols))
            elif stage == 4:
                # Waiting for alignments
                if line.startswith('Alignments of top-scoring domains'):
                    stage = 5
                    if debug:
                        print("**Change to stage 5**")
            else:
                # Stage 5: 'line' is the info line that opens one alignment.
                stripped = line.strip()
                if stripped == '//':
                    # End-of-output marker found
                    break
                if not stripped:
                    # Tolerate a stray blank line between alignments.
                    continue
                # Alignments are paired with the domain rows by position.
                if len(aligned) >= len(domainRows):
                    raise ParseError('Stage 5: more alignments than parsed domains')
                aligned.append(domainRows[len(aligned)] + _readAlignment(handle))
    # All done!
    return (hits, aligned)
# 'RF' example: resultid 14693470
-------------- next part --------------
hmmpfam - search one or more sequences against HMM database
HMMER 2.2g (August 2001)
Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file: /usr/local/biotools/lib/Pfam
Sequence file: query
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Query sequence: gi|24649473|ref|NP_651199.2|
Accession: [none]
Description: [none]
Scores for sequence family classification (score includes all domains):
Model Description Score E-value N
-------- ----------- ----- ------- ---
SM00045 Diacylglycerol kinase accessory domai 262.2 7.8e-76 1
PF00609 Diacylglycerol kinase accessory domai 237.0 2.9e-68 1
PF00781 Diacylglycerol kinase catalytic domai 167.8 2e-47 1
SM00046 Diacylglycerol kinase catalytic domai 166.6 4.6e-47 1
SM00109 Protein kinase C conserved region 1 ( 149.8 5.3e-42 3
PF00130 Phorbol esters/diacylglycerol binding 142.6 7.6e-40 3
PF00788 Ras association (RalGDS/AF-6) domain 60.5 4e-15 1
SM00314 Ras association (RalGDS/AF-6) domain 58.4 1.8e-14 2
PF03107 DC1 domain 25.0 0.00019 2
COG1597 Predicted kinase related to diacylgly -73.3 0.0053 1
SM00360 RNA recognition motif 11.0 0.0059 1
PF00076 RNA recognition motif. (a.k.a. RRM, R 11.2 0.015 1
PF00628 PHD-finger -9.1 0.09 1
SM00249 PHD zinc finger 6.2 0.18 1
SM00184 Ring finger -0.8 0.47 1
PF01500 Keratin, high sulfur B2 protein -83.9 1.1 1
PF04928 Poly(A) polymerase central domain -47.5 1.2 1
SM00361 RNA recognition motif -4.9 1.3 1
COG0284 Orotidine-5'-phosphate decarboxylase, -107.0 2.3 1
PF02376 CUT domain -42.8 3.1 1
PF00412 LIM domain -24.0 3.1 1
SM00215 von Willebrand factor (vWF) type C do -22.0 3.9 1
SM00217 Four-disulfide core domains -21.1 4 1
PF04041 Domain of unknown function (DUF377) -168.6 4.9 1
PF04014 SpoVT / AbrB like domain -14.1 5.1 1
PF03768 Attacin, N-terminal region -14.7 5.2 1
PF00219 Insulin-like growth factor binding pr -23.6 5.4 1
PF01021 TYA transposon protein -296.2 5.6 1
PF03154 Atrophin-1 family -731.6 5.8 1
SM00343 zinc finger C2HC, DNA-binding -4.4 6.3 1
PF04236 Tc5 transposase C-terminal domain -23.8 6.3 1
SM00336 B-Box-type zinc finger, protein inter -18.9 6.4 1
PF03208 Prenylated rab acceptor (PRA1) -89.5 6.5 1
PF00503 G-protein alpha subunit -308.3 6.9 1
COG0008 Glutamyl- and glutaminyl-tRNA synthet -325.3 7.1 1
PF03792 PBX domain -116.2 7.7 1
PF03302 Giardia variant-specific surface prot -292.4 8.7 1
PF04395 Poxvirus B22R protein -557.5 8.9 1
PF04396 Protein of unknown function, DUF537 -40.4 9 1
SM00463 Small MutS-related domain -20.0 9.4 1
SM00157 Major prion protein -87.0 9.5 1
PF01391 Collagen triple helix repeat (20 copi -86.6 9.5 1
Parsed for domains:
Model Domain seq-f seq-t hmm-f hmm-t score E-value
-------- ------- ----- ----- ----- ----- ----- -------
PF00130 1/3 6 55 .. 1 51 [] 74.6 2.3e-19
SM00109 1/3 6 55 .. 1 61 [] 68.3 1.8e-17
PF03107 1/2 17 48 .. 1 44 [] 4.9 0.39
PF01500 1/1 32 183 .. 1 177 [] -83.9 1.1
PF03302 1/1 36 303 .. 1 412 [] -292.4 8.7
PF00412 1/1 36 81 .. 1 62 [] -24.0 3.1
PF00130 2/3 69 116 .. 1 51 [] 13.1 0.00045
SM00109 2/3 69 116 .. 1 61 [] 30.5 4.3e-06
SM00336 1/1 77 106 .. 1 51 [] -18.9 6.4
PF03107 2/2 80 109 .. 1 44 [] 20.1 0.0057
PF00628 1/1 81 119 .. 1 51 [] -9.1 0.09
SM00249 1/1 81 152 .. 1 39 [] 6.2 0.18
SM00184 1/1 82 151 .. 1 23 [] -0.8 0.47
PF04236 1/1 91 140 .. 1 69 [] -23.8 6.3
SM00215 1/1 100 166 .. 1 105 [] -22.0 3.9
PF00130 3/3 135 185 .. 1 51 [] 54.9 1.9e-13
SM00109 3/3 135 185 .. 1 61 [] 51.0 2.9e-12
SM00217 1/1 138 174 .. 1 51 [] -21.1 4
SM00343 1/1 147 162 .. 1 17 [] -4.4 6.3
PF00219 1/1 148 197 .. 1 84 [] -23.6 5.4
PF04928 1/1 183 1018 .. 1 205 [] -47.5 1.2
PF04395 1/1 189 835 .. 1 1361 [] -557.5 8.9
PF03154 1/1 256 1058 .. 1 1046 [] -731.6 5.8
PF01021 1/1 416 830 .. 1 440 [] -296.2 5.6
PF03768 1/1 469 519 .. 1 72 [] -14.7 5.2
SM00157 1/1 678 903 .. 1 221 [] -87.0 9.5
COG0008 1/1 697 1149 .. 1 592 [] -325.3 7.1
PF03792 1/1 706 876 .. 1 209 [] -116.2 7.7
PF01391 1/1 761 818 .. 1 60 [] -86.6 9.5
PF00503 1/1 819 1019 .. 1 362 [] -308.3 6.9
SM00314 1/2 831 922 .. 1 102 [] 9.9 0.011
PF04041 1/1 887 1149 .. 1 363 [] -168.6 4.9
PF00788 1/1 923 1024 .. 1 113 [] 60.5 4e-15
SM00314 2/2 923 1024 .. 1 102 [] 48.5 1.7e-11
PF04396 1/1 937 1001 .. 1 115 [] -40.4 9
PF02376 1/1 969 1022 .. 1 88 [] -42.8 3.1
SM00361 1/1 1031 1103 .. 1 91 [] -4.9 1.3
SM00360 1/1 1031 1103 .. 1 121 [] 11.0 0.0059
PF00076 1/1 1032 1102 .. 1 77 [] 11.2 0.015
PF03208 1/1 1098 1256 .. 1 162 [] -89.5 6.5
COG1597 1/1 1117 1512 .. 1 332 [] -73.3 0.0053
PF00781 1/1 1119 1267 .. 1 154 [] 167.8 2e-47
SM00046 1/1 1119 1267 .. 1 157 [] 166.6 4.6e-47
COG0284 1/1 1333 1511 .. 1 266 [] -107.0 2.3
SM00045 1/1 1334 1489 .. 1 195 [] 262.2 7.8e-76
PF00609 1/1 1334 1489 .. 1 190 [] 237.0 2.9e-68
SM00463 1/1 1346 1416 .. 1 94 [] -20.0 9.4
PF04014 1/1 1447 1487 .. 1 47 [] -14.1 5.1
Alignments of top-scoring domains:
PF00130: domain 1 of 3, from 6 to 55: score 74.6, E = 2.3e-19
*->HrFkrttfyksptfCdhCgellwglakQGlkCsnCglnvHkrChekV
H F+ +tf ++pt+C hC +llwgl+ QG+ C++C++ +H+rC++ V
gi|2464947 6 HSFVKKTF-HKPTYCHHCSDLLWGLIQQGYICEVCNFIIHERCVSSV 51
ptnC<-*
+t+C
gi|2464947 52 VTPC 55
SM00109: domain 1 of 3, from 6 to 55: score 68.3, E = 1.8e-17
*->Hkfvfrtf.kptfCdvCrksiwgsfkqaaksqglrCseCkvkcHkkC
H+fv++tf+kpt+C++C +++wg+ +q g+ C++C++ +H++C
gi|2464947 6 HSFVKKTFhKPTYCHHCSDLLWGLIQQ-----GYICEVCNFIIHERC 47
aekvpaqshksglsC<-*
++ v +C
gi|2464947 48 VSSVVT-------PC 55
PF03107: domain 1 of 2, from 17 to 48: score 4.9, E = 0.39
*->fsCdvCerkidpgsngffYsCskeegCndeeetsdyfvhdvrCa<-*
C C + + +Y C + Cn f+++ rC+
gi|2464947 17 TYCHHCSDLLWG-LIQQGYICEV---CN--------FIIHERCV 48
PF01500: domain 1 of 1, from 32 to 183: score -83.9, E = 1.1
*->qtScCGfptCStlgtrPsCGsscCQPsCCe...SCCQpsCcqpSCCq
q+ C C C+ss P C + C +
gi|2464947 32 QGYICEV--CNFII-HERCVSSVVTP--CSgiaPCIIKNPVAHCWSE 73
PtcsqtscCqPtcfqs..........sCCrPsCcqTSCCq....PtCcqs
Pt +C +c ++++ + C ++ Cq+ P C +
gi|2464947 74 PTHHKRKFCT-VCRKRldetpavhclVCEYFAHIE---CQdfavPDCTEN 119
ssCqtgCgigGsiGyGQeGsSGAvScrirWCRPdCrvegtClPpCCvvsC
+g v + W R ++t++ C +C
gi|2464947 120 ATYVPGKELL------------NVKHQHHW-R-EGNLPSTSKCAYCKKTC 155
taPTCCqpvsaQasCCRPsCqPyCgqsCCRPaCccsvtCtrTccePc<-*
++ C + ++ +Cg C+ + P+
gi|2464947 156 WSSECLTGYRCE----------WCG-MTTHAGCRMY--------LPT 183
PF03302: domain 1 of 1, from 36 to 303: score -292.4, E = 8.7
*->CaeCklGyelsadktkcetsaPPdCkveNCkaCsnekeeNevCeeCn
C+ C +++ +c++s v C + +N v C
gi|2464947 36 CEVCN--FII---HERCVSSV-----VTPCSGIAPCIIKNPV-AHCW 71
SgfyLtpnTsqCidaCakiGnyYaqTnaqnKkiCkeCtvAnCktCedqGq
S p T C + T a C C+ + +C d +
gi|2464947 72 S----EP-THHKRKFCTVCRKRLDETPA---VHCLVCEYFAHIECQDFAV 113
cqaCndGfYksGdaCsPChes..cKTCsgGTaSdCTeCltGkaLrYGnDg
+ + +Y G + ++ + + + S+C C
gi|2464947 114 PDCTENATYVPGKELLNVKHQhhWREGNLPSTSKCAYC------------ 151
TKGtC.GegCttgtGaGPaCkTCGLtIDGtsYCSeCateteyPqNGVCtS
K tC C tg C+ CG t
gi|2464947 152 -KKTCwSSECLTGY----RCEWCGMT------------------------ 172
taaRatatCkdstvanGvCssCanGyl..rmnGGCYeTtKfPGKSVCeea
+ a C yl++ n G + +P SV +
gi|2464947 173 ----THAGCR--------------MYLptECNFGILQPIYLPPHSVSIPR 204
ngggDTCqkeapGYkldsgdLvvCSeGCktCtssTvCttCadGyvkdggs
+ +vk+ s
gi|2464947 205 TEVP--------------------------------IEAIIGVQVKSKTS 222
dv....CtkCDssCeTCTaGatttCktCaTGYYKsgtgcvkCtssesdSn
v++ +C+ D sC aG+ + +g + + +
gi|2464947 223 LVrdysCPSPDLSCPIPGAGSGSL----------TSLGLKELLELHRQ-R 261
gitGVkgClsCAPP.snnkGSV.lCYLikdss.sGGnSTNKSGLSTGAIA
l PP++ + GS++lC ss + G N
gi|2464947 262 LEQSKQHFLLSTPPtPTSCGSIsLCHSPTPSSlTVGETSN---------- 301
GIsVAvviVVGGLVGFLCWWFiCRGKA<-*
A
gi|2464947 302 -------------------------EA 303
PF00412: domain 1 of 1, from 36 to 81: score -24.0, E = 3.1
*->CagCnkpIydrevvrralnkvwHpeCFrCavCgkpLtegdefyekdg
C Cn I++r+v+ + +p++ + +k+
gi|2464947 36 CEVCNFIIHERCVS-SVV---------------TPCSGIAPCIIKNP 66
skelYC..khDyyklfg<-*
C +++ ++k+++
gi|2464947 67 -V-AHCwsEPTHHKRKF 81
PF00130: domain 2 of 3, from 69 to 116: score 13.1, E = 0.00045
*->HrFkrttfyksptfCdhCgellwglakQGlkCsnCglnvHkrChekV
H++ t ++ +fC +C+++l ++ ++C +C + +H +C+ ++
gi|2464947 69 HCWSEPTH-HKRKFCTVCRKRLDETP--AVHCLVCEYFAHIECQDFA 112
ptnC<-*
+++C
gi|2464947 113 VPDC 116
SM00109: domain 2 of 3, from 69 to 116: score 30.5, E = 4.3e-06
*->Hkfvfrtf.kptfCdvCrksiwgsfkqaaksqglrCseCkvkcHkkC
H ++++t++k++fC vCrk++ + ++ C +C + +H C
gi|2464947 69 HCWSEPTHhKRKFCTVCRKRLDETP-------AVHCLVCEYFAHIEC 108
aekvpaqshksglsC<-*
++++ + +C
gi|2464947 109 QDFAVP-------DC 116
SM00336: domain 1 of 1, from 77 to 106: score -18.9, E = 6.4
*->eraplCeeHgd..eepaeffCveedgallCrdCdeageHqanklfrg
+++ C++++++ +e+ + C ++C+ + H
gi|2464947 77 HKRKFCTVCRKrlDETPAVHC---------LVCEYF-AH-------- 105
Hrvvll<-*
+
gi|2464947 106 -----I 106
PF03107: domain 2 of 2, from 80 to 109: score 20.1, E = 0.0057
*->fsCdvCerkidpgsngffYsCskeegCndeeetsdyfvhdvrCa<-*
+ C vC++++d+ + +C + C+ yf h+ +C
gi|2464947 80 KFCTVCRKRLDE---TPAVHCLV---CE-------YFAHI-ECQ 109
PF00628: domain 1 of 1, from 81 to 119: score -9.1, E = 0.09
*->yCsvCgkvdddaggdllqCDgCdrwfHlaClgppleeppegkWlCpe
+C vC+k d + + C C+ H++C ++ +
gi|2464947 81 FCTVCRKRLD--ETPAVHCLVCEYFAHIECQDFAVP----------D 115
Ctpk<-*
Ct+
gi|2464947 116 CTEN 119
SM00249: domain 1 of 1, from 81 to 152: score 6.2, E = 0.18
*->yC.vCgk....g.llqCdkgCdrwyHv.Clgpple............
+C+vC+k+ ++ + + C C+ H++C +++ ++ +++ + +++
gi|2464947 81 FCtVCRKrldeTpAVHCL-VCEYFAHIeCQDFAVPdctenatyvpgk 126
..............epdg.wyCprCk<-*
+ + +++++ ++++ +++ +C Ck
gi|2464947 127 ellnvkhqhhwregNLPStSKCAYCK 152
SM00184: domain 1 of 1, from 82 to 151: score -0.8, E = 0.47
*->CpICle.......pvvlpCgH.FCr.Ci...................
C++C + ++++ + +l C + +C + ++ +++ + ++++
gi|2464947 82 CTVCRKrldetpaVHCLVCEYfAHIeCQdfavpdctenatyvpgkel 128
...................CPlC<-*
+ ++++++++++ +++++C C
gi|2464947 129 lnvkhqhhwregnlpstskCAYC 151
PF04236: domain 1 of 1, from 91 to 140: score -23.8, E = 6.3
*->dshdeFlTPsqYCfgvsGhvdtviCyftgCqnlaFIrCARCKkfPar
+TP+ +C+ ++ + Cq++a C +
gi|2464947 91 ------ETPAVHCLV----CE--YFAHIECQDFAVPDCTENATY--V 123
tGknfiCFnHfVvsefhacpcp<-*
Gk++ v +h +
gi|2464947 124 PGKEL-----LNVKHQHHWREG 140
SM00215: domain 1 of 1, from 100 to 166: score -22.0, E = 3.9
*->CqNAvnnGsyYppLNkGakWdDiALtGRtEDtdDCsnrCtClnGrvs
C y++++ +++++ DC+ + t G+ +
gi|2464947 100 C-------EYFAHIECQDFAV-----------PDCTENATYVPGKEL 128
lCtkvwCgpkpClLhgslsKSSnlsgeCplgqgcvpslsdqKqYtvHGDC
l k + +++ g l+ +++ C+ + +s s+ C
gi|2464947 129 LNVKHQHHWRE----GNLP----STSKCAYCKKTCWS-SE---------C 160
fsvltsP.C<-*
lt ++C
gi|2464947 161 ---LTGYrC 166
PF00130: domain 3 of 3, from 135 to 185: score 54.9, E = 1.9e-13
*->HrFkrttfyksptfCdhCgellwgla.kQGlkCsnCglnvHkrChek
H+++ ++ +s+ C++C++ +w +G++C++Cg++ H+ C +
gi|2464947 135 HHWREGNL-PSTSKCAYCKKTCWSSEcLTGYRCEWCGMTTHAGCRMY 180
VptnC<-*
+pt+C
gi|2464947 181 LPTEC 185
SM00109: domain 3 of 3, from 135 to 185: score 51.0, E = 2.9e-12
*->Hkfvfrtf.kptfCdvCrksiwgsfkqaaksqglrCseCkvkcHkkC
H++++++ ++ ++C++C+k++w+s + g+rC++C+++ H C
gi|2464947 135 HHWREGNLpSTSKCAYCKKTCWSSECLT----GYRCEWCGMTTHAGC 177
aekvpaqshksglsC<-*
+p C
gi|2464947 178 RMYLPT-------EC 185
SM00217: domain 1 of 1, from 138 to 174: score -21.1, E = 4
*->KpGsCPwvqlpiiasCplgnppnkCssDsqCpGnkKCCengCGKksC
++G P +++ + ++ C+s s+C + C CG
gi|2464947 138 REGNLP-----STSK--CAYCKKTCWS-SECLTGYRC--EWCG---- 170
ltPv<-*
+t
gi|2464947 171 MTTH 174
SM00343: domain 1 of 1, from 147 to 162: score -4.4, E = 6.3
*->kCynCGkeGHiardCpk<-*
kC +C+k+ ++++C
gi|2464947 147 KCAYCKKTC-WSSECLT 162
PF00219: domain 1 of 1, from 148 to 197: score -23.6, E = 5.4
*->CprPcGGpCpaerlarCpPgPpvaPpaecaelvredGCGCClvCArq
C+ C C + ec + r C+ C
gi|2464947 148 CA-----YCKKT----CWSS-------ECLTGYR------CEWCGMT 172
eGeaCGvytPrDeskGLyCarGaedaakaLrCrpppG<-*
+ C y+P +C++G +L+ +p
gi|2464947 173 THAGCRMYLPT------ECNFG------ILQPIYLPP 197
PF04928: domain 1 of 1, from 183 to 1018: score -47.5, E = 1.2
RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
*->stkqyGvtkpislagpkekdvkltesLieeLkefgsf..........
++ ++G+ +pi l+ p++ + +te ie++ +++ ++++ ++ +
gi|2464947 183 TECNFGILQPIYLP-PHSVSIPRTEVPIEAIIGVQVKsktslvrdys 228
RF
..................................................
++++ + + ++ ++++ ++ + ++ + ++++ ++++++ +++++++
gi|2464947 229 cpspdlscpipgagsgsltslglkellelhrqrleqskqhfllstpptpt 278
RF
..................................................
+ ++ + +++++++ + ++++++ ++++++++++++++++++++++++
gi|2464947 279 scgsislchsptpssltvgetsneaeqdrerdqdqpeeepeeenteqdsa 328
RF
..................................................
+ ++++++ ++ ++ ++ +++ + ++ ++ ++++++++++ ++++
gi|2464947 329 lqlttstsnvignlqkwpsansslhllytnlfrklgqgkrrrkrgissgg 378
RF
..................................................
++++++++ +++ + ++++ +++ ++ + ++++ +++++++ ++++
gi|2464947 379 lspsededdvdggvcdisggdlsddydhcdvalrrrslrsrqprdvsetd 428
RF
..................................................
+++ + + ++++ ++++ ++++++++ +++++ +++ + ++ + +++
gi|2464947 429 yhgdaeaeaegetvprescyetsdtggeltntddldsslnlisnlsynss 478
RF
..................................................
++++ + +++ + ++ +++ +++++ ++++++ + ++++++++++
gi|2464947 479 nnsnacnvpggatapdarntattsttapgksghalsvqggrqqpktgala 528
RF
..................................................
+ ++++++ ++++ ++++++ +++ +++++++ ++ +++ + + + +
gi|2464947 529 qikpkpkpilmpkhkaqgkggslssplsnsnssdcssaspsapatllqls 578
RF
..................................................
+ +++++ +++ + ++ ++ +++ +++++++++++ + +++++ +
gi|2464947 579 pvgrsksfqesaaitavsrykkygrglfqrrrskrspknavgvggksnys 628
RF
..................................................
++ +++ + + ++++++ ++ +++ + ++ + ++ +++ + ++ +
gi|2464947 629 ldrlsqnieitiqdedgnfhpyddnyhmlagrldatdvdddvgfddlyld 678
RF
..................................................
+++++ +++ ++ ++++ +++++ ++ ++++ ++ ++ +++ + +
gi|2464947 679 drpsgasddvafagdisdggassrsrasdasdghvlgrllrqvrqglsvg 728
RF
..................................................
++++ ++++ ++ +++ +++++++ +++++ ++ ++++++++++ ++++
gi|2464947 729 wrkpryqkrrarsiseefssgdtprfkdeesaskaesghgpssggagggg 778
RF
..................................................
++++ ++++ + + + ++++++ +++++++++++++++++++++++++
gi|2464947 779 gsggaggssaagasasaaggssghyrpdsgsghksdksekdrekkekere 828
RF xxxxxxxxxxxxxxxxxxxxxxxxxxx
.......................eilkLVPnkyevFrltLraiKlWAkrr
+++ + + +++++ ++++ + i+++ + +++Lra + +
gi|2464947 829 ekdiemikvfdgnnsfrrqqyrvIIVQRTYTLEQLLTTALRAFHITRDPQ 878
RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
giYsNvlGFlgGvaWAiLVAriCQlYPnavpstlvekfFlvfsqWlrhnw
+ Y l + G + + P + l k ++ + h++
gi|2464947 879 AFYLTDLYAPAGMEDTPMLDPT----PVLNLVHLEGKRPAIYLRF--HDR 922
RF xxx xxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxx
pnP...VlLkeinsdsieernlqvrvWdprknk...sDricyhlmPiiTP
+ +V+ ++++ +e+ +v+v ++ + k+ +D+ ++ +
gi|2464947 923 DRGhvrVYPGKLQCSMLEDPYVSVPVDNSTVIKdliRDA--LDKFGLQDN 970
RF xx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Ay.PqqnstynVsestlkvileefkrgleildeielgkaeWskLfeka<-
+ + +s + + il+ +r +i++++ + ++ ++L+ +
gi|2464947 971 QIqDYRCSEVLLDRGVTERILSWNERPWDIMKQLGKDSIRQMELMRFY 1018
RF
*
gi|2464947 - -
PF04395: domain 1 of 1, from 189 to 835: score -557.5, E = 8.9
*->lfsltilaIyiliteSegyetClRKtplYHdtqkkiepKentDhkAs
+++ ++l+ +++S + + +p e +
gi|2464947 189 ILQPIYLP---PHSVS---------------IPRTEVPIEAI----- 212
AtykYLsiaekkEkerflesFnWtkIkeeVKdaFirkCdlssnkdRLdgv
I VK+ + +
gi|2464947 213 -------------------------IGVQVKS--------K-------TS 222
ykYNYtiaysltVskksekktkgtdiestykkitknivastlslskvdee
+ +Y+ s + s + + ++ +s ++k + ++ + l+ sk
gi|2464947 223 LVRDYS-CPSPDLSCPIP-GAGSGSLTSLGLKELLELHRQRLEQSK---- 266
yTFttiiyaTvtssleTsSvPiddrSsdyvntiaikiLikvLdVNETele
gi|2464947 - -------------------------------------------------- -
aylisnESLimAkyinttknkdskvdfnlPkvehitYenskCnNiTvdkV
++P+ + ++s C T +
gi|2464947 267 ----------------------QHFLLSTPPTPTSCGSISLCHSPTPSSL 294
tIGnFSvidvdsaenakedIrIiFkGvStsdPYvdSDkfieCitkkInnc
t+G +++ ae+ e d D+ +e + ++
gi|2464947 295 TVGE----TSNEAEQDRE---------------RDQDQPEE---EPEEEN 322
knsndvkgkvkveKsvTsNCekCsMgLMaeVtsvPeEFnnTLKenGikdD
++ + + + +v++N L++ +
gi|2464947 323 TEQDSALQLTTSTSNVIGN----------------------LQKWPSANS 350
dlteLYNFYlCmltnnddCseYvpLtekikedtlksLssYsliktsrsRr
l L Y +L +k ++ + Rr
gi|2464947 351 SLHLL----------------YTNLFRK--------------LGQGKRRR 370
KsRPRRnAGDsRDtdeeteiS.sEdLe......CmYlsYdtddDDDredd
K R + + +S+sEd ++ +++ C d +dD
gi|2464947 371 K----------RGIS-SGGLSpSEDEDdvdggvCDISGGDLSDD------ 403
drydqCvnspekeItaKsRkkRsdseeknekRKqsYKnRPKRsLdddltd
yd+C sR +R se + ++ d +
gi|2464947 404 --YDHC-DVALRRRSLRSRQPRDVSETDYHG-------------DAEAEA 437
ylKKyLgie.eVIPkkAsHlQVGistsYgkseedgViGDs.....sIysd
e+e P++ + ts ++e ++ D+ +++ ++s+
gi|2464947 438 --------EgETVPRESCY-----ETSDTGGELTNT--DDldsslNLISN 472
vKdrAkkllekimPsvPldTdpeslyakirkptkikLPpdsKnivtealr
++ +++ + + +vP ++ + + + ++t
gi|2464947 473 LSYNSSN--NSNACNVPGGATAPDARNTATTSTT---------------- 504
siieqKqeSvkevLkteselssssieeaetegkskhkssveteivvLskd
+gks h +sv
gi|2464947 505 -----------------------------APGKSGHALSVQGG------- 518
DldvkenysrkglvsriddepvyedirsvdrlkekirdyrkkGGkkkess
r +++ ++ + +k + k +Gk +
gi|2464947 519 ---------RQQPKTG-------ALAQIKPKPKPILMPKHKAQGK----G 548
isvlkevsrtssgmfdvDtStvvvkPsrrkitsasrnfessskpsrrlss
s+ ss+++++++S+ + + + t + + sk+ ++
gi|2464947 549 GSL-------SSPLSNSNSSDCSSASPSAPATLLQLSPVGRSKSFQESAA 591
develeyeknyrdSlepekssssrkrCkrglnkAvcaiLgrvplpeknnn
+ ++ y k+y +++ r+r kr+ + Av +++ n
gi|2464947 592 ITAVSRY-KKY------GRGLFQRRRSKRSPKNAV--------GVGGKSN 626
dvvkdaravssvvdskrsSSaslySllPgvdtgeAAaagniardRqanaq
ySl+ + +e
gi|2464947 627 -----------------------YSLDRLSQNIE---------------- 637
venesitTPltRraaaaRrfqqGRvpdrgetnlvnelqklpls...tsqL
+ i++ + + ++ + + l ++ + t
gi|2464947 638 ---ITIQDEDGNFH-----------------PYDDNYHMLAGRldaTDVD 667
snsvykeavqlstsgdesllqvpqRpsqsvvqgstPvrpsPPlpPardrl
++ + +++ ++++ s+ + +++ + +s ++r s+
gi|2464947 668 DDVGFDDLYLDDRPSGASDDVAFAGDISD-GGASSRSRASDASDG----- 711
rrPlaAiipedsipkskgipkvvsprlRrStsGvvcGMlQSkvksdgtYs
v+++ lR G + G +
gi|2464947 712 --------------------HVLGRLLRQVRQGLSVGWRKPR-------- 733
LvqlPiDGYPGnPArRPLPRIPiRsDssDssDHiYEtiGsRsRsYAGssG
Y +Rs s ssG
gi|2464947 734 ----------------------------------YQKRRARSISEEFSSG 749
.tHYnAiegSssdagsiessslesssgipkdkvvvgdrSgtssGGrrsGR
+t e S s a s +++ Sg+ +GG++sG
gi|2464947 750 dTPRFKDEESASKAESGHGP-----------------SSGGAGGGGGSGG 782
rnsvrseSgySsddsevsmEGSVYqPSiKElnsksskkYkekMkkISsSf
++ + ++ s + + ss+ Y+ + + +
gi|2464947 783 AGGSSA-----AGASASA-------------AGGSSGHYRPD-SGSGHKS 813
DKsmaFglAmQligQqaInrqsRseriqkddrdkaEkvFEAVStsLSTiG
DKs + dr k Ek
gi|2464947 814 DKS-------------------------EKDREKKEK------------- 825
ttmttAGIiaSPhLAfAGMGLSlISGLIDtGKDIYYlfSGkekPeDPlvK
gi|2464947 - -------------------------------------------------- -
kFNtYrelVsDtskmGVRKClmPGsDltIYlaYRNDSSFkPslEkLaLyF
gi|2464947 - -------------------------------------------------- -
iDtIdSvLYYLNTSnIIlDysLtVACPIGyLRSPdLDITAYTiLKFtTed
e+
gi|2464947 826 ----------------------------------------------EREE 829
nVKFYqFtRLGAMLSKfPvVrLTCGrdiTLT<-*
+di
gi|2464947 830 -------------------------KDIEMI 835
PF03154: domain 1 of 1, from 256 to 1058: score -731.6, E = 5.8
*->ekhssRtfrargs..astlrsGRkkyPastdGvlSPvnedvrskGrn
e h R +++ st+ P s G +S + +
gi|2464947 256 ELHRQRLEQSKQHflLSTPPT-----PTSC-GSIS-----LCHSPTP 291
aaSavstssNdsK......aeavkksakkVkeeaASglknTKrqrekVas
+ v+ sN+ ++++++++ + + e S l T ++
gi|2464947 292 SSLTVGETSNEAEqdrerdQDQPEEEPEEENTEQDSALQLTTSTSNVIG- 340
dtldsDRAaskkakfqevsRPNlPse.gEGEssdlRslNdesaSdPklid
+l + + Nl g G R + + S+ + d
gi|2464947 341 -NLQK---WPSANSSLHLLYTNLFRKlGQGKRRRKRGISSGGLSPSEDED 386
QdnRslsgslPSPqDnEsDsDyaaqQqMlqlqPgalkaPslAaSAPsslP
d + g + s D + + al+ sl P +
gi|2464947 387 -D---VDGGVCDI----SGGDLSDDYDHCDV---ALRRRSLRSRQPRDVS 425
PassslPaPGPtrfaysvssssSaAaSsssssssSsvaPaaasLiQalPs
a + s +s + + l s
gi|2464947 426 ETDYHGDAE--------AEAEGETVPRESCYETSDTGGELTNT--DDLDS 465
lHPhrlPsPhtsLsvstaPPkytsAQPslPsqalhsQGPPgPhslqtGrL
l s+ +y+ s
gi|2464947 466 S----LN------LISNL--SYN--------------------SS----- 478
LansnahPqPFGLtPq...SsqaQstlgPsPvaaHhHstiQlqasQsalQ
nsna P G t ++ + + st P + H s +
gi|2464947 479 -NNSNACNVPGGATAPdarNTATTSTTAP-GKSGHALSV--------QGG 518
qQQhhrneqPlPPaalamPLEGGssHHikPyatsPsLGslrqlPagqAHk
QQ+ +ala kP + +
gi|2464947 519 RQQPKT-------GALAQI---------KPKPKPI--------------L 538
hPPHLSqvSyfsanaNlPPvssalkslSSlStgsyPsaHPsPlQLgPQsa
P H + + s+ ++s SS Psa lQ
gi|2464947 539 MPKH------KAQGKGGSLSSPLSNSNSSDCSSASPSAPATLLQ------ 576
PlPfsPvqPtvlTsSasLstviatvASsPaGYKTasPPGlhqvgkraPfP
sPv + S s i t+ S K + Gl q + +P
gi|2464947 577 ---LSPV---GRSKSFQESAAI-TAVSRYK--KYG--RGLFQRRRSKRSP 615
GAyktavPgGykPisPPSFRtGtPPGYRtssPPAGPGtFKPGSssvqPGP
k av G k R + G F P
gi|2464947 616 ---KNAVGVGGKSNYSLD-RLSQNI---EITIQDEDGNFHPYDDNYHM-- 656
lsaAvsSGlPslPPPPaAPasGpPLsAvQIKeEa.ldEaEePESPvPPaR
l A L A+ + + + d
gi|2464947 657 --------L--------AGR----LDATDVDDDVgFDDL----------- 675
SPSPePkVVDvPSHASQSARFyKHL.DRGyNSCAR.sDLYFvPLeGSKLA
D PS AS F +D G S R sD G L
gi|2464947 676 ------YLDDRPSGASDDVAFAGDIsDGGASSRSRaSDASDGHVLGRLLR 719
KKRedlvEKvkREAEQkAREEkEREkEkEkEkEREREkERElERavkkAs
R l + Qk R E R E ++ kA
gi|2464947 720 QVRQGLSVGWRKPRYQKRRARSISEEFSSGDTPRFKDEE-----SASKAE 764
ssAHEGRAPledPsLsGPvhmRPsFEPgPsavAaVPPYlGPDTPALRTLS
G Ps G + s g s A G A S
gi|2464947 765 ----SGH----GPSSGGAGGGGGSGGAGGSSAA------GASASAAGGSS 800
EYARPHVMSPtNRNHPFYvPLnavDPGLLaYnvPaLYsvDPaiRERELRE
RP D G +D RE +E
gi|2464947 801 GHYRP-------------------DSG-------SGHKSDKSEKDREKKE 824
REiREREi............RERdLR....dRlKPGFEVKPsELdPLHgv
+E E i+ + +++++ R R R L H
gi|2464947 825 KEREEKDIemikvfdgnnsfRRQQYRviivQRTYTLEQLLTTALRAFHIT 874
tnPGldhFaRHsaLalqPGaaGlHPFasFHPs....LnPLERERLALAAG
P F L+ +aG + P + +L LE R A
gi|2464947 875 RDP--QAFY----LTDLYAPAGMEDTPMLDPTpvlnLVHLEGKRPAIY-- 916
PaLRPdMSYadRLAAERiHAERvAsLtsDPLARLQMlNVTPHHHQHSHIH
LR + dR R H V P Q S
gi|2464947 917 --LR----FHDR---DRGH-----------------VRVYPGKLQCSML- 939
SHLHLHQQDalHaaSAsPVHPLvDPLaaGsHLaRiPYPaGTLPNPLLgqP
D+ S vD + L R L
gi|2464947 940 -------EDP--YVSVP-----VDNSTVIKDLIR---------DALDKFG 966
lHEnEvLRHqlFaaPYPRDLPaalsa....PMSAAHQLQAMHAQSAELQR
l n + R + +++P QL EL R
gi|2464947 967 LQDNQIQDYRCSEVLLDRGVTERILSwnerPWDIMKQLGKDSIRQMELMR 1016
LAlEQQqWLHa.HhhlHsvhLP...aQEDYYSrLKKEsDKqL<-*
+ q H++ l + LP++ +Q Y L K
gi|2464947 1017 FYMQHKQDPHGpNIALFVGNLPtglSQRNYEQILNKYVTDEN 1058
PF01021: domain 1 of 1, from 416 to 830: score -296.2, E = 5.6
*->MESQQLsQnsri.lHGSAyASVTSKEVh..............sNQDP
+ S Q s + HG A A V++++ ++++++++ +N D
gi|2464947 416 LRSRQPRDVSETdYHGDAEAEAEGETVPrescyetsdtggelTNTDD 462
LdVSASkleEfdkdSTKvNSQQeTTPasSAVPENhHHvSPQtAs......
Ld S + S NS P+ P + t ++++++
gi|2464947 463 LDSSLNLISNLSYNSSN-NSNACNVPGGATAPDARNTATTSTTApgksgh 511
vhsPQNG.qYqQqgMMTqNkAnaSnWafYqqPSMityshYQ......tSP
s Q G+q + g + q k + +P +++ Q+++++ SP
gi|2464947 512 ALSVQGGrQQPKTGALAQIKPKP-------KPILMPKHKAQgkggslSSP 554
a..YyqPdPqyqlPQYissvGtPLSTsSPdsidsftdsSevdsdeTkvkk
++ d + P + L SP + S + + + kk
gi|2464947 555 LsnSNSSDCSSASPSAPA----TLLQLSPVGRSKSFQESAAITAVSRYKK 600
yVlPPhtLTSeedFstWVKfYIkFLkNSNLGdIIPtvnGkikRQiTddEl
y + L K + SN n +i Q d
gi|2464947 601 YGR---GLFQRRRSKRSPKNAVGVGGKSNYSLDRLSQNIEITIQDEDGNF 647
aylYNTFQiFAPfqlLPTWVKdILevdYaDIlkvLsKSveKMQsdtQElk
+ A +l T V d +v + D + +
gi|2464947 648 HPYDDNYHMLA-GRLDATDVDD--DVGFDDLY---------LDDRPSGAS 685
DivaLANLeYdGSTsADaFEikVstIIdRLkeNnInvsdklACQLIlkGL
D va A dG s s +v +l Q + +GL
gi|2464947 686 DDVAFAGDISDGGASSRSRASDAS---------DGHVLGRLLRQ-VRQGL 725
SGdyKyLRytrrrklNMklaeLFldIqlIYdEnkisrlsKPsyrknhSde
S ++ Ry++rr + I +E s P ++ + S
gi|2464947 726 SVGWRKPRYQKRRA------------RSISEE--FSSGDTPRFKDEESAS 761
KNvSRsytNTTktKViaRNyQkTNsSKskaAkAHNvaTSskfsrvdNDsI
K S + sS A A Ss r d s
gi|2464947 762 KAESGHGPSSGGAGGGGGSGGAGGSS-AAGASASAAGGSSGHYRPDSGSG 810
skSTvesiyLsddndLsLrqetk<-*
kS + + e k
gi|2464947 811 HKSDKSEKDREKKEK---EREEK 830
PF03768: domain 1 of 1, from 469 to 519: score -14.7, E = 5.2
*->leGSltlNsdGgsdArlklkVplvGndknnvsaeVFAlGsvdlndqg
l l+ Ns+ s A + Vp G A +d+
gi|2464947 469 LISNLSYNSSNNSNA---CNVP--GG----------AT----APDAR 496
kpvtaGaglAldNvnGHGLSLTkth<-*
+t++ + + + GH+LS+++++
gi|2464947 497 NTATTSTT--APGKSGHALSVQGGR 519
SM00157: domain 1 of 1, from 678 to 903: score -87.0, E = 9.5
*->kkrPkPGGGWntGGsRYPGqgsPGGnrYPpqgggGgWGqPhGGgWGq
+P G + + G s GG +++ + G+ G q
gi|2464947 678 --DDRPSG--ASDDVAFAGDISDGGASSRSRASDASDGHVLGRLLRQ 720
PHgG...gWGqPHgGgWGqPHGgggWgqGGGthnqWnkPsKPKtnlKH..
G + gW +P ++ G t + s K + H++
gi|2464947 721 VRQGlsvGWRKPRYQKRRARSISEEFSSGD-TPRFKDEESASKAESGHgp 769
vAGAAAAGAvvGGLGGYmLGsams..........rPliHFGndyED.RYY
G A G GG GG a ++ ++++++ rP G + +
gi|2464947 770 SSGGAGGGGGSGGAGGSSAAGASAsaaggssghyRPDSGSGHKSDKsEKD 819
rEnmyRYPnqvyYrPvDqYsnqnnfvHDCvnitvKqHtvttttKGEnFtE
rE + + + + n+f + + q t t E
gi|2464947 820 REKKEKEREEKDIEMIKVFDGNNSFRRQQYRVIIVQRTYTL----EQLLT 865
tDvKimErvveqmCitqYqkEsqAyyqRgasvvlfssPpv<-*
t ++ + Y + A + +l +P+
gi|2464947 866 TALRAFH--ITRDPQAFYLTDLYAPAGMEDTPMLDPTPVL 903
COG0008: domain 1 of 1, from 697 to 1149: score -325.3, E = 7.1
*->alvNAi.h.GKAn.kAVMGkvm.enpelRsma.ea.eiv.nfieqvn
++ ++ ++A+++ V+G +++ ++ +++++
gi|2464947 697 GGASSRsRaSDASdGHVLGRLLrQ----VRQGlSVgWRKpRY----- 734
smslmekk.lle.lype.................................
+k+++ ++++e +++++++ +++++ ++ ++++++++++ ++++
gi|2464947 735 ------QKrRARsISEEfssgdtprfkdeesaskaesghgpssggagggg 778
.................................LpelevmgkVrTRFAPS
++++ ++++ + + + ++++++ ++++++++ ++ ++++ +
gi|2464947 779 gsggaggssaagasasaaggssghyrpdsgsghKSDKSEKDREK------ 822
PTGyLHIGgARtALfNylfARhygGkFiLRIEDTDpTeRstpea.eeaIl
+ e++++e ++e+I+
gi|2464947 823 -----------------------------------K-EKEREEKdIEMIK 836
edLkWLGlnWDegpdvGGpYgpyyQSeRfdiYyeyaekLieeGkAYyCyc
++ + ++Q +Y+ + + +
gi|2464947 837 V------FDGNNS--------FRRQ-----QYRVIIVQR---------TY 858
tpEELealRGtltregaeapgrdprYdgnlrlltkmeegeypageGeppv
t E+L t++r+ + + Y l + me+ ++ pv
gi|2464947 859 TLEQLLT---TALRAFHITRDPQAFYLTDLYAPAGMEDTPMLDP---TPV 902
vRfKvplegep.k.lnivfrDlvkGrIvfanad.....ilhDfvilRsDG
+ + v leg +++ f+D +G ++ ++ + + + ++ v
gi|2464947 903 LNL-VHLEG-KrPaIYLRFHDRDRGHVRVYPGKlqcsmLEDPYV------ 944
yPTYnFAVVVDDhlMGITHViRGeDhlsNTprQillyeAlGwpvtwepPv
V+VD ++T + +l+ +Al+
gi|2464947 945 ------SVPVD----------------NSTVIKDLIRDALDKFG------ 966
faHlplilneglSKrklkkledgkKLSKRdgpRaptveayRrrGylPEAl
+ + + + + + +rG++
gi|2464947 967 ------LQD-----------NQIQDYRCSEV--------LLDRGVT---- 987
rNflallGvwspddddqEifsleelirkFdlervskspavfDpkKLewlN
++ s+ + ++ + + +l ++ ++ ++ ++++
gi|2464947 988 ----ERIL--SW-N-ERPWDIMKQLGKD----------SIRQMELMRFYM 1019
aeyikeelddeplhpllkpflphpeaGerelpftrelkkdidyidredle
+ k + p+ l++ l p+ +++ + e
gi|2464947 1020 QH--KQ-DPHGPNIALFVGNL--PTG-----------------LSQRNYE 1047
ellplvkerlktlkelrlltryffeapdvvedadedvakklfkeedkevL
++l+ k + ++ +++ p +++ + l e+ + +
gi|2464947 1048 QILN---------KYVTDENKFISIGP---IYYEYGSV-VLTFEDSMKAV 1084
eklkekLeklkgvihWtpeeie.aikvrlaeelglKgkklfmplRvalTG
+++++ L+++ i +++ + + l++ + + ++ +R l+
gi|2464947 1085 RAFYN-LRET---IIEDK---KlLVL-LLPNIE---PSMVPSDVRPLLVF 1123
saegpelfetiellGkeeqleRlgyalad<-*
++++ + +el++ ++ l + ++
gi|2464947 1124 VNVKSGGCQGLELIS---SFRKLLNPYQV 1149
PF03792: domain 1 of 1, from 706 to 876: score -116.2, E = 7.7
*->segtvrhdkrkdIgdlLqevlkItdqtLDeeqvNakKhqLkchpmkr
s ++ + +g lL +v q L v +K+ + + +
gi|2464947 706 SDASDG----HVLGRLLRQV----RQGL---SVGWRKPRYQKRRARS 741
AlfdVLcEiKeKtvLSvrnmkdeeppdPqlmRLDnMLvAEGVAGPdkGG.
E + + + kdee +AE GP GG
gi|2464947 742 -----ISEEFSSGD--TPRFKDEESAS----------KAESGHGPSSGGa 774
.........GaAAsllaaqasgGtSlsidGaDsalehsdYRqkLlqiRri
++++++++ G+ ++ a+++++G+S +Ds h ++ + R
gi|2464947 775 gggggsggaGGSSAAGASASAAGGSSGHYRPDSGSGHKSDKS--EKDREK 822
yenElkkYekaCneFtehVenlLreQSrtRPItqkeiErmvniisrKFns
e+E + + + + r+Q r v i++r +
gi|2464947 823 KEKEREEKDIEMIKVFDGNNSFRRQQYR------------VIIVQRTYT- 859
iqvqLKQstCEaVmiLrsRFLD<-*
qL ++ a i r D
gi|2464947 860 -LEQLLTTALRAFHITR----D 876
PF01391: domain 1 of 1, from 761 to 818: score -86.6, E = 9.5
*->GppGppGppGppGppGppGppGpaGapGppGppGepGpPGppGppGp
+ + G G +G G G aG+ +G++ ++ + G+ G+ p
gi|2464947 761 -SKAESGHGPSSGGAGGGGGSGGAGGSSAAGAS-ASAAGGSSGHYRP 805
pGppGapGapGpp<-*
G + ++ +
gi|2464947 806 DSGSGHKSDKSEK 818
PF00503: domain 1 of 1, from 819 to 1019: score -308.3, E = 6.9
*->seeekeqakrnkeIekqLkqekkkakrevKLLLLGAGESGKSTIlKQ
e+ke+++ k+Ie ++ r
gi|2464947 819 DREKKEKEREEKDIEMIKVFDGNNSFR-------------------- 845
MKIIHgnGFSqEEkkeyrpvIyqNivqsmrvlvdAmetLgIpfgdperea
+ +yr +I q ++ ++l A +++I+ +++
gi|2464947 846 -------------RQQYRVIIVQRTYTLEQLLTTALRAFHITRDPQ---- 878
seadavmiletaketeeveeplpkeyadaikaLWkDpGiqecfdRsrEfq
a ++ + +++++ e p+ ++ L +G + +++ +
gi|2464947 879 ----AFYLTDLYAPAG--MEDTPMLDPTPVLNLVHLEGKRPAIYLR---- 918
LnDSAkYFLdnldRisdpdYiPTeQDILrsRvkTTGIvEtkFsvkkltFR
+ + dR + + Y +kl+
gi|2464947 919 --------FHDRDRGHVRVY-----------------------PGKLQ-- 935
MfDVGGQRSERKKWIHCFEdVTAIIFlVALSEYDQvLfEDettNRMqESL
+S + ED+ +
gi|2464947 936 -----------------------------CS-----MLEDPYV------- 944
kLFdsIcNnrwFvntSiILFLNKkDLFeEKIkktpssisd.yFPeYedys
s+ n+ +I kDL ++ + k ++d+ +Y+ +
gi|2464947 945 ----SVPVD----NSTVI-----KDLIRDALDKFG--LQDnQIQDYR--C 977
.....sGppqdyeaAkeFIkkkFvslnrnnekpkKeIYsHfTCATDTnnI
++ + + ++ +s n+++ +
gi|2464947 978 sevllDRG----------VTERILSWNERPWD------------------ 999
rfVFdaV.kDiIlqenLkecGL<-*
+++ +kD I q+ L+ +
gi|2464947 1000 --IMKQLgKDSIRQMELMRFYM 1019
SM00314: domain 1 of 2, from 831 to 922: score 9.9, E = 0.011
*->dtyvlrVyvddlsavdpgqtyktlrvskrtTardViqqllekfhltd
d + ++V+++ +s + q+y+++ v + T ++++ +l+ fh+t
gi|2464947 831 DIEMIKVFDGNNSF--RRQQYRVIIVQRTYTLEQLLTTALRAFHITR 875
edpeeYvLvevlp.sggkErvLlddenPlqlqklwprdaksprqsslrFv
dp+ + L + + g ++ +ld + +l l+ l +++ + ++
gi|2464947 876 -DPQAFYLTDLYApAGMEDTPMLDPTPVLNLVHLEGKR--------PAIY 916
Lrkrdd<-*
Lr +d+
gi|2464947 917 LRFHDR 922
PF04041: domain 1 of 1, from 887 to 1149: score -168.6, E = 4.9
*->elrKiptipileRpsyitGfdsriennPiiGRgpvrkpvavFNPavv
+ ++ p l p+ ++ + v+
gi|2464947 887 APAGMEDTPMLD-PT----PVLNL---------------------VH 907
lyegeLrVYaRfVmlYrayvediatfrIgLadssdGRCSeinfkkepepv
l +++ +Y Rf+ r+ v + ++ + ++d+ ++ + v
gi|2464947 908 LEGKRPAIYLRFHDRDRGHV-RVYPGKLQCSMLEDP---YVSVPVDNSTV 953
v..lPedkwElwGPsYvEDPRvvkigkryymTYTGydgkyarlcvattkn
+++l +d +++G D i + y + l+ ++t
gi|2464947 954 IkdLIRDALDKFG---LQD---NQIQD--------YRCSEVLLDRGVTER 989
lltwarlgNGeWvkfaefelnedrislwtksgaifPvKinGkyvmyfris
l+w + W + k+ + ++ m ++
gi|2464947 990 ILSWNERP---WDIMKQLG----------KDSIRQMELMR--FYMQHK-- 1022
DnvHdldsniwLavSnvddlvhWenerepSYidvgsprpgmfdapFElKi
+ H +++ + v+n + ++ +n+ + ++ +++ i
gi|2464947 1023 QDPHGPNIALF--VGNLPTGLSQRNYEQI------LNKYVTDEN---KFI 1061
GwgtPPveteeGwSLLVLiHGvNvaGrytenlvYRvGaaLlDlegRPskv
g+ + e+G VL + + Y + + +++
gi|2464947 1062 SIGP--IYYEYGS--VVLTF---EDSMKAVRAFYNLRETIIEDKK-LLVL 1103
lartpeYILePeeewEvyGdvpnVVFPcgalvdegtgrvliyYGaADtav
l+ e+ ++P+ + +p VF v g +
gi|2464947 1104 LLPNIEPSMVPSDV------RPLLVF---VNVKSG------------GCQ 1132
GLAeipGdleelmnflke<-*
GL i+ ++l+n
gi|2464947 1133 GLELIS-SFRKLLNPYQV 1149
PF00788: domain 1 of 1, from 923 to 1024: score 60.5, E = 4e-15
*->dqgvlrvyfqdllsvtpgvayKtirvssedtapdViqeaLeKfrldd
d g++rvy++ l ++y ++ v +++ + d+i++aL Kf+l+d
gi|2464947 923 DRGHVRVYPGKLQCSMLEDPYVSVPVDNSTVIKDLIRDALDKFGLQD 969
RMedpeeYaLvevlltregalesggkerkLpddenPlqlrlnlprddrrs
+ +++Y evl l++g++er+L+ +e+P ++++l++d s
gi|2464947 970 --NQIQDYRCSEVL------LDRGVTERILSWNERPWDIMKQLGKD---S 1008
vrqqsslrFlLkrrdd<-*
+rq rF+++ ++d
gi|2464947 1009 IRQMELMRFYMQHKQD 1024
SM00314: domain 2 of 2, from 923 to 1024: score 48.5, E = 1.7e-11
*->dtyvlrVyvddlsavdpgqtyktlrvskrtTardViqqllekfhltd
d +++rVy++ l ++y+++ v++ t +d+i+++l kf+l+d
gi|2464947 923 DRGHVRVYPGKLQCSMLEDPYVSVPVDNSTVIKDLIRDALDKFGLQD 969
edpeeYvLvevlp.sggkErvLlddenPlqlqklwprdaks.prqsslrF
++ ++Y evl+++g++Er+L +e+P ++k +++d s ++ + +rF
gi|2464947 970 NQIQDYRCSEVLLdRGVTERILSWNERPWDIMKQLGKD--SiRQMELMRF 1017
vLrkrdd<-*
+++++ d
gi|2464947 1018 YMQHKQD 1024
PF04396: domain 1 of 1, from 937 to 1001: score -40.4, E = 9
*->eesaeakTsVfWDvEdCPvPdGldarrVapnIksALeksGYpGpVSI
+++ +sV P + + + + I+ AL k
gi|2464947 937 SMLEDPYVSV---------PVD-NSTVIKDLIRDALDK--------- 964
tAYGdltkiprdtfilvsstiqllraLsstGIsLkhvPaGdkKdArdkki
+G + ++ d ++ +++L+++G+
gi|2464947 965 --FGLQDNQIQD--------YRCSEVLLDRGV------------------ 986
lvdillWaldNppPanlm<-*
+ il+W N P +m
gi|2464947 987 TERILSW---NERPWDIM 1001
PF02376: domain 1 of 1, from 969 to 1022: score -42.8, E = 3.1
*->nqqigmneelDTaeIarrvkeeLkrhnIgQriFAekvLGlSQGslSd
qi ++ e L ++++ ri
gi|2464947 969 DNQIQDYR----------CSEVLLDRGVTERI--------------- 990
LLrkPK.PWskLtqkGREpFrRMqnWLsdpnavrdlilqqek<-*
L++ ++PW++++q G+++ r M + ++ ++q k
gi|2464947 991 -LSWNErPWDIMKQLGKDSIRQM---------ELMRFYMQHK 1022
SM00361: domain 1 of 1, from 1031 to 1103: score -4.9, E = 1.3
*->lvlvnglvspeeakdEdferelseeeeyfgevgkinKivinkvtkrl
++v +l +++++ + ++e+ l ++ v+++nK++ ++ +
gi|2464947 1031 ALFVGNL-PTGLS-QRNYEQILN----KY--VTDENKFISIGPIY-- 1067
NayenhkrgsggvYitFFersEDAarAivdlnGryfdGRtlkae<-*
+ gs v++tF e s A rA +l + ++ l+
gi|2464947 1068 -----YEYGS--VVLTF-EDSMKAVRAFYNLRETIIEDKKLLVL 1103
SM00360: domain 1 of 1, from 1031 to 1103: score 11.0, E = 0.0059
*->tlfVgNLndppdvteedLrelF.kevksvevfraeteskfGkvvsvr
lfVgNL p ++ +++ +k+v k +s+
gi|2464947 1031 ALFVGNL--PTGLSQRNYEQILnKYV-----------TDENKFISIG 1064
ivrdkdnilirressleqkvqlgkdsgTGkskGfaFVeFedeedAekAll
++ + G +Fed A +A
gi|2464947 1065 PIYYEY--------------------------GSVVLTFEDSMKAVRA-- 1086
iealnaskGkeledgGrptlglrVe<-*
+l + + ++++ l V
gi|2464947 1087 FYNLR---ETII--EDKK---LLVL 1103
PF00076: domain 1 of 1, from 1032 to 1102: score 11.2, E = 0.015
*->lfVgNLppdvteedLkdlFskfGpivsikivkDhiekpketgkskGf
lfVgNLp+ +++ +++ k+ ++ i++ G
gi|2464947 1032 LFVGNLPTGLSQRNYEQILNKYVTDENKFISIG------PIYYEYGS 1072
aFVeFeseedAekAlealnGkelggrklrv<-*
+Fe++ A +A +l +++++++kl v
gi|2464947 1073 VVLTFEDSMKAVRAFYNLRETIIEDKKLLV 1102
PF03208: domain 1 of 1, from 1098 to 1256: score -89.5, E = 6.5
*->sslevissikeslqsslsslRPWgEFldfsa.....fSrPsSfseat
++l v+ + +++ s RP ++F+++++++ +++ + sSf +
gi|2464947 1098 KKLLVLLLPNIEPSMVPSDVRPLLVFVNVKSggcqgLELISSFRKL- 1143
sRvkrNlsyFrvNYvlIfavliiysLitnPllLvvililva.awlfLYlr
l ++v + + l +y +P+ +vi l+++++ LY++
gi|2464947 1144 ------LNPYQVFDLDNGGPLPGYV---QPITVFVIRPLIFdSIISLYVF 1184
rsldepLVlfGrsisdrqlyvgLilvsipvlf..Ltgvgs........vl
r + i++ ++v+ + ++i+ +++ L +vg++++ ++++++
gi|2464947 1185 R--Q---------ITNYKILVCGGDGTIGWVLqcLDNVGQdsecssppCA 1223
iwtvgas..vvvvlvHAafrenpddlfvdEqee<-*
i +g++++++ vl ++ + + +d ++ ++
gi|2464947 1224 IVPLGTGndLARVLCWGSGYTGGEDPLNLLRDV 1256
COG1597: domain 1 of 1, from 1117 to 1512: score -73.3, E = 0.0053
*->mkrarliyNptaGkgkakkalrevadrLe.................k
+++ ++++N+++G+ ++ + +++ L + + + +++++ ++ +
gi|2464947 1117 VRPLLVFVNVKSGGCQGLELISSFRKLLNpyqvfdldnggplpgyvQ 1163
rggeasvrvttepgvagdAvriakeaaadgrieavDlviaaGGDGTinev
+ + +r ++ + + ++++ +++ ++++GGDGTi+ v
gi|2464947 1164 PITVFVIRPLIFDS--IISLYVFRQITNYK-------ILVCGGDGTIGWV 1204
angLagtdgevkafnkpaLgilPaGTgNdFARaLgIPrddieaaakaiad
+ L + + ++ + p+ +i+P+GTgNd+AR L + ++ + +
gi|2464947 1205 LQCLDNVGQDSE-CSSPPCAIVPLGTGNDLARVLCWGS-GYTGGEDPLNL 1252
gktrqvDlgrasyglqrekaneryflniaggGfgae.vtkrvneelkrrl
D+ +a+ e +r+ + + e+++k+ + ++++
gi|2464947 1253 L----RDVIEAE-----EIRLDRWTVVFHPEDKPEEpAMKAPSQTTGKKK 1293
GplaYllaalrrlsrlrpfplairvdgdgksfegealfllvnntn.....
+ + l+ +++++ + p+ + d+ g+ ++ ++v+n++ + +
gi|2464947 1294 KAHQAHLSQSQQTNQHHQLPALTSSDISGGAQNEDNSQIFVMNNYfgigi 1343
..................................................
+ + + ++ +++++++ +++ ++++ + + ++ +++ ++ +++
gi|2464947 1344 dadlcldfhnareenpnqfnsrlrnkgyyvkmglrkivgrkavkdlqkel 1393
.............................npyyGGgmklaPdasldDGll
+ + +++ + ++ ++ + + +++ np ++ ++ DG+l
gi|2464947 1394 rlevdgkivelppvdgiiilnilswgsgaNPWGPDKDDQFSTPNHYDGML 1443
dviivkaase.a.qllellrllrdvlrGkkhrehpevehlqakkieieth
v+ v +++ l ++ +r+++r q+ +i+i++
gi|2464947 1444 EVVGV----TgVvHLGQIQSGIRTAMRI-----------AQGGHIKIHLN 1478
gdqakpipvqlDGEiypgalPvririlpgalrvlvPadr<-*
+ +pvq+DGE+ P + +l+ al + + + +
gi|2464947 1479 T----DMPVQVDGEPW-IQSPGDVVVLKSALKATMLKKN 1512
PF00781: domain 1 of 1, from 1119 to 1267: score 167.8, E = 2e-47
*->plLVfvNPkSGggqgekelaseskllqkfrelLnprqVfdltktggp
plLVfvN kSGg+qg +l+ +fr+lLnp+qVfdl++ ggp
gi|2464947 1119 PLLVFVNVKSGGCQGL-------ELISSFRKLLNPYQVFDLDN-GGP 1157
avg....................lelfrdlpdfkeqdqGddrvlvcGGDG
+g ++ + ++ ++ +l +fr++ ++ +lvcGGDG
gi|2464947 1158 LPGyvqpitvfvirplifdsiisLYVFRQITNY--------KILVCGGDG 1199
TvgwVlnaldklelplqcqrefpkPpvgilPlGTGNdLarvLgwgggydg
T+gwVl++ld+ ++ c Pp++i+PlGTGNdLarvL wg gy+g
gi|2464947 1200 TIGWVLQCLDNVGQDSECS----SPPCAIVPLGTGNDLARVLCWGSGYTG 1245
aqlinekllkilgdaleeadtvmldrW<-*
e +l+ l+d++ ea + ldrW
gi|2464947 1246 ----GEDPLNLLRDVI-EAEEIRLDRW 1267
SM00046: domain 1 of 1, from 1119 to 1267: score 166.6, E = 4.6e-47
*->plLVfvNPkSGggqgeellkseskllrkfrelLnprqVfdltktggp
plLVfvN kSGg+qg +l+ +fr+lLnp+qVfdl++ ggp
gi|2464947 1119 PLLVFVNVKSGGCQGL-------ELISSFRKLLNPYQVFDLDN-GGP 1157
dvgle....................lefrdvpkfkeqsdqkgddrvlvcG
+g++++ + ++ ++ + + fr++ ++ +lvcG
gi|2464947 1158 LPGYVqpitvfvirplifdsiislyV-FRQITNY----------KILVCG 1196
GDGTvgwVlnaldkrelplqcqvedrefpePPvailPlGTGNdLarvLgw
GDGT+gwVl++ld+ +++ c+ PP+ai+PlGTGNdLarvL w
gi|2464947 1197 GDGTIGWVLQCLDNVGQDSECS-------SPPCAIVPLGTGNDLARVLCW 1239
gggydginekllkilkealeeadtvkldrW<-*
g gy+g e +l l++++ ea+ + ldrW
gi|2464947 1240 GSGYTG-GEDPLNLLRDVI-EAEEIRLDRW 1267
COG0284: domain 1 of 1, from 1333 to 1511: score -107.0, E = 2.3
*->laadlplsvmndprlIrvALDvpd..redalalveelddeeyvlfiK
++ + ++++d +l+ LD+++ ++e+ ++ +l + ++K
gi|2464947 1333 FVMNNYFGIGIDADLC---LDFHNarEENPNQFNSRLRN--KGYYVK 1374
vGlaFFeLflsaGpdivkeLkargklgvkvFLDLKlhDIPnTvalaakal
Gl + G ++vk+L ++ ++ D K+ P ++ +
gi|2464947 1375 MGL---RKIV--GRKAVKDLQKEL----RLEVDGKIVELPPVDGIIILNI 1415
aelgplAaDmvtVHafgGeemlraavealeelgkGkrPlLiaVtvLTSms
+g +g + + g +
gi|2464947 1416 LSWG-----------SG-----------ANPWG----------------P 1427
epgllqeigidnsladqvirlaklakeaGldGvVcGAspqeaaaiRealg
++++ ++ ++++ + + l + G+ GvV +q + iR a+
gi|2464947 1428 DKDD----QFSTPNH----YDGMLEVV-GVTGVVH--LGQIQSGIRTAMR 1466
egspdflilTPGIRaDkgsakgDQgRvmTpaeAiaaGaDyiVVGRpItqA
+ a ++g ++++ V G p q+
gi|2464947 1467 --I-----------A-------QGGH---IKIHLNTDMPVQVDGEPWIQS 1493
GedPvaaaeaireaaemalee<-*
+ v +++ ++ + ++l++
gi|2464947 1494 -PGDVVVLKSAL--KATMLKK 1511
SM00045: domain 1 of 1, from 1334 to 1489: score 262.2, E = 7.8e-76
*->iMNNYFSiGvDAkiaLeFHnsREanPekFnSRlkNKlwYfelGtkel
+MNNYF+iG+DA+ +L+FHn+RE+nP FnSRl+NK +Y+++G++++
gi|2464947 1334 VMNNYFGIGIDADLCLDFHNAREENPNQFNSRLRNKGYYVKMGLRKI 1380
fa.rtcKdLheqIeLecDGvdidlpnkdlslEGIivLNIPSygGGtnLWG
+ ++ KdL + +Le+DG++++lp GIi+LNI S+g+G+n+WG
gi|2464947 1381 VGrKAVKDLQKELRLEVDGKIVELP----PVDGIIILNILSWGSGANPWG 1426
ePfgskkkravcgifkksftdkedlnfekqsidDgllEVVGvtgamhmaq
++d++f+ + ++Dg+lEVVGvtg++h++q
gi|2464947 1427 P--------------------DKDDQFSTPNHYDGMLEVVGVTGVVHLGQ 1456
vrtsiqvglasiilvkllKgrRiaQCsevrlkdtiltkktiPmQVDGEP<
+ q g + + RiaQ+ ++++ + t +P+QVDGEP
gi|2464947 1457 I----QSGIRT--------AMRIAQGGHIKI--HLNT--DMPVQVDGEP 1489
-*
gi|2464947 - -
PF00609: domain 1 of 1, from 1334 to 1489: score 237.0, E = 2.9e-68
*->iINNYFSiGVDAsialrFHimREknPekFnSRmkNKlwYfefGtset
++NNYF+iG+DA ++l+FH +RE+nP FnSR++NK++Y+++G+ ++
gi|2464947 1334 VMNNYFGIGIDADLCLDFHNAREENPNQFNSRLRNKGYYVKMGLRKI 1380
l.astcknLhesvelecdGqevdLsnrDaslEGIiiLNIPSygGGsnLWG
++ + k+L + +le+dG+ v+L+ GIiiLNI S+g+G+n+WG
gi|2464947 1381 VgRKAVKDLQKELRLEVDGKIVELP----PVDGIIILNILSWGSGANPWG 1426
eskkgkgdigefkksitdpkdlktavqdidDgLlEVVGlegamhmgQiyT
+kd++++ + + Dg lEVVG++g++h+gQi
gi|2464947 1427 P-----------------DKDDQFSTPNHYDGMLEVVGVTGVVHLGQI-- 1457
siqlklasWvkLmkgrRlaQCsevRlkDtiktkktlPMQVDGEP<-*
q + + + R+aQ+ + +i+ + +P+QVDGEP
gi|2464947 1458 --QSGIRT------AMRIAQGGHI----KIHLNTDMPVQVDGEP 1489
SM00463: domain 1 of 1, from 1346 to 1416: score -20.0, E = 9.4
*->ewtLDLHGltveeAlqaLkkfldaarlrgletgervdlpkkleIitG
+++LD+H ++e q+ ++ + + +
gi|2464947 1346 DLCLDFHNAREENPNQFNSRLRNKGYYVK------------------ 1374
kGkhslvngkskvkpalkehlqHkhvesfrfaepsegnsGvlvvklk<-*
+G ++ g+++vk ke+ +v++ + + +G +++ ++
gi|2464947 1375 MGLRKI-VGRKAVKDLQKELRL--EVDGKIVEL--PPVDGIIILNIL 1416
PF04014: domain 1 of 1, from 1447 to 1487: score -14.1, E = 5.1
*->ivvKVdrnGqIVIPkeiRekLGikeGDiLEievdgdggeIilrkykp
v+ V +GqI iR ++ i+ G + i ++ d +++++
gi|2464947 1447 GVTGVVHLGQIQ--SGIRTAMRIAQGGHIKIHLNTDM----PVQVDG 1487
<-*
gi|2464947 - -
//
-------------- next part --------------
###################################################################################################### */
# COPYRIGHT INFORMATION
# Pfam DOMAIN RESULTS PARSER
# @AUTHOR: Wagied Davids
# @DATE: 22.01.2004
###################################################################################################### */
import sys
import string
import re
import time
class PfamEntry:
    '''
    Result record for one hmmpfam query sequence
    Stores the query name, accession, description and three lists of raw
    report lines (family scores, parsed domains, alignments) and renders
    them as tagged text.
    @author: Wagied Davids
    @date: 22.01.2004
    @copyright: Wagied Davids, 2004
    '''
    # STATIC DATA
    # Marker searched for in parsed-domain lines; a line containing it is
    # treated as "no hits" by getParsedDomainsML().
    NO_HITS = '[no hits above thresholds]'
    # STATIC REGEX OBJECTS
    # One row of the family-score table. Groups: 1 = whole row, 2 = model,
    # 3 = description, 4 = score including its leading '-' or ' ',
    # 5 = that leading character, 6 = E-value, 7 = N.
    REGEX_FAMILY_SCORES = re.compile( r'((\S.*?)\s+(\S.*?)\s+((-| )\S.*?)\s+(\S.*?)\s+(\d+))', re.MULTILINE | re.DOTALL )
    # (tag name, regex group) pairs emitted for every family-score row.
    _FAMILY_SCORE_FIELDS = (
        ('FAMILY_SCORE_MODEL', 2),
        ('FAMILY_SCORE_DESCRIPTION', 3),
        ('FAMILY_SCORE_VALUE', 4),
        ('FAMILY_E_VALUE', 6),
        ('FAMILY_N_VALUE', 7),
    )
    # (tag name, whitespace-split column) pairs emitted for every parsed-domain
    # row. Columns 4 and 7 are deliberately not emitted, as in the original.
    _PARSED_DOMAIN_FIELDS = (
        ('PARSED_MODEL', 0),
        ('PARSED_DOMAIN_NUMBER', 1),
        ('PARSED_DOMAIN_SEQ_F', 2),
        ('PARSED_DOMAIN_SEQ_T', 3),
        ('PARSED_DOMAIN_HMM_F', 5),
        ('PARSED_DOMAIN_HMM_T', 6),
        ('PARSED_DOMAIN_SCORE', 8),
        ('PARSED_DOMAIN_E_VALUE', 9),
    )
    # Number of columns a parsed-domain row must have (highest index used + 1).
    _PARSED_DOMAIN_MIN_FIELDS = 10

    def __init__(self, query=None, accession=None, description=None,
                 family_scores=None, parsed_domains=None, alignments=None):
        '''
        Constructor for Pfam Entry structure
        Prints an error and exits the interpreter with status -1 when
        query is None.
        @param query (String: QUERY, required)
        @param accession (String: ACCESSION)
        @param description (String: DESCRIPTION)
        @param family_scores (List: raw family score lines, default empty)
        @param parsed_domains (List: raw parsed domain lines, default empty)
        @param alignments (List: raw alignment lines, default empty)
        @return (None)
        '''
        if query is None:
            print('Error: Query must be provided')
            sys.exit(-1)
        # A fresh list per instance: a literal [] default would be one shared
        # object mutated by every entry created without that argument.
        if family_scores is None:
            family_scores = []
        if parsed_domains is None:
            parsed_domains = []
        if alignments is None:
            alignments = []
        self.query = query
        self.accession = accession
        self.description = description
        self.family_scores = family_scores
        self.family_scores_hitlist = []  # FAMILY SCORES HITLIST FOR SCORE ENTRIES
        self.parsed_domains = parsed_domains
        self.alignments = alignments

    def getQuery(self):
        '''
        Retrieves the QUERY
        @param (None)
        @return (String: QUERY)
        '''
        return self.query

    def getAccession(self):
        '''
        Retrieves the ACCESSION
        @param (None)
        @return (String: ACCESSION)
        '''
        return self.accession

    def getDescription(self):
        '''
        Retrieves the DESCRIPTION
        @param (None)
        @return (String: DESCRIPTION)
        '''
        return self.description

    def getFamilyScoresRaw(self):
        '''
        Retrieves a list of FAMILY SCORES
        @param (None)
        @return (List: FAMILY SCORES)
        '''
        return self.family_scores

    def getNoOfFamilyEntries(self):
        '''
        Retrieves the number of hits per query
        @param (None)
        @return (Integer: number of hits per query)
        '''
        return len(self.family_scores)

    def getFamilyScoresML(self):
        '''
        FINE-GRAINED CONTROL OVER FAMILY CLASSIFICATION AND SCORE RESULTS
        Renders every raw family-score line that matches
        REGEX_FAMILY_SCORES as a numbered FAMILY_SCORE_HIT block; lines
        that do not match are skipped.
        @param (None)
        @return (String: tagged FAMILY_SCORES_HITLIST text)
        '''
        parts = ["<FAMILY_SCORES_HITLIST>\n"]
        hit_number = 1
        for score_entry in self.getFamilyScoresRaw():
            match = PfamEntry.REGEX_FAMILY_SCORES.search(score_entry)
            if match is None:
                continue
            parts.append("\t\t<FAMILY_SCORE_HIT= %d>\n" % hit_number)
            for tag, group in PfamEntry._FAMILY_SCORE_FIELDS:
                parts.append("\t\t\t<%s>%s</%s>\n" % (tag, match.group(group), tag))
            parts.append("\t\t</FAMILY_SCORE_HIT>\n")
            hit_number = hit_number + 1
        parts.append("\t</FAMILY_SCORES_HITLIST>\n")
        return ''.join(parts)

    def getParsedDomainsRaw(self):
        '''
        Retrieves a list of PARSED DOMAINS
        @param (None)
        @return (List: PARSED DOMAINS)
        '''
        return self.parsed_domains

    def getNoOfParsedDomains(self):
        '''
        Retrieves the number of parsed hits per query
        @param (None)
        @return (Integer: number of parsed hits per query)
        '''
        return len(self.parsed_domains)

    def getParsedDomainsML(self):
        '''
        FINE-GRAINED CONTROL OVER PARSED DOMAINS AND SCORE RESULTS
        Renders every raw parsed-domain line as a numbered
        PARSED_DOMAIN_HIT block. If a line contains NO_HITS, that raw
        line is returned immediately instead of any tagged text. Lines
        with fewer than 10 whitespace-separated columns (e.g. blank
        lines) are skipped, mirroring how getFamilyScoresML() skips
        lines it cannot interpret.
        @param (None)
        @return (String: tagged PARSED_DOMAINS_HITLIST text, or the raw NO_HITS line)
        '''
        parts = ['<PARSED_DOMAINS_HITLIST>\n']
        hit_number = 1
        for domain in self.getParsedDomainsRaw():
            if PfamEntry.NO_HITS in domain:
                # NO_HITS FOUND
                return domain
            fields = domain.split()
            if len(fields) < PfamEntry._PARSED_DOMAIN_MIN_FIELDS:
                continue
            parts.append("\t\t<PARSED_DOMAIN_HIT= %d>\n" % hit_number)
            for tag, column in PfamEntry._PARSED_DOMAIN_FIELDS:
                parts.append("\t\t\t<%s>%s</%s>\n" % (tag, fields[column], tag))
            parts.append("\t\t</PARSED_DOMAIN_HIT>\n")
            hit_number = hit_number + 1
        parts.append('</PARSED_DOMAINS_HITLIST>\n')
        return ''.join(parts)

    def getAlignments(self):
        '''
        Retrieves a list of TOP SCORING ALIGNMENTS
        @param (None)
        @return (List: TOP SCORING ALIGNMENTS)
        '''
        return self.alignments

    def getRegexFamilyScores(self):
        '''
        Retrieves the Regex object for Pfam family scores
        @param (None)
        @return (Regex: Regex object for Pfam family scores)
        '''
        return PfamEntry.REGEX_FAMILY_SCORES

    def __str__(self):
        '''
        Retrieves a string representation of parser entry class
        @param (None)
        @return (String: the whole entry wrapped in a HMMER tag)
        '''
        # Values are wrapped in 1-tuples so a tuple-valued field is formatted
        # as one value instead of being unpacked by the % operator.
        return ''.join([
            "<HMMER>\n",
            "\t<QUERY>%s</QUERY>\n" % (self.getQuery(),),
            # NOTE(review): the '?' after the closing tag is emitted as in the
            # original; it looks unintended - confirm before removing.
            "\t<ACCESSION>%s</ACCESSION>?\n" % (self.getAccession(),),
            "\t<DESCRIPTION>%s</DESCRIPTION>\n" % (self.getDescription(),),
            "\t%s" % (self.getFamilyScoresML(),),
            "\t%s" % (self.getParsedDomainsML(),),
            "\t<ALIGNMENTS>%s</ALIGNMENTS>\n" % (self.getAlignments(),),
            "</HMMER>",
        ])
class PfamParser:
    '''
    Prototype class for parsing hmmpfam output
    The class-level names below are the literal section markers and the
    compiled patterns used to cut one report entry into its parts.
    @author: Wagied Davids
    @date: 22.01.2004
    @copyright: Wagied Davids, 2004
    '''
    # DECLARATION OF STATIC DATA
    HMM_HEADER_SEPERATOR = '-'
    HMM_FILE = 'HMM file:'
    HMM_SEQ_FILE = 'Sequence file:'
    HMM_QUERY_SEQ = 'Query sequence:'
    HMM_ACC = 'Accession:'
    HMM_DESCRIPTION = 'Description:'
    HMM_SCORE_HEADER = 'Scores for sequence family classification (score includes all domains):'
    HMM_PARSED_DOMAINS = 'Parsed for domains:'
    HMM_ALIGNMENT = 'Alignments of top-scoring domains:'
    HMM_SPACE = ' '
    HMM_TAB = '\t'
    HMM_NEWLINE = '\n'
    HMM_ENTRY_SEPERATOR = '//'
    # Class-level counter, i.e. one value shared through the class object.
    HMM_ENTRY_COUNTER = 0
    # STATIC DATA STRUCTURE
    # STATIC REGEX OBJECTS
    # All patterns are compiled with MULTILINE and DOTALL, so '.' also
    # matches newlines and a capture may span several lines.
    # group(1): the whole entry, from 'Query sequence:' up to the last '//'.
    REGEX_HMM_ENTRY = re.compile(r'(Query sequence:\s+\S.*\s+//)', re.M | re.S)
    # group(1): text between 'Query sequence:' and 'Accession'.
    REGEX_HMM_QUERY = re.compile(r'Query sequence:\s+(\S.*?)\s+Accession', re.M | re.S)
    # group(1): text between 'Accession:' and 'Description'.
    REGEX_HMM_ACC = re.compile(r'Accession:\s+(\S.*?)\s+Description', re.M | re.S)
    # group(1): text between 'Description:' and 'Scores'.
    REGEX_HMM_DESCRIPTION = re.compile(r'Description:\s+(\S.*?)\s+Scores', re.M | re.S)
    # group(1): from 'Scores' up to the whitespace before the last 'Parsed'.
    REGEX_HMM_SEQ_FAMILY_SCORES = re.compile(r'(Scores\s+\S.*)\s+Parsed', re.M | re.S)
    # group(1): from 'Parsed for domains:' up to the whitespace before the last 'Alignments'.
    REGEX_HMM_PARSED_DOMAINS = re.compile(r'(Parsed for domains:\s+\S.*)\s+Alignments', re.M | re.S)
    # group(1): from 'Alignments of top-scoring domains:' up to the whitespace before the last '//'.
    REGEX_HMM_ALIGNMENTS = re.compile(r'(Alignments of top-scoring domains:\s+\S.*)\s+//', re.M | re.S)
def __init__(self, filename=None):
    '''
    Constructor for PfamParser
    Prints a prompt and exits the interpreter with status -1 when no
    filename is given.
    @param filename (String: path of the hmmpfam output file to parse)
    @return (None)
    '''
    if filename is None:
        print('Please enter filename')
        sys.exit(-1)
    self.filename = filename
    self.debug = 0
    # Both dictionaries are filled by parse(), keyed by query name.
    self.HMM_FAMILY_SCORES_HITS = {}
    self.HMM_PARSED_DOMAINS = {}
def setDebug(self, debug=0):
    '''
    Stores the debug level used while parsing
    debug= 0 No debug information
    debug= 1 Pfam Entry level debug information
    debug= 2 Regex level debug information
    debug= 3 Incoming data
    @param debug (Integer: verbosity/ debug level, default 0)
    @return (None)
    '''
    self.debug = debug
def getFilename(self):
    '''
    Returns the filename stored on this parser
    @param (None)
    @return (String: Filename)
    '''
    filename = self.filename
    return filename
def parse(self):
    '''
    MAIN PARSER FUNCTION
    Reads self.filename line by line. Every block that starts with a
    'Query sequence:' line and ends with a '//' line is joined into one
    string and cut apart with the PfamParser regexes. For each block:
      - PfamParser.HMM_ENTRY_COUNTER is incremented,
      - self.ENTRY is set to the matched entry text,
      - family-score rows are stored in self.HMM_FAMILY_SCORES_HITS[query],
      - parsed-domain rows are stored in self.HMM_PARSED_DOMAINS[query],
      - a PfamEntry is built (and printed at debug level 1).
    A block that reaches end of file without a '//' line is dropped.
    An IOError is printed instead of raised; the file is always closed.
    @param (None)
    @return (None)
    '''
    query_tag = PfamParser.HMM_QUERY_SEQ
    end_tag = PfamParser.HMM_ENTRY_SEPERATOR

    def search(regex, text):
        # Run one pattern; at debug level 2 report the result and the pattern.
        # The pattern is read from the regex itself, so a failed match (None)
        # can be reported without dereferencing it.
        found = regex.search(text)
        if self.debug == 2:
            print("%s: %s" % (found, regex.pattern))
        return found

    # sys.exc_info() is used in the handlers so they parse on Python 2 and 3.
    try:
        handle = open(self.filename, 'r')
        try:
            while True:
                line = handle.readline()
                if not line:
                    break
                # Pfam ENTRY DETECTED
                if not line.startswith(query_tag):
                    continue
                # The query line is joined to the next line with a space;
                # all following lines keep their own newline.
                chunks = [line.rstrip(), PfamParser.HMM_SPACE]
                terminated = False
                while True:
                    line = handle.readline()
                    if not line:
                        break
                    if line.startswith(end_tag):
                        terminated = True
                        break
                    chunks.append(line)
                if not terminated:
                    # End of file inside an unterminated entry.
                    break
                PfamParser.HMM_ENTRY_COUNTER = PfamParser.HMM_ENTRY_COUNTER + 1
                chunks.append(PfamParser.HMM_SPACE + end_tag)
                data_entry = ''.join(chunks)
                # DEBUG INFO
                if self.debug == 3:
                    print(data_entry)
                # MATCH ENTRY STRUCTURE
                entry_match = search(PfamParser.REGEX_HMM_ENTRY, data_entry)
                if entry_match is None:
                    continue
                if self.debug == 2:
                    print("%d. %s" % (PfamParser.HMM_ENTRY_COUNTER, entry_match))
                self.ENTRY = entry_match.group(1)
                # Per-entry defaults, used when a section cannot be matched.
                hmm_query = ''
                hmm_acc = ''
                hmm_description = ''
                family_scores_list = []
                parsed_domains_list = []
                domain_alignments_list = []
                # MATCH QUERY SEQUENCE
                query_match = search(PfamParser.REGEX_HMM_QUERY, self.ENTRY)
                if query_match is not None:
                    hmm_query = query_match.group(1)
                # MATCH ACCESSION
                acc_match = search(PfamParser.REGEX_HMM_ACC, self.ENTRY)
                if acc_match is not None:
                    hmm_acc = acc_match.group(1)
                # MATCH DESCRIPTION
                description_match = search(PfamParser.REGEX_HMM_DESCRIPTION, self.ENTRY)
                if description_match is not None:
                    hmm_description = description_match.group(1)
                # MATCH FAMILY SCORES
                # NB !!! --- MAXIMUM RECURSION LIMIT ---- !!!
                try:
                    scores_match = search(PfamParser.REGEX_HMM_SEQ_FAMILY_SCORES, self.ENTRY)
                    if scores_match is not None:
                        score_lines = scores_match.group(1).split(PfamParser.HMM_NEWLINE)
                        # Lines 0-2 are title, column header and rule; the
                        # last element is the empty remainder of the split.
                        family_scores_list = score_lines[3:-1]
                        # COLLECT IN HASH
                        self.HMM_FAMILY_SCORES_HITS[hmm_query] = family_scores_list
                except RuntimeError:
                    print("Error: MATCHING PFAM FAMILY SCORES!")
                    print(sys.exc_info()[1])
                # MATCH PARSED DOMAIN INFORMATION
                # NB !!! --- MAXIMUM RECURSION LIMIT ---- !!!
                try:
                    domains_match = search(PfamParser.REGEX_HMM_PARSED_DOMAINS, self.ENTRY)
                    if domains_match is not None:
                        domain_lines = domains_match.group(1).split(PfamParser.HMM_NEWLINE)
                        # Same layout as the family-score table.
                        parsed_domains_list = domain_lines[3:-1]
                        # COLLECT IN HASH
                        self.HMM_PARSED_DOMAINS[hmm_query] = parsed_domains_list
                except RuntimeError:
                    print("Error: MATCHING PFAM PARSED DOMAIN INFORMATION!")
                    print(sys.exc_info()[1])
                # MATCH DOMAIN ALIGNMENTS
                # NB !!! --- MAXIMUM RECURSION LIMIT ---- !!!
                try:
                    alignments_match = search(PfamParser.REGEX_HMM_ALIGNMENTS, self.ENTRY)
                    if alignments_match is not None:
                        alignment_lines = alignments_match.group(1).split("\n")
                        # NOTE(review): the slice starts at 3, so element 2 is
                        # dropped together with the two header lines; kept
                        # exactly as in the original - confirm this is intended.
                        domain_alignments_list = alignment_lines[3:-2]
                except RuntimeError:
                    print("Error: MATCHING PFAM DOMAIN ALIGNMENTS!")
                    print(sys.exc_info()[1])
                # Construct Pfam Entry structure
                Entry = PfamEntry(hmm_query, hmm_acc, hmm_description,
                                  family_scores_list, parsed_domains_list,
                                  domain_alignments_list)
                # DEBUG INFO
                if self.debug == 1:
                    print(Entry)
        finally:
            handle.close()
    except IOError:
        print(sys.exc_info()[1])
    return
def getPfamHits( self ):
    '''
    Hand back the family-score hits gathered by this parser.
    The very same mapping object held in self.HMM_FAMILY_SCORES_HITS is
    returned (no copy is made).
    @param (None)
    @return (Hash: QUERY => family score hits)
    '''
    family_hits = self.HMM_FAMILY_SCORES_HITS
    return family_hits
def getNoOfHits( self ):
    '''
    Count the entries in the mapping returned by getPfamHits().
    The value is the size of that mapping, i.e. one per key stored in it.
    @param (None)
    @return (Integer: number of entries in the Pfam hits mapping)
    '''
    family_hits = self.getPfamHits()
    return len( family_hits )
def getPfamParsedDomains( self ):
    '''
    Hand back the parsed-domain records gathered by this parser.
    The very same mapping object held in self.HMM_PARSED_DOMAINS is
    returned (no copy is made).
    @param (None)
    @return (Hash: QUERY => parsed domain hits)
    '''
    parsed_domains = self.HMM_PARSED_DOMAINS
    return parsed_domains
def getNoOfParsedDomains( self ):
    '''
    Count the entries in the mapping returned by getPfamParsedDomains().
    The value is the size of that mapping, i.e. one per key stored in it.
    @param (None)
    @return (Integer: number of entries in the parsed domains mapping)
    '''
    parsed_domains = self.getPfamParsedDomains()
    return len( parsed_domains )
def getRegexHMM_Entry( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_ENTRY.
    @param (None)
    @return (Regex: REGEX_HMM_ENTRY)
    '''
    entry_regex = PfamParser.REGEX_HMM_ENTRY
    return entry_regex
def getRegexQuery( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_QUERY.
    @param (None)
    @return (Regex: REGEX_HMM_QUERY)
    '''
    query_regex = PfamParser.REGEX_HMM_QUERY
    return query_regex
def getRegexAccession( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_ACC.
    @param (None)
    @return (Regex: REGEX_HMM_ACC)
    '''
    accession_regex = PfamParser.REGEX_HMM_ACC
    return accession_regex
def getRegexDescription( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_DESCRIPTION.
    @param (None)
    @return (Regex: REGEX_HMM_DESCRIPTION)
    '''
    description_regex = PfamParser.REGEX_HMM_DESCRIPTION
    return description_regex
def getRegexFamilyScores( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_SEQ_FAMILY_SCORES.
    @param (None)
    @return (Regex: REGEX_HMM_SEQ_FAMILY_SCORES)
    '''
    family_scores_regex = PfamParser.REGEX_HMM_SEQ_FAMILY_SCORES
    return family_scores_regex
def getRegexParsedDomains( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_PARSED_DOMAINS.
    @param (None)
    @return (Regex: REGEX_HMM_PARSED_DOMAINS)
    '''
    parsed_domains_regex = PfamParser.REGEX_HMM_PARSED_DOMAINS
    return parsed_domains_regex
def getRegexAlignments( self ):
    '''
    Expose the class-level attribute PfamParser.REGEX_HMM_ALIGNMENTS.
    @param (None)
    @return (Regex: REGEX_HMM_ALIGNMENTS)
    '''
    alignments_regex = PfamParser.REGEX_HMM_ALIGNMENTS
    return alignments_regex
def __str__( self ):
    '''
    Build a short, human-readable description of this parser.
    The text has two lines: the fixed parser type label and the file
    name reported by self.getFilename().
    @param (None)
    @return (String: "ParserType: PfamParser" and "Filename: <name>", one per line)
    '''
    # Put each field on its own line; plain concatenation would fuse the
    # labels into "ParserType: PfamParserFilename: ...".
    fields = [ 'ParserType: PfamParser', "Filename: %s" % ( self.getFilename() ) ]
    return "\n".join( fields )
-------------- next part --------------
#!/usr/bin/env python
###################################################################################################### */
# COPYRIGHT INFORMATION
# Pfam DOMAIN RESULTS PARSER
# @AUTHOR: Wagied Davids
# @DATE: 22.01.2004
###################################################################################################### */
import string
import PfamParser
# Rebind the imported module name to the parser class it contains,
# so the class can be called directly below.
PfamParser = PfamParser.PfamParser

# Path of the hmmpfam report to read
filename = 'hmmpfam_output.example'

# Result container: QUERY => list of parsed-domain records
PFAM_DB = {}

# Build the parser for that file
parser = PfamParser( filename )

# Debug level 1 -- presumably makes parse() print each entry it builds;
# confirm against setDebug().
parser.setDebug( 1 )

# Read and parse the whole report
parser.parse()

# Fetch the parsed domains collected during parsing
PFAM_DB = parser.getPfamParsedDomains()

# Print one numbered line per (query, hit) pair
counter = 1
for QUERY, hits in PFAM_DB.items():
    for HIT in hits:
        print( "%d. %s => %s" % ( counter, QUERY, HIT ) )
        counter += 1
More information about the Biopython-dev mailing list