1 from ost
import settings
3 from xml.dom
import minidom
4 from ost
import io, seq
9 def __init__(self, aln, bit_score, score, evalue):
17 A positive match found by BLAST.
19 Each blast hit consists of one or more aligned patches.
22 def __init__(self, identifier, aligned_patches):
28 Parses the blast output and returns a list of BlastHits
32 for child
in node.childNodes:
33 if child.nodeType==child.TEXT_NODE:
37 def _GetValue(node, tag_name):
38 tag=node.getElementsByTagName(tag_name)
40 return _GetText(tag[0])
42 def _GetInt(node, tag_name):
43 return int(_GetValue(node, tag_name))
45 def _ParseHsp(query_id, hit_id, hsp):
46 bit_score=float(_GetValue(hsp,
'Hsp_bit-score'))
47 score=float(_GetValue(hsp,
'Hsp_score'))
48 evalue=float(_GetValue(hsp,
'Hsp_evalue'))
49 query_offset=_GetInt(hsp,
'Hsp_query-from')-1
50 hit_offset=_GetInt(hsp,
'Hsp_hit-from')-1
51 query_seq=seq.CreateSequence(str(query_id), str(_GetValue(hsp,
'Hsp_qseq')))
52 query_seq.offset=query_offset
53 hit_seq=seq.CreateSequence(str(hit_id), str(_GetValue(hsp,
'Hsp_hseq')))
54 hit_seq.offset=hit_offset
55 aln=seq.CreateAlignment(query_seq, hit_seq)
58 doc=minidom.parseString(string)
60 ost.LogError(
'Error while parsing BLAST output: %s' % str(e))
63 query_id=_GetValue(doc,
'BlastOutput_query-def')
64 for hit
in doc.getElementsByTagName(
'Hit'):
65 hit_id=_GetValue(hit,
'Hit_def')
66 hsps=hit.getElementsByTagName(
'Hsp')
67 aligned_patches=[_ParseHsp(query_id, hit_id, hsp)
for hsp
in hsps]
68 hits.append(
BlastHit(hit_id, aligned_patches))
84 def Blast(query, database, gap_open=11, gap_ext=1, matrix='BLOSUM62',
87 Runs a protein vs. protein blast search. The results are returned as a
88 list of :class:`BlastHit` instances.
90 :param query: the query sequence
91 :type query: :class:`seq.ConstSequenceHandle`
93 :param database: The filename of the sequence database. Make sure that
94 formatdb has been run on the database and the <database>.pin file exists.
95 :param matrix: The substitution matrix to be used. Must be one of 'BLOSUM45',
96 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70'.
97 :param gap_open: Gap opening penalty. Note that only a subset of gap opening
98 penalties is supported for each substitutition matrix. Consult the blast
99 docs for more information.
100 :param gap_ext: Gap extension penalty. Only a subset of gap extension
101 penalties are supported for each of the substitution matrices. Consult the
102 blast docs for more information.
104 subst_mats=(
'BLOSUM45',
'BLOSUM62',
'BLOSUM80',
'PAM30',
'PAM70',)
105 if matrix
not in subst_mats:
106 raise ValueError(
'matrix must be one of %s' %
', '.join(subst_mats))
107 if not os.path.exists(
'%s.pin' % database):
108 raise IOError(
"Database %s does not exist" % database)
109 blastall_exe=settings.Locate(
'blastall', explicit_file_name=blast_location)
110 args=[blastall_exe,
'-d', database,
'-p',
'blastp',
111 '-m',
'7',
'-M', matrix,
'-G', str(gap_open),
'-E', str(gap_ext)]
112 ost.LogInfo(
'running BLAST (%s)' %
' '.join(args))
113 blast_pipe=subprocess.Popen(args, stderr=subprocess.PIPE,
114 stdout=subprocess.PIPE, stdin=subprocess.PIPE)
115 stdout, stderr=blast_pipe.communicate(io.SequenceToString(query,
'fasta'))
117 pattern=re.compile(
r'^\[.*\]\s+ERROR:\s+(.*)')
118 lines=stderr.split(
'\n')
119 error_message=pattern.match(lines[0])
121 raise BlastError(error_message.group(1),
'\n'.join(lines[1:]))