1 from ost
import settings
3 from xml.dom
import minidom
4 from ost
import io, seq
11 An aligned patch, aka. HSP
15 The local alignment. Sequence offset of both sequences in the alignment are
16 set to the starting position in the query and target sequence, respectively.
18 :type: :class:`~ost.seq.AlignmentHandle`
20 .. attribute:: bit_score
22 The bit score of the HSP
30 The E-value of the HSP
32 def __init__(self, aln, bit_score, score, evalue):
40 A positive match found by BLAST.
42 Each blast hit consists of one or more HSPs, encoded by the
43 :class:`AlignedPatch` class.
45 .. attribute:: identifier
47 The identifier of the matching sequence
49 .. attribute:: aligned_patches
51 list of :class:`AlignedPatch` instances holding the actual HSPs.
53 def __init__(self, identifier, aligned_patches):
59 Raised when something goes wrong during parsing/execution of the blast
64 Short explanation of the problem
66 .. attribute:: details
68 If set, explains in more detail what caused the error. Might be empty.
82 Parses the blast output and returns a list of :class:`BlastHit` instances.
84 :raises: :class:`BlastError` if the output could not be parsed.
86 This functions is only capable of dealing with the BLAST XML output.
90 for child
in node.childNodes:
91 if child.nodeType==child.TEXT_NODE:
95 def _GetValue(node, tag_name):
96 tag=node.getElementsByTagName(tag_name)
98 return _GetText(tag[0])
100 def _GetInt(node, tag_name):
101 return int(_GetValue(node, tag_name))
103 def _ParseHsp(query_id, hit_id, hsp):
104 bit_score=float(_GetValue(hsp,
'Hsp_bit-score'))
105 score=float(_GetValue(hsp,
'Hsp_score'))
106 evalue=float(_GetValue(hsp,
'Hsp_evalue'))
107 query_offset=_GetInt(hsp,
'Hsp_query-from')-1
108 hit_offset=_GetInt(hsp,
'Hsp_hit-from')-1
109 query_seq=seq.CreateSequence(str(query_id), str(_GetValue(hsp,
'Hsp_qseq')))
110 query_seq.offset=query_offset
111 hit_seq=seq.CreateSequence(str(hit_id), str(_GetValue(hsp,
'Hsp_hseq')))
112 hit_seq.offset=hit_offset
113 aln=seq.CreateAlignment(query_seq, hit_seq)
116 doc=minidom.parseString(string)
118 raise BlastError(
'Error while parsing BLAST output: %s' % str(e),
'')
120 query_id=_GetValue(doc,
'BlastOutput_query-def')
121 for hit
in doc.getElementsByTagName(
'Hit'):
122 hit_id=_GetValue(hit,
'Hit_def')
123 hsps=hit.getElementsByTagName(
'Hsp')
124 aligned_patches=[_ParseHsp(query_id, hit_id, hsp)
for hsp
in hsps]
125 hits.append(
BlastHit(hit_id, aligned_patches))
130 Returns the version of the BLAST executable, e.g. 2.2.24 as a string
132 blastall_exe=settings.Locate(
'blastall', explicit_file_name=blast_location)
133 blast_pipe=subprocess.Popen(blastall_exe, stdout=subprocess.PIPE,
134 stderr=subprocess.PIPE)
135 lines=blast_pipe.stdout.readlines()
136 pattern=re.compile(
r'blastall (\d+\.\d+\.\d+)\s+arguments:\s*')
138 m=pattern.match(line)
141 raise IOError(
"could not determine blast version for '%s'" % blastall_exe)
143 def Blast(query, database, gap_open=11, gap_ext=1, matrix='BLOSUM62',
144 blast_location=
None):
146 Runs a protein vs. protein blast search. The results are returned as a
147 list of :class:`BlastHit` instances.
149 :param query: the query sequence
150 :type query: :class:`seq.ConstSequenceHandle`
152 :param database: The filename of the sequence database. Make sure that
153 formatdb has been run on the database and the <database>.pin or
154 <database>.pal file exists.
155 :param matrix: The substitution matrix to be used. Must be one of 'BLOSUM45',
156 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70'.
157 :param gap_open: Gap opening penalty. Note that only a subset of gap opening
158 penalties is supported for each substitutition matrix. Consult the blast
159 docs for more information.
160 :param gap_ext: Gap extension penalty. Only a subset of gap extension
161 penalties are supported for each of the substitution matrices. Consult the
162 blast docs for more information.
164 :raises: :class:`~ost.settings.FileNotFound` if the BLAST executable could not
166 :raises: :class:`BlastError` if there was a problem during execution of BLAST.
167 :raises: :class:`ValueError` if the substitution matrix is invalid
168 :raises: :class:`IOError` if the database does not exist
170 subst_mats=(
'BLOSUM45',
'BLOSUM62',
'BLOSUM80',
'PAM30',
'PAM70',)
171 if matrix
not in subst_mats:
172 raise ValueError(
'matrix must be one of %s' %
', '.join(subst_mats))
173 if not os.path.exists(
'%s.pin' % database)
and not os.path.exists(
'%s.pal' % database):
174 raise IOError(
"Database %s does not exist" % database)
175 blastall_exe=settings.Locate(
'blastall', explicit_file_name=blast_location)
176 args=[blastall_exe,
'-d', database,
'-p',
'blastp',
177 '-m',
'7',
'-M', matrix,
'-G', str(gap_open),
'-E', str(gap_ext)]
178 ost.LogInfo(
'running BLAST (%s)' %
' '.join(args))
179 blast_pipe=subprocess.Popen(args, stderr=subprocess.PIPE,
180 stdout=subprocess.PIPE, stdin=subprocess.PIPE)
181 stdout, stderr=blast_pipe.communicate(io.SequenceToString(query,
'fasta'))
183 pattern=re.compile(
r'^\[.*\]\s+ERROR:\s+(.*)')
184 lines=stderr.split(
'\n')
185 error_message=pattern.match(lines[0])
187 raise BlastError(error_message.group(1),
'\n'.join(lines[1:]))