1 from ost
import settings
3 from xml.dom
import minidom
4 from ost
import io, seq
10 An aligned patch, aka. HSP
14 The local alignment. Sequence offset of both sequences in the alignment are
15 set to the starting position in the query and target sequence, respectively.
17 :type: :class:`~ost.seq.AlignmentHandle`
19 .. attribute:: bit_score
21 The bit score of the HSP
29 The E-value of the HSP
33 The sequence identity of the HSP
35 def __init__(self, aln, bit_score, score, evalue, seqid):
44 A positive match found by BLAST.
46 Each blast hit consists of one or more HSPs, encoded by the
47 :class:`AlignedPatch` class.
49 .. attribute:: identifier
51 The identifier of the matching sequence
53 .. attribute:: aligned_patches
55 list of :class:`AlignedPatch` instances holding the actual HSPs.
57 def __init__(self, identifier, aligned_patches):
64 Parses the blast output and returns a list of BlastHits
65 setting no seqid_thres or evalue_thres, restores default behaviour without filtering
69 for child
in node.childNodes:
70 if child.nodeType==child.TEXT_NODE:
74 def _GetValue(node, tag_name):
75 tag=node.getElementsByTagName(tag_name)
77 return _GetText(tag[0])
79 def _GetInt(node, tag_name):
80 return int(_GetValue(node, tag_name))
82 def _ParseHsp(query_id, hit_id, hsp, tot_query_len, seqid_thres=0, evalue_thres=float(
"infinity")):
83 bit_score=float(_GetValue(hsp,
'Hsp_bit-score'))
84 score=float(_GetValue(hsp,
'Hsp_score'))
85 evalue=float(_GetValue(hsp,
'Hsp_evalue'))
87 identity=float(_GetValue(hsp,
'Hsp_identity'))
88 except AssertionError:
93 hsp_align_len=float(_GetValue(hsp,
'Hsp_align-len'))
94 seqid=identity/hsp_align_len
95 query_offset=_GetInt(hsp,
'Hsp_query-from')-1
96 hit_offset=_GetInt(hsp,
'Hsp_hit-from')-1
97 query_seq=seq.CreateSequence(str(query_id), str(_GetValue(hsp,
'Hsp_qseq')))
98 query_seq.offset=query_offset
99 hit_seq=seq.CreateSequence(str(hit_id), str(_GetValue(hsp,
'Hsp_hseq')))
100 hit_seq.offset=hit_offset
102 if seqid > float(seqid_thres)
and evalue < evalue_thres:
103 aln=seq.CreateAlignment(query_seq, hit_seq)
104 return AlignedPatch(aln, bit_score, score, evalue, seqid)
107 print str(e), query_seq, hit_seq
110 doc=minidom.parseString(string)
112 ost.LogError(
'Error while parsing BLAST output: %s' % str(e))
115 query_id=_GetValue(doc,
'BlastOutput_query-def')
116 tot_query_len=_GetValue(doc,
'BlastOutput_query-len')
117 for hit
in doc.getElementsByTagName(
'Hit'):
118 hit_id=_GetValue(hit,
'Hit_def')
119 hsps=hit.getElementsByTagName(
'Hsp')
120 aligned_patches=[_ParseHsp(query_id, hit_id, hsp, tot_query_len, seqid_thres, evalue_thres)
for hsp
in hsps]
121 hits.append(
BlastHit(hit_id, aligned_patches))
139 Create a blast DB from a fasta file
141 :param infasta: the pdb fasta from which the database will be created
142 :type infasta: :class:`string`
144 :param dbout: output location for blastDB file
145 :type dbout: :class:`string`
151 exe=settings.Locate(
'formatdb')
152 args=[exe,
'-i', infasta,
'-n', dbout]
155 exe=settings.Locate(
'makeblastdb')
156 args=[exe,
'-in', infasta,
'-out', dbout,
'-dbtype',
'prot']
158 raise RuntimeError(
'could not find makeblastdb/formatdb executable')
160 if os.path.basename(mkdb_cmd)==
'makeblastdb':
161 exe=settings.Locate(
'makeblastdb',explicit_file_name=mkdb_cmd)
162 args=[exe,
'-in', infasta,
'-out', dbout,
'-dbtype',
'prot']
163 elif os.path.basename(mkdb_cmd)==
'formatdb':
164 exe=settings.Locate(
'formatdb',explicit_filename=mkdb_cmd)
165 args=[exe,
'-i', infasta,
'-n', dbout]
167 raise IOError(
'mkdb command must either be the path to formatdb or makeblastdb!')
170 ost.LogInfo(
'creating blast DB (%s)' % cmd)
175 Returns the version of the BLAST executable, e.g. 2.2.24 as a string
179 blast_exe=settings.Locate(
'blastall',explicit_file_name=blast_location)
182 blast_exe=settings.Locate(
'blastp', explicit_file_name=blast_location)
184 raise RuntimeError(
'could not find blast executable')
186 if os.path.basename(blast_exe)==
'blastall':
188 pattern=re.compile(
r'\s*blastall (\d+\.\d+\.\d+)\s+arguments:\s*')
191 args=[blast_exe,
'-version']
192 pattern=re.compile(
r'\s*Package: blast (\d+\.\d+\.\d+),\s+')
194 blast_pipe=subprocess.Popen(args, stdout=subprocess.PIPE,
195 stderr=subprocess.PIPE)
196 lines=blast_pipe.stdout.readlines()
199 m=pattern.match(line)
202 raise IOError(
"could not determine blast version for '%s'" % blast_exe)
206 def Blast(query, database, gap_open=11, gap_ext=1, matrix='BLOSUM62',
207 blast_location=
None, outfmt=0, filter_low_complexity=
True):
209 Runs a protein vs. protein blast search. The results are returned as a
210 list of :class:`BlastHit` instances.
212 :param query: the query sequence
213 :type query: :class:`seq.ConstSequenceHandle`
215 :param database: The filename of the sequence database. Make sure that
216 formatdb has been run on the database and the <database>.pin file exists.
217 :param matrix: The substitution matrix to be used. Must be one of 'BLOSUM45',
218 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70'.
219 :param gap_open: Gap opening penalty. Note that only a subset of gap opening
220 penalties is supported for each substitutition matrix. Consult the blast
221 docs for more information.
222 :param gap_ext: Gap extension penalty. Only a subset of gap extension
223 penalties are supported for each of the substitution matrices. Consult the
224 blast docs for more information.
225 :param outfmt: output format, where '0' corresponds to default output (parsed blast output and 1 to raw output)
226 :param filter_low_complexity: Mask off segments of the query sequence that
227 have low compositional complexity, as determined by the SEG program of
228 Wootton & Federhen (Computers and Chemistry, 1993)
230 subst_mats=(
'BLOSUM45',
'BLOSUM62',
'BLOSUM80',
'PAM30',
'PAM70',)
231 if matrix
not in subst_mats:
232 raise ValueError(
'matrix must be one of %s' %
', '.join(subst_mats))
233 if not os.path.exists(
'%s.pin' % database)
and not os.path.exists(
'%s.pal' % database):
234 raise IOError(
"Database %s does not exist" % database)
235 if blast_location!=
None and not os.path.exists(blast_location):
236 ost.LogScript(
'Could not find %s' %blast_location)
238 if blast_location==
None:
240 blast_exe=settings.Locate(
'blastall')
243 blast_exe=settings.Locate(
'blastp')
245 raise RuntimeError(
'could not find blast executable')
247 blast_exe=settings.Locate(os.path.basename(blast_location),explicit_file_name=blast_location)
249 if os.path.basename(blast_exe)==
'blastall':
250 args=[blast_exe,
'-d', database,
'-p',
'blastp',
251 '-m',
'7',
'-M', matrix,
'-G', str(gap_open),
'-E', str(gap_ext)]
252 if filter_low_complexity==
False:
257 complexity_opt=
'-seg'
258 if filter_low_complexity==
True:
262 args=[blast_exe,
'-db', database,
'-matrix', matrix,
263 '-gapopen', str(gap_open),
'-gapextend', str(gap_ext),
'-outfmt',
'5', complexity_opt, complexity_arg ]
265 ost.LogInfo(
'running BLAST (%s)' %
' '.join(args))
266 blast_pipe=subprocess.Popen(args, stderr=subprocess.PIPE,
267 stdout=subprocess.PIPE, stdin=subprocess.PIPE)
268 if isinstance(query, str):
269 stdout, stderr=blast_pipe.communicate(query)
271 stdout, stderr=blast_pipe.communicate(io.SequenceToString(query,
'fasta'))
274 pattern=re.compile(
r'^\[.*\]\s+ERROR:\s+(.*)')
275 lines=stderr.split(
'\n')
276 error_message=pattern.match(lines[0])
278 raise BlastError(error_message.group(1),
'\n'.join(lines[1:]))