OpenStructure
Loading...
Searching...
No Matches
hhblits2.py
Go to the documentation of this file.
1'''HHblits wrapper classes and functions.
2'''
3
4import subprocess
5import datetime
6import os
7import shutil
8import tempfile
9
10import ost
11from ost import settings, seq
12from ost.bindings import utils
13
15 """
16 A hit found by HHblits
17
18 .. attribute:: hit_id
19
20 String identifying the hit
21
22 :type: :class:`str`
23
24 .. attribute:: aln
25
26 Pairwise alignment containing the aligned part between the query and the
27 target. First sequence is the query, the second sequence the target.
28
29 :type: :class:`~ost.seq.AlignmentHandle`
30
31 .. attribute:: score
32
33 The alignment score
34
35 :type: :class:`float`
36
37 .. attribute:: ss_score
38
39 The secondary structure score
40
41 :type: :class:`float`
42
43 .. attribute:: evalue
44
45 The E-value of the alignment
46
47 :type: :class:`float`
48
49 .. attribute:: pvalue
50
51 The P-value of the alignment
52
53 :type: :class:`float`
54
55 .. attribute:: prob
56
57 The probability of the alignment (between 0 and 100)
58
59 :type: :class:`float`
60 """
61 def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
62 self.hit_id = hit_id
63 self.aln = aln
64 self.score = score
65 self.ss_score = ss_score
66 self.evalue = evalue
67 self.prob = prob
68 self.pvalue = pvalue
69
71 """Stats from the beginning of search output.
72
73 .. attribute:: query
74
75 The name of the query sequence
76
77 :type: :class:`str`
78
79 .. attribute:: match_columns
80
81 Total of aligned Match columns
82
83 :type: :class:`int`
84
85 .. attribute:: n_eff
86
87 Value of the ``-neff`` option
88
89 :type: :class:`float`
90
91 .. attribute:: searched_hmms
92
93 Number of profiles searched
94
95 :type: :class:`int`
96
97 .. attribute:: date
98
99 Execution date
100
101 :type: :class:`datetime.datetime`
102
103 .. attribute:: command
104
105 Command used to run
106
107 :type: :class:`str`
108 """
109 def __init__(self):
110 self.query = ''
112 self.n_eff = 0
114 self.date = None
115 self.command = ''
116
118 '''Fetch header content.
119
120 First, we seek the start of the identifier, that is, the first whitespace
121 after the hit number + 1. Since the identifier may contain whitespaces
122 itself, we cannot split the whole line
123
124 :param line: Line from the output header.
125 :type line: :class:`str`
126
127 :return: Hit information and query/template offsets
128 :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
129 '''
130 for i in range(0, len(line)):
131 if line[i].isdigit():
132 break
133 for i in range(i, len(line)):
134 if line[i] == ' ':
135 break
136 assert len(line)-i >= 31 and line[i+1] != ' '
137 hit_id = line[i+1:i+31].strip()
138 fields = line[i+32:].split()
139 prob = float(fields[0])
140 evalue = float(fields[1])
141 pvalue = float(fields[2])
142 score = float(fields[3])
143 ss_score = float(fields[4])
144 offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
145 return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
146 offsets)
147
149 """
150 Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151 the header of the search results and a list of hits.
152
153 :param output: Iterable containing the lines of the HHblits output file
154 :type output: iterable (e.g. an open file handle)
155
156 :return: a tuple of the header of the search results and the hits
157 :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
158 """
159 lines = iter(output)
160 def _ParseHeaderSection(lines):
161 value_start_column = 14
162 date_pattern = '%a %b %d %H:%M:%S %Y'
163 header = HHblitsHeader()
164 line = next(lines)
165 assert line.startswith('Query')
166 header.query = line[value_start_column:].strip()
167 line = next(lines)
168 assert line.startswith('Match_columns')
169 header.match_columns = int(line[value_start_column:].strip())
170
171 line = next(lines)
172 assert line.startswith('No_of_seqs')
173
174 line = next(lines)
175 assert line.startswith('Neff')
176 header.n_eff = float(line[value_start_column:].strip())
177
178 line = next(lines)
179 assert line.startswith('Searched_HMMs')
180 header.searched_hmms = int(line[value_start_column:].strip())
181
182 line = next(lines)
183 assert line.startswith('Date')
184 value = line[value_start_column:].strip()
185 header.date = datetime.datetime.strptime(value, date_pattern)
186
187 line = next(lines)
188 assert line.startswith('Command')
189 header.command = line[value_start_column:].strip()
190
191 line = next(lines)
192 assert len(line.strip()) == 0
193 return header
194
195 def _ParseTableOfContents(lines):
196 line = next(lines)
197 assert line.startswith(' No Hit')
198 hits = []
199 while True:
200 line = next(lines)
201 if len(line.strip()) == 0:
202 return hits
203 hits.append(ParseHeaderLine(line))
204 return hits
205
206 def _ParseResultBody(query_id, hits, lines):
207 entry_index = None
208 query_str = ''
209 templ_str = ''
210 def _MakeAln(query_id, hit_id, query_string, templ_string,
211 q_offset, t_offset):
212 s1 = seq.CreateSequence(query_id, query_string)
213 s1.offset = q_offset-1
214 s2 = seq.CreateSequence(hit_id, templ_string)
215 s2.offset = t_offset-1
216 return seq.CreateAlignment(s1, s2)
217 try:
218 while True:
219 # Lines which we are interested in:
220 # - "Done!" -> end of list
221 # - "No ..." -> next item in list
222 # - "T <hit_id> <start> <data> <end>"
223 # - "Q <query_id> <start> <data> <end>"
224 # -> rest is to be skipped
225 line = next(lines)
226 if len(line.strip()) == 0:
227 continue
228 if line.startswith('Done!'):
229 if len(query_str) > 0:
230 hits[entry_index][0].aln = _MakeAln(\
231 query_id, hits[entry_index][0].hit_id,
232 query_str, templ_str, *hits[entry_index][1])
233 return [h for h, o in hits]
234 if line.startswith('No '):
235 if len(query_str) > 0:
236 hits[entry_index][0].aln = _MakeAln(\
237 query_id, hits[entry_index][0].hit_id,
238 query_str, templ_str, *hits[entry_index][1])
239 entry_index = int(line[3:].strip())-1
240 line = next(lines)
241 hits[entry_index][0].hit_id = line[1:].strip()
242 query_str = ''
243 templ_str = ''
244 # skip the next line. It doesn't contain information we
245 # don't already know
246 next(lines)
247 continue
248 assert entry_index != None
249 # Skip all "T ..." and "Q ..." lines besides the one we want
250 if line[1:].startswith(' Consensus'):
251 continue
252 if line[1:].startswith(' ss_pred'):
253 continue
254 if line[1:].startswith(' ss_conf'):
255 continue
256 if line[1:].startswith(' ss_dssp'):
257 continue
258 if line.startswith('T '):
259 for start_pos in range(22, len(line)):
260 if line[start_pos].isalpha() or line[start_pos] == '-':
261 break
262 end_pos = line.find(' ', start_pos)
263 # this can fail if we didn't skip all other "T ..." lines
264 if end_pos == -1:
265 error_str = "Unparsable line '%s' for entry No %d" \
266 % (line.strip(), entry_index + 1)
267 raise AssertionError(error_str)
268 templ_str += line[start_pos:end_pos]
269 if line.startswith('Q '):
270 for start_pos in range(22, len(line)):
271 if line[start_pos].isalpha() or line[start_pos] == '-':
272 break
273 end_pos = line.find(' ', start_pos)
274 # this can fail if we didn't skip all other "Q ..." lines
275 if end_pos == -1:
276 error_str = "Unparsable line '%s' for entry No %d" \
277 % (line.strip(), entry_index + 1)
278 raise AssertionError(error_str)
279 query_str += line[start_pos:end_pos]
280 except StopIteration:
281 if len(query_str) > 0:
282 hits[entry_index][0].aln = _MakeAln(query_id,
283 hits[entry_index][0].hit_id,
284 query_str, templ_str,
285 *hits[entry_index][1])
286 return [h for h, o in hits]
287 header = _ParseHeaderSection(lines)
288 # parse the table of contents. This is neccessary as some of the properties
289 # (i.e. start of alignment) we need are only given there. From the TOC we
290 # create a list of hits that is then further filled with data when we parse
291 # the actual result body
292 hits = _ParseTableOfContents(lines)
293 return header, _ParseResultBody(header.query, hits, lines)
294
295def ParseA3M(a3m_file):
296 '''
297 Parse secondary structure information and the multiple sequence alignment
298 out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
299
300 :param a3m_file: Iterable containing the lines of the A3M file
301 :type a3m_file: iterable (e.g. an open file handle)
302
303 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
304 (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
305 '''
306 profile_dict = dict()
307 state = 'NONE'
308 pred_seq_txt = ''
309 conf_seq_txt = ''
310 msa_seq = list()
311 msa_head = list()
312 for line in a3m_file:
313 if len(line.rstrip()) == 0:
314 continue
315 elif line.startswith('>ss_pred'):
316 state = 'sspred'
317 continue
318 elif line.startswith('>ss_conf'):
319 state = 'ssconf'
320 continue
321 elif line[0] == '>':
322 if state == 'ssconf' or state == 'msa':
323 msa_seq.append('')
324 msa_head.append(line[1:].rstrip())
325 else:
326 raise IOError('The A3M file is missing the "ss_conf" section')
327 state = 'msa'
328 continue
329
330 if state == 'sspred':
331 pred_seq_txt += line.rstrip()
332 elif state == 'ssconf':
333 conf_seq_txt += line.rstrip()
334 elif state == 'msa':
335 msa_seq[len(msa_seq)-1] += line.rstrip()
336
337 profile_dict['ss_pred'] = list()
338 profile_dict['ss_conf'] = list()
339 for i in range(0, len(pred_seq_txt)):
340 profile_dict['ss_pred'].append(pred_seq_txt[i])
341 profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
342
343 # post processing
344 # MSA
345 profile_dict['msa'] = None
346 if len(msa_seq) > 1:
347 t = msa_seq[0]
348 al = seq.AlignmentList()
349 for i in range(1, len(msa_seq)):
350 qs = ''
351 ts = ''
352 k = 0
353 for c in msa_seq[i]:
354 if c.islower():
355 qs += '-'
356 ts += c.upper()
357 else:
358 qs += t[k]
359 ts += c
360 k += 1
361 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
362 seq.CreateSequence(msa_head[i], ts))
363 al.append(nl)
364 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
365 al, seq.CreateSequence(msa_head[0], t))
366 return profile_dict
367
368
369def ParseHHM(profile):
370 '''
371 Parse secondary structure information and the MSA out of an HHM profile as
372 produced by :meth:`HHblits.A3MToProfile`.
373
374 :param profile: Opened file handle holding the profile.
375 :type profile: :class:`file`
376
377 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
378 (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
379 "consensus" (:class:`~ost.seq.SequenceHandle`).
380 '''
381 profile_dict = dict()
382 state = 'NONE'
383 pred_seq_txt = ''
384 conf_seq_txt = ''
385 consensus_txt = ''
386 msa_seq = list()
387 msa_head = list()
388 for line in profile:
389 if len(line.rstrip()) == 0:
390 continue
391 if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
392 state = 'sspred'
393 continue
394 elif line.rstrip() == '>ss_conf PSIPRED confidence values':
395 state = 'ssconf'
396 continue
397 elif line.rstrip() == '>Consensus':
398 state = 'consensus'
399 continue
400 elif line[0] == '>':
401 if state == 'consensus' or state == 'msa':
402 msa_seq.append('')
403 msa_head.append(line[1:].rstrip())
404 else:
405 raise IOError('Profile file "%s" is missing ' % profile.name+
406 'the "Consensus" section')
407 state = 'msa'
408 continue
409 elif line[0] == '#':
410 state = 'NONE'
411 continue
412
413 if state == 'sspred':
414 pred_seq_txt += line.rstrip()
415 elif state == 'ssconf':
416 conf_seq_txt += line.rstrip()
417 elif state == 'msa':
418 msa_seq[len(msa_seq)-1] += line.rstrip()
419 elif state == 'consensus':
420 consensus_txt += line.rstrip()
421
422 profile_dict['ss_pred'] = list()
423 profile_dict['ss_conf'] = list()
424 for i in range(0, len(pred_seq_txt)):
425 profile_dict['ss_pred'].append(pred_seq_txt[i])
426 profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
427
428 # post processing
429 # MSA
430 profile_dict['msa'] = None
431 if len(msa_seq):
432 t = msa_seq[0]
433 al = seq.AlignmentList()
434 for i in range(1, len(msa_seq)):
435 qs = ''
436 ts = ''
437 k = 0
438 for c in msa_seq[i]:
439 if c.islower():
440 qs += '-'
441 ts += c.upper()
442 else:
443 qs += t[k]
444 ts += c
445 k += 1
446 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
447 seq.CreateSequence(msa_head[i], ts))
448 al.append(nl)
449 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
450 al, seq.CreateSequence(msa_head[0], t))
451 #print profile_dict['msa'].ToString(80)
452 # Consensus
453 profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
454
455 return profile_dict
456
457
459 """
460 Initialise a new HHblits "search" for the given query. Query may either
461 be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
462 query is the actual query sequence, in the latter case, the query is the
463 filename to the file containing the query.
464
465 :param query: Query sequence as file or sequence.
466 :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
467 :param hhsuite_root: Path to the top-level directory of your hhsuite
468 installation.
469 :type hhsuite_root: :class:`str`
470 :param hhblits_bin: Name of the hhblits binary. Will only be used if
471 :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
472 :type hhblits_bin: :class:`str`
473 :param working_dir: Directory for temporary files. Will be created if not
474 present but **not** automatically deleted.
475 :type working_dir: :class:`str`
476 """
477 OUTPUT_PREFIX = 'query_hhblits'
478 def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
479 self.query = query
480 self.hhsuite_root = hhsuite_root
481 if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
482 self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
483 self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
484 else:
485 self.hhblits_bin = settings.Locate('hhblits',
486 explicit_file_name=hhblits_bin)
487 self.bin_dir = os.path.dirname(self.hhblits_bin)
488 # guess root folder (note: this may fail in future)
489 self.hhsuite_root = os.path.dirname(self.bin_dir)
490 self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
491 if working_dir:
492 self.needs_cleanup = False
493 self.working_dir = working_dir
494 if not os.path.exists(working_dir):
495 os.mkdir(working_dir)
496 if isinstance(query, str):
497 self.filename = os.path.abspath(os.path.join(
498 self.working_dir, os.path.basename(query)))
499 if self.filename != os.path.abspath(query):
500 shutil.copy(query, self.filename)
501 else:
502 self.filename = os.path.abspath(os.path.join(self.working_dir,
503 '%s.fasta' % HHblits.OUTPUT_PREFIX))
504 ost.io.SaveSequence(query, self.filename)
505 else:
506 self.needs_cleanup = True
507 if isinstance(query, str):
508 self.working_dir = tempfile.mkdtemp()
509 self.filename = os.path.abspath(os.path.join(
510 self.working_dir, os.path.basename(query)))
511 shutil.copy(query, self.filename)
512 else:
513 tmp_dir = utils.TempDirWithFiles((query,))
514 self.working_dir = tmp_dir.dirname
515 self.filename = tmp_dir.files[0]
516
517 def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
518 """Builds the MSA for the query sequence.
519
520 This function directly uses hhblits of hhtools. While in theory it would
521 be possible to do this by PSI-blasting on our own, hhblits is supposed
522 to be faster. Also it is supposed to prevent alignment corruption. The
523 alignment corruption is caused by low-scoring terminal alignments that
524 draw the sequences found by PSI-blast away from the optimum. By removing
525 these low scoring ends, part of the alignment corruption can be
526 suppressed.
527
528 hhblits does **not** call PSIPRED on the MSA to predict the secondary
529 structure of the query sequence. This is done by addss.pl of hhtools.
530 The predicted secondary structure is stored together with the sequences
531 identified by hhblits.
532
533 The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
534 already produced, hhblits is not called again and the existing file path
535 is returned.
536
537 :param nrdb: Database to be align against; has to be an hhblits database
538 :type nrdb: :class:`str`
539
540 :param options: Dictionary of options to *hhblits*, one "-" is added in
541 front of every key. Boolean True values add flag without
542 value. Merged with default options {'cpu': 1, 'n': 1},
543 where 'n' defines the number of iterations.
544 :type options: :class:`dict`
545
546 :param a3m_file: a path of a3m_file to be used, optional
547 :type a3m_file: :class:`str`
548
549 :return: The path to the A3M file containing the MSA
550 :rtype: :class:`str`
551 """
552 if a3m_file is None:
553 a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
554 else:
555 a3m_file = os.path.abspath(a3m_file)
556 if os.path.exists(a3m_file):
557 ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
558 return a3m_file
559 ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
560 full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
561 os.path.split(nrdb)[1])
562 # create MSA
563 opts = {'cpu' : 1, # no. of cpus used
564 'n' : 1} # no. of iterations
565 opts.update(options)
566 opt_cmd, _ = _ParseOptions(opts)
567 hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
568 (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
569 opt_cmd)
570 job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
571 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
572 sout, _ = job.communicate()
573 lines = sout.decode().splitlines()
574 for line in lines:
575 ost.LogVerbose(line.strip())
576 if not os.path.exists(a3m_file):
577 ost.LogWarning('Building query profile failed, no output')
578 return a3m_file
579 # add secondary structure annotation
580 addss_cmd = "%s %s" % (os.path.join(self.hhsuite_root,
581 'lib/hh/scripts/addss.pl'),
582 a3m_file)
583 env = dict(os.environ)
584 env.update({'PERL5LIB' : os.path.join(self.hhsuite_root,
585 'lib/hh/scripts'),
586 'HHLIB' : self.hhlib_dir,
587 'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
588 os.environ['PATH'])})
589 job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
590 env=env, stdout=subprocess.PIPE,
591 stderr=subprocess.PIPE)
592 sout, serr = job.communicate()
593 lines = sout.decode().splitlines()
594 for line in lines:
595 if 'error' in line.lower():
596 ost.LogWarning('Predicting secondary structure for MSA '+
597 '(%s) failed, on command: %s' % (a3m_file, line))
598 return a3m_file
599 return a3m_file
600
601 def A3MToProfile(self, a3m_file, hhm_file=None):
602 """
603 Converts the A3M alignment file to a hhm profile. If hhm_file is not
604 given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
605
606 The produced HHM file can be parsed by :func:`ParseHHM`.
607
608 If the file was already produced, the existing file path is returned
609 without recomputing it.
610
611 :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
612 :type a3m_file: :class:`str`
613
614 :param hhm_file: Desired output file name
615 :type hhm_file: :class:`str`
616
617 :return: Path to the profile file
618 :rtype: :class:`str`
619 """
620 hhmake = os.path.join(self.bin_dir, 'hhmake')
621 if not hhm_file:
622 hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
623 if os.path.exists(hhm_file):
624 return hhm_file
625 ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
626 os.putenv('HHLIB', self.hhlib_dir)
627 job = subprocess.Popen('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
628 shell=True, stdout=subprocess.PIPE,
629 stderr=subprocess.PIPE)
630 sout, serr = job.communicate()
631 lines = serr.decode().splitlines()
632 for line in lines:
633 ost.LogWarning(line)
634 lines = sout.decode().splitlines()
635 for line in lines:
636 ost.LogVerbose(line)
637 if job.returncode !=0:
638 raise IOError('could not convert a3m to hhm file')
639 return hhm_file
640
641 def A3MToCS(self, a3m_file, cs_file=None, options={}):
642 """
643 Converts the A3M alignment file to a column state sequence file. If
644 cs_file is not given, the output file will be set to
645 <:attr:`a3m_file`-basename>.seq219.
646
647 If the file was already produced, the existing file path is returned
648 without recomputing it.
649
650 :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
651 :type a3m_file: :class:`str`
652
653 :param cs_file: Output file name (may be omitted)
654 :type cs_file: :class:`str`
655
656 :param options: Dictionary of options to *cstranslate*, one "-" is added
657 in front of every key. Boolean True values add flag
658 without value.
659 :type options: :class:`dict`
660
661 :return: Path to the column state sequence file
662 :rtype: :class:`str`
663 """
664 cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
665 if not cs_file:
666 cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
667 if os.path.exists(cs_file):
668 return cs_file
669 opt_cmd, _ = _ParseOptions(options)
670 cs_cmd = '%s -i %s -o %s %s' % (
671 cstranslate,
672 os.path.abspath(a3m_file),
673 os.path.abspath(cs_file),
674 opt_cmd)
675 ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
676 job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dir,
677 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
678 sout, _ = job.communicate()
679 if b'Wrote abstract state sequence to' in sout:
680 return cs_file
681
682 ost.LogWarning('Creating column state sequence file (%s) failed' % \
683 cs_file)
684
685 def Cleanup(self):
686 """Delete temporary data.
687
688 Delete temporary data if no working dir was given. Controlled by
689 :attr:`needs_cleanup`.
690 """
691 if self.needs_cleanup and os.path.exists(self.working_dir):
692 shutil.rmtree(self.working_dir)
693
694 def CleanupFailed(self):
695 '''In case something went wrong, call to make sure everything is clean.
696
697 This will delete the working dir independently of :attr:`needs_cleanup`.
698 '''
699 store_needs_cleanup = self.needs_cleanup
700 self.needs_cleanup = True
701 self.Cleanup()
702 self.needs_cleanup = store_needs_cleanup
703
704 def Search(self, a3m_file, database, options={}, prefix=''):
705 """
706 Searches for templates in the given database. Before running the search,
707 the hhm file is copied. This makes it possible to launch several hhblits
708 instances at once. Upon success, the filename of the result file is
709 returned. This file may be parsed with :func:`ParseHHblitsOutput`.
710
711 :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
712 :type a3m_file: :class:`str`
713
714 :param database: Search database, needs to be the common prefix of the
715 database files
716 :type database: :class:`str`
717
718 :param options: Dictionary of options to *hhblits*, one "-" is added in
719 front of every key. Boolean True values add flag without
720 value. Merged with default options {'cpu': 1, 'n': 1},
721 where 'n' defines the number of iterations.
722 :type options: :class:`dict`
723
724 :param prefix: Prefix to the result file
725 :type prefix: :class:`str`
726
727 :return: The path to the result file
728 :rtype: :class:`str`
729 """
730 opts = {'cpu' : 1, # no. of cpus used
731 'n' : 1} # no. of iterations
732 opts.update(options)
733 opt_cmd, opt_str = _ParseOptions(opts)
734 base = os.path.basename(os.path.splitext(a3m_file)[0])
735 hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
736 hhr_file = os.path.join(self.working_dir, hhr_file)
737 search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
738 self.hhblits_bin,
739 opt_cmd,
740 os.path.abspath(a3m_file),
741 os.path.abspath(hhr_file),
742 os.path.join(os.path.abspath(os.path.split(database)[0]),
743 os.path.split(database)[1]))
744 ost.LogInfo('searching %s' % database)
745 ost.LogVerbose(search_cmd)
746 job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dir,
747 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
748 sout, serr = job.communicate()
749 if job.returncode != 0:
750 lines = sout.decode().splitlines()
751 for line in lines:
752 ost.LogError(line.strip())
753 lines = serr.decode().splitlines()
754 for line in lines:
755 ost.LogError(line.strip())
756 return None
757 return hhr_file
758
759
761 """
762 :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
763 passed to command ("-" added in front of keys, options
764 separated by space) and opt_str (options separated by "_")
765 can be used for filenames.
766 :param opts: Dictionary of options, one "-" is added in front of every
767 key. Boolean True values add flag without value.
768 """
769 opt_cmd = list()
770 opt_str = list()
771 for k, val in opts.items():
772 if type(val) == type(True):
773 if val == True:
774 opt_cmd.append('-%s' % str(k))
775 opt_str.append(str(k))
776 else:
777 opt_cmd.append('-%s %s' % (str(k), str(val)))
778 opt_str.append('%s%s' % (str(k), str(val)))
779 opt_cmd = ' '.join(opt_cmd)
780 opt_str = '_'.join(opt_str)
781 return opt_cmd, opt_str
782
783
784__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
785 'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
786 'ParseHeaderLine']
787
788# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
789# LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
790# LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
791# LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
792# LocalWords: whitespace whitespaces
__init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob)
Definition hhblits2.py:61
__init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None)
Definition hhblits2.py:478
BuildQueryMSA(self, nrdb, options={}, a3m_file=None)
Definition hhblits2.py:517
Search(self, a3m_file, database, options={}, prefix='')
Definition hhblits2.py:704
A3MToCS(self, a3m_file, cs_file=None, options={})
Definition hhblits2.py:641
A3MToProfile(self, a3m_file, hhm_file=None)
Definition hhblits2.py:601
void DLLEXPORT_OST_IO SaveSequence(const seq::ConstSequenceHandle &sequence, const String &filename, const String &format="auto")