OpenStructure
Loading...
Searching...
No Matches
hhblits3.py
Go to the documentation of this file.
1'''HHblits wrapper classes and functions.
2'''
3
4import subprocess
5import datetime
6import os
7import shutil
8import tempfile
9
10import ost
11from ost import settings, seq
12from ost.bindings import utils
13
15 """
16 A hit found by HHblits
17
18 .. attribute:: hit_id
19
20 String identifying the hit
21
22 :type: :class:`str`
23
24 .. attribute:: aln
25
26 Pairwise alignment containing the aligned part between the query and the
27 target. First sequence is the query, the second sequence the target.
28
29 :type: :class:`~ost.seq.AlignmentHandle`
30
31 .. attribute:: score
32
33 The alignment score
34
35 :type: :class:`float`
36
37 .. attribute:: ss_score
38
39 The secondary structure score
40
41 :type: :class:`float`
42
43 .. attribute:: evalue
44
45 The E-value of the alignment
46
47 :type: :class:`float`
48
49 .. attribute:: pvalue
50
51 The P-value of the alignment
52
53 :type: :class:`float`
54
55 .. attribute:: prob
56
57 The probability of the alignment (between 0 and 100)
58
59 :type: :class:`float`
60 """
61 def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
62 self.hit_id = hit_id
63 self.aln = aln
64 self.score = score
65 self.ss_score = ss_score
66 self.evalue = evalue
67 self.prob = prob
68 self.pvalue = pvalue
69
71 """Stats from the beginning of search output.
72
73 .. attribute:: query
74
75 The name of the query sequence
76
77 :type: :class:`str`
78
79 .. attribute:: match_columns
80
81 Total of aligned Match columns
82
83 :type: :class:`int`
84
85 .. attribute:: n_eff
86
87 Value of the ``-neff`` option
88
89 :type: :class:`float`
90
91 .. attribute:: searched_hmms
92
93 Number of profiles searched
94
95 :type: :class:`int`
96
97 .. attribute:: date
98
99 Execution date
100
101 :type: :class:`datetime.datetime`
102
103 .. attribute:: command
104
105 Command used to run
106
107 :type: :class:`str`
108 """
109 def __init__(self):
110 self.query = ''
112 self.n_eff = 0
114 self.date = None
115 self.command = ''
116
118 '''Fetch header content.
119
120 First, we seek the start of the identifier, that is, the first whitespace
121 after the hit number + 1. Since the identifier may contain whitespaces
122 itself, we cannot split the whole line
123
124 :param line: Line from the output header.
125 :type line: :class:`str`
126
127 :return: Hit information and query/template offsets
128 :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
129 '''
130 for i in range(0, len(line)):
131 if line[i].isdigit():
132 break
133 for i in range(i, len(line)):
134 if line[i] == ' ':
135 break
136 assert len(line)-i >= 31 and line[i+1] != ' '
137 hit_id = line[i+1:i+31].strip()
138 fields = line[i+32:].split()
139 prob = float(fields[0])
140 evalue = float(fields[1])
141 pvalue = float(fields[2])
142 score = float(fields[3])
143 ss_score = float(fields[4])
144 offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
145 return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
146 offsets)
147
149 """
150 Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151 the header of the search results and a list of hits.
152
153 :param output: Iterable containing the lines of the HHblits output file
154 :type output: iterable (e.g. an open file handle)
155
156 :return: a tuple of the header of the search results and the hits
157 :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
158 """
159 lines = iter(output)
160 def _ParseHeaderSection(lines):
161 value_start_column = 14
162 date_pattern = '%a %b %d %H:%M:%S %Y'
163 header = HHblitsHeader()
164 line = next(lines)
165 assert line.startswith('Query')
166 header.query = line[value_start_column:].strip()
167 line = next(lines)
168 assert line.startswith('Match_columns')
169 header.match_columns = int(line[value_start_column:].strip())
170
171 line = next(lines)
172 assert line.startswith('No_of_seqs')
173
174 line = next(lines)
175 assert line.startswith('Neff')
176 header.n_eff = float(line[value_start_column:].strip())
177
178 line = next(lines)
179 assert line.startswith('Searched_HMMs')
180 header.searched_hmms = int(line[value_start_column:].strip())
181
182 line = next(lines)
183 assert line.startswith('Date')
184 value = line[value_start_column:].strip()
185 header.date = datetime.datetime.strptime(value, date_pattern)
186
187 line = next(lines)
188 assert line.startswith('Command')
189 header.command = line[value_start_column:].strip()
190
191 line = next(lines)
192 assert len(line.strip()) == 0
193 return header
194
195 def _ParseTableOfContents(lines):
196 line = next(lines)
197 assert line.startswith(' No Hit')
198 hits = []
199 while True:
200 line = next(lines)
201 if len(line.strip()) == 0:
202 return hits
203 hits.append(ParseHeaderLine(line))
204 return hits
205
206 def _ParseResultBody(query_id, hits, lines):
207 entry_index = None
208 query_str = ''
209 templ_str = ''
210 def _MakeAln(query_id, hit_id, query_string, templ_string,
211 q_offset, t_offset):
212 s1 = seq.CreateSequence(query_id, query_string)
213 s1.offset = q_offset-1
214 s2 = seq.CreateSequence(hit_id, templ_string)
215 s2.offset = t_offset-1
216 return seq.CreateAlignment(s1, s2)
217 try:
218 while True:
219 # Lines which we are interested in:
220 # - "Done!" -> end of list
221 # - "No ..." -> next item in list
222 # - "T <hit_id> <start> <data> <end>"
223 # - "Q <query_id> <start> <data> <end>"
224 # -> rest is to be skipped
225 line = next(lines)
226 if len(line.strip()) == 0:
227 continue
228 if line.startswith('Done!'):
229 if len(query_str) > 0:
230 hits[entry_index][0].aln = _MakeAln(\
231 query_id, hits[entry_index][0].hit_id,
232 query_str, templ_str, *hits[entry_index][1])
233 return [h for h, o in hits]
234 if line.startswith('No '):
235 if len(query_str) > 0:
236 hits[entry_index][0].aln = _MakeAln(\
237 query_id, hits[entry_index][0].hit_id,
238 query_str, templ_str, *hits[entry_index][1])
239 entry_index = int(line[3:].strip())-1
240 line = next(lines)
241 hits[entry_index][0].hit_id = line[1:].strip()
242 query_str = ''
243 templ_str = ''
244 # skip the next line. It doesn't contain information we
245 # don't already know
246 next(lines)
247 continue
248 assert entry_index != None
249 # Skip all "T ..." and "Q ..." lines besides the one we want
250 if line[1:].startswith(' Consensus'):
251 continue
252 if line[1:].startswith(' ss_pred'):
253 continue
254 if line[1:].startswith(' ss_conf'):
255 continue
256 if line[1:].startswith(' ss_dssp'):
257 continue
258 if line.startswith('T '):
259 for start_pos in range(22, len(line)):
260 if line[start_pos].isalpha() or line[start_pos] == '-':
261 break
262 end_pos = line.find(' ', start_pos)
263 # this can fail if we didn't skip all other "T ..." lines
264 if end_pos == -1:
265 error_str = "Unparsable line '%s' for entry No %d" \
266 % (line.strip(), entry_index + 1)
267 raise AssertionError(error_str)
268 templ_str += line[start_pos:end_pos]
269 if line.startswith('Q '):
270 for start_pos in range(22, len(line)):
271 if line[start_pos].isalpha() or line[start_pos] == '-':
272 break
273 end_pos = line.find(' ', start_pos)
274 # this can fail if we didn't skip all other "Q ..." lines
275 if end_pos == -1:
276 error_str = "Unparsable line '%s' for entry No %d" \
277 % (line.strip(), entry_index + 1)
278 raise AssertionError(error_str)
279 query_str += line[start_pos:end_pos]
280 except StopIteration:
281 if len(query_str) > 0:
282 hits[entry_index][0].aln = _MakeAln(query_id,
283 hits[entry_index][0].hit_id,
284 query_str, templ_str,
285 *hits[entry_index][1])
286 return [h for h, o in hits]
287 header = _ParseHeaderSection(lines)
288 # parse the table of contents. This is neccessary as some of the properties
289 # (i.e. start of alignment) we need are only given there. From the TOC we
290 # create a list of hits that is then further filled with data when we parse
291 # the actual result body
292 hits = _ParseTableOfContents(lines)
293 return header, _ParseResultBody(header.query, hits, lines)
294
295def ParseA3M(a3m_file):
296 '''
297 Parse secondary structure information and the multiple sequence alignment
298 out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
299
300 :param a3m_file: Iterable containing the lines of the A3M file
301 :type a3m_file: iterable (e.g. an open file handle)
302
303 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
304 (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
305 If not available, "ss_pred" and "ss_conf" entries are set to None.
306 '''
307 profile_dict = dict()
308 state = 'NONE'
309 pred_seq_txt = ''
310 conf_seq_txt = ''
311 msa_seq = list()
312 msa_head = list()
313 for line in a3m_file:
314 if len(line.rstrip()) == 0:
315 continue
316 elif line.startswith('>ss_pred'):
317 state = 'sspred'
318 continue
319 elif line.startswith('>ss_conf'):
320 state = 'ssconf'
321 continue
322 elif line[0] == '>':
323 msa_seq.append('')
324 msa_head.append(line[1:].rstrip())
325 state = 'msa'
326 continue
327
328 if state == 'sspred':
329 pred_seq_txt += line.rstrip()
330 elif state == 'ssconf':
331 conf_seq_txt += line.rstrip()
332 elif state == 'msa':
333 msa_seq[len(msa_seq)-1] += line.rstrip()
334
335 if len(pred_seq_txt) > 0:
336 profile_dict['ss_pred'] = list()
337 profile_dict['ss_conf'] = list()
338 for i in range(0, len(pred_seq_txt)):
339 profile_dict['ss_pred'].append(pred_seq_txt[i])
340 profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
341 else:
342 profile_dict['ss_pred'] = None
343 profile_dict['ss_conf'] = None
344
345 # post processing
346 # MSA
347 profile_dict['msa'] = None
348 if len(msa_seq) > 1:
349 t = msa_seq[0]
350 al = seq.AlignmentList()
351 for i in range(1, len(msa_seq)):
352 qs = ''
353 ts = ''
354 k = 0
355 for c in msa_seq[i]:
356 if c.islower():
357 qs += '-'
358 ts += c.upper()
359 else:
360 qs += t[k]
361 ts += c
362 k += 1
363 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
364 seq.CreateSequence(msa_head[i], ts))
365 al.append(nl)
366 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
367 al, seq.CreateSequence(msa_head[0], t))
368 return profile_dict
369
370
371def ParseHHM(profile):
372 '''
373 Parse secondary structure information and the MSA out of an HHM profile as
374 produced by :meth:`HHblits.A3MToProfile`.
375
376 :param profile: Opened file handle holding the profile.
377 :type profile: :class:`file`
378
379 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
380 (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
381 "consensus" (:class:`~ost.seq.SequenceHandle`).
382 If not available, "ss_pred" and "ss_conf" entries are set to None.
383 '''
384 profile_dict = dict()
385 state = 'NONE'
386 pred_seq_txt = ''
387 conf_seq_txt = ''
388 consensus_txt = ''
389 msa_seq = list()
390 msa_head = list()
391 for line in profile:
392 if len(line.rstrip()) == 0:
393 continue
394 if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
395 state = 'sspred'
396 continue
397 elif line.rstrip() == '>ss_conf PSIPRED confidence values':
398 state = 'ssconf'
399 continue
400 elif line.rstrip() == '>Consensus':
401 state = 'consensus'
402 continue
403 elif line[0] == '>':
404 if state == 'consensus' or state == 'msa':
405 msa_seq.append('')
406 msa_head.append(line[1:].rstrip())
407 else:
408 raise IOError('Profile file "%s" is missing ' % profile.name+
409 'the "Consensus" section')
410 state = 'msa'
411 continue
412 elif line[0] == '#':
413 state = 'NONE'
414 continue
415
416 if state == 'sspred':
417 pred_seq_txt += line.rstrip()
418 elif state == 'ssconf':
419 conf_seq_txt += line.rstrip()
420 elif state == 'msa':
421 msa_seq[len(msa_seq)-1] += line.rstrip()
422 elif state == 'consensus':
423 consensus_txt += line.rstrip()
424
425 if len(pred_seq_txt) > 0:
426 profile_dict['ss_pred'] = list()
427 profile_dict['ss_conf'] = list()
428 for i in range(0, len(pred_seq_txt)):
429 profile_dict['ss_pred'].append(pred_seq_txt[i])
430 profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
431 else:
432 profile_dict['ss_pred'] = None
433 profile_dict['ss_conf'] = None
434
435 # post processing
436 # MSA
437 profile_dict['msa'] = None
438 if len(msa_seq):
439 t = msa_seq[0]
440 al = seq.AlignmentList()
441 for i in range(1, len(msa_seq)):
442 qs = ''
443 ts = ''
444 k = 0
445 for c in msa_seq[i]:
446 if c.islower():
447 qs += '-'
448 ts += c.upper()
449 else:
450 qs += t[k]
451 ts += c
452 k += 1
453 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
454 seq.CreateSequence(msa_head[i], ts))
455 al.append(nl)
456 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
457 al, seq.CreateSequence(msa_head[0], t))
458 # Consensus
459 profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
460
461 return profile_dict
462
463
465 """
466 Initialise a new HHblits "search" for the given query. Query may either
467 be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
468 query is the actual query sequence, in the latter case, the query is the
469 filename to the file containing the query.
470
471 :param query: Query sequence as file or sequence.
472 :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
473 :param hhsuite_root: Path to the top-level directory of your hhsuite
474 installation.
475 :type hhsuite_root: :class:`str`
476 :param hhblits_bin: Name of the hhblits binary. Will only be used if
477 :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
478 :type hhblits_bin: :class:`str`
479 :param working_dir: Directory for temporary files. Will be created if not
480 present but **not** automatically deleted.
481 :type working_dir: :class:`str`
482 """
483 OUTPUT_PREFIX = 'query_hhblits'
484 def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
485 self.query = query
486 self.hhsuite_root = hhsuite_root
487 if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
488 self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
489 self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
490 else:
491 self.hhblits_bin = settings.Locate('hhblits',
492 explicit_file_name=hhblits_bin)
493 self.bin_dir = os.path.dirname(self.hhblits_bin)
494 # guess root folder (note: this may fail in future)
495 self.hhsuite_root = os.path.dirname(self.bin_dir)
496
497 if working_dir:
498 self.needs_cleanup = False
499 self.working_dir = working_dir
500 if not os.path.exists(working_dir):
501 os.mkdir(working_dir)
502 if isinstance(query, str):
503 self.filename = os.path.abspath(os.path.join(
504 self.working_dir, os.path.basename(query)))
505 if self.filename != os.path.abspath(query):
506 shutil.copy(query, self.filename)
507 else:
508 self.filename = os.path.abspath(os.path.join(self.working_dir,
509 '%s.fasta' % HHblits.OUTPUT_PREFIX))
510 ost.io.SaveSequence(query, self.filename)
511 else:
512 self.needs_cleanup = True
513 if isinstance(query, str):
514 self.working_dir = tempfile.mkdtemp()
515 self.filename = os.path.abspath(os.path.join(
516 self.working_dir, os.path.basename(query)))
517 shutil.copy(query, self.filename)
518 else:
519 tmp_dir = utils.TempDirWithFiles((query,))
520 self.working_dir = tmp_dir.dirname
521 self.filename = tmp_dir.files[0]
522
523 def BuildQueryMSA(self, nrdb, options={}, a3m_file=None, assign_ss=True):
524 """Builds the MSA for the query sequence.
525
526 The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
527 already produced, hhblits is not called again and the existing file path
528 is returned (neglecting the *assign_ss* flag!!!).
529
530 :param nrdb: Database to be align against; has to be an hhblits database
531 :type nrdb: :class:`str`
532
533 :param options: Dictionary of options to *hhblits*, one "-" is added in
534 front of every key. Boolean True values add flag without
535 value. Merged with default options
536 {'cpu': 1, 'n': 1, 'e': 0.001}, where 'n' defines the
537 number of iterations and 'e' the E-value cutoff for
538 inclusion of sequences in result alignment.
539 :type options: :class:`dict`
540
541 :param a3m_file: a path of a3m_file to be used, optional
542 :type a3m_file: :class:`str`
543
544 :param assign_ss: HHblits does not assign predicted secondary structure
545 by default. You can optionally assign it with the
546 addss.pl script provided by the HH-suite. However,
547 your HH-suite installation requires you to specify
548 paths to PSIRED etc. We refer to the HH-suite user
549 guide for further instructions. Assignment is done
550 by calling :func:`HHblits.AssignSSToA3M`
551
552 :type assign_ss: :class:`bool`
553
554 :return: The path to the A3M file containing the MSA
555 :rtype: :class:`str`
556 """
557 if a3m_file is None:
558 a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
559 else:
560 a3m_file = os.path.abspath(a3m_file)
561 if os.path.exists(a3m_file):
562 ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
563 return a3m_file
564 ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
565 full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
566 os.path.split(nrdb)[1])
567 # create MSA
568 opts = {'cpu' : 1, # no. of cpus used
569 'n' : 1, # no. of iterations
570 'e' : 0.001} # evalue threshold
571 opts.update(options)
572 opt_cmd, _ = _ParseOptions(opts)
573 hhblits_cmd = '%s -i %s -oa3m %s -d %s %s' % \
574 (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
575 opt_cmd)
576
577 p = subprocess.run(hhblits_cmd, shell=True, cwd=self.working_dir,
578 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
579
580 lines = p.stdout.decode().splitlines()
581 for line in lines:
582 ost.LogVerbose(line.strip())
583
584 lines = p.stderr.decode().splitlines()
585 for line in lines:
586 ost.LogError(line.strip())
587
588 if not os.path.exists(a3m_file):
589 raise RuntimeError('Building query profile failed, no output')
590
591 if assign_ss:
592 return self.AssignSSToA3M(a3m_file)
593 else:
594 return a3m_file
595
596 def AssignSSToA3M(self, a3m_file):
597 """
598 HHblits does not assign predicted secondary structure by default.
599 You can optionally assign it with the addss.pl script provided by the
600 HH-suite. However, your HH-suite installation requires you to specify
601 paths to PSIRED etc. We refer to the HH-suite user guide for further
602 instructions.
603
604 :param a3m_file: Path to file you want to assign secondary structure to
605 :type a3m_file: :class:`str`
606 """
607
608 a3m_file = os.path.abspath(a3m_file)
609 addss_cmd = "perl %s %s" % (os.path.join(self.hhsuite_root,
610 'scripts/addss.pl'),
611 a3m_file)
612 env = dict(os.environ)
613 env.update({'PERL5LIB' : os.path.join(self.hhsuite_root, 'scripts'),
614 'HHLIB' : os.path.join(self.hhsuite_root),
615 'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
616 os.environ['PATH'])})
617
618 p = subprocess.run(addss_cmd, shell=True, cwd=self.working_dir,
619 env=env, stdout=subprocess.PIPE,
620 stderr=subprocess.PIPE)
621
622 lines = p.stdout.decode().splitlines()
623 for line in lines:
624 ost.LogVerbose(line.strip())
625 if 'error' in line.lower() or 'bad interpreter' in line.lower():
626 raise RuntimeError('Predicting secondary structure for MSA '+
627 '(%s) failed, on command: %s' % (a3m_file, line))
628
629 lines = p.stderr.decode().splitlines()
630 for line in lines:
631 ost.LogError(line.strip())
632 if 'error' in line.lower() or 'bad interpreter' in line.lower():
633 raise RuntimeError('Predicting secondary structure for MSA '+
634 '(%s) failed, on command: %s' % (a3m_file, line))
635
636 return a3m_file
637
638 def A3MToProfile(self, a3m_file, hhm_file=None):
639 """
640 Converts the A3M alignment file to a hhm profile. If hhm_file is not
641 given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
642
643 The produced HHM file can be parsed by :func:`ParseHHM`.
644
645 If the file was already produced, the existing file path is returned
646 without recomputing it.
647
648 :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
649 :type a3m_file: :class:`str`
650
651 :param hhm_file: Desired output file name
652 :type hhm_file: :class:`str`
653
654 :return: Path to the profile file
655 :rtype: :class:`str`
656 """
657 hhmake = os.path.join(self.bin_dir, 'hhmake')
658 if not hhm_file:
659 hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
660 if os.path.exists(hhm_file):
661 return hhm_file
662 ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
663 p = subprocess.run('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
664 shell=True, stdout=subprocess.PIPE,
665 stderr=subprocess.PIPE)
666 lines = p.stdout.decode().splitlines()
667 for line in lines:
668 ost.LogVerbose(line.strip())
669 lines = p.stderr.decode().splitlines()
670 for line in lines:
671 ost.LogError(line.strip())
672
673 if p.returncode != 0:
674 raise IOError('could not convert a3m to hhm file')
675
676 if not os.path.exists(hhm_file):
677 raise RuntimeError('could not convert a3m to hhm file, no output')
678
679 return hhm_file
680
681 def A3MToCS(self, a3m_file, cs_file=None, options={}):
682 """
683 Converts the A3M alignment file to a column state sequence file. If
684 cs_file is not given, the output file will be set to
685 <:attr:`a3m_file`-basename>.seq219.
686
687 If the file was already produced, the existing file path is returned
688 without recomputing it.
689
690 :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
691 :type a3m_file: :class:`str`
692
693 :param cs_file: Output file name (may be omitted)
694 :type cs_file: :class:`str`
695
696 :param options: Dictionary of options to *cstranslate*, one "-" is added
697 in front of every key. Boolean True values add flag
698 without value.
699 :type options: :class:`dict`
700
701 :return: Path to the column state sequence file
702 :rtype: :class:`str`
703 """
704 cstranslate = os.path.join(self.bin_dir, 'cstranslate')
705 if not cs_file:
706 cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
707 if os.path.exists(cs_file):
708 return cs_file
709 opt_cmd, _ = _ParseOptions(options)
710 cs_cmd = '%s -i %s -o %s %s' % (
711 cstranslate,
712 os.path.abspath(a3m_file),
713 os.path.abspath(cs_file),
714 opt_cmd)
715 ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
716 p = subprocess.run(cs_cmd, shell=True, cwd=self.working_dir,
717 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
718
719 if not os.path.exists(cs_file):
720 raise RuntimeError('Creating column state sequence file failed, ' +
721 'no output')
722
723 if b'Wrote abstract state sequence to' in p.stdout:
724 return cs_file
725 else:
726 raise RuntimeError('Creating column state sequence file failed')
727
728 def Cleanup(self):
729 """Delete temporary data.
730
731 Delete temporary data if no working dir was given. Controlled by
732 :attr:`needs_cleanup`.
733 """
734 if self.needs_cleanup and os.path.exists(self.working_dir):
735 shutil.rmtree(self.working_dir)
736
737 def CleanupFailed(self):
738 '''In case something went wrong, call to make sure everything is clean.
739
740 This will delete the working dir independently of :attr:`needs_cleanup`.
741 '''
742 store_needs_cleanup = self.needs_cleanup
743 self.needs_cleanup = True
744 self.Cleanup()
745 self.needs_cleanup = store_needs_cleanup
746
747 def Search(self, a3m_file, database, options={}, prefix=''):
748 """
749 Searches for templates in the given database. Before running the search,
750 the hhm file is copied. This makes it possible to launch several hhblits
751 instances at once. Upon success, the filename of the result file is
752 returned. This file may be parsed with :func:`ParseHHblitsOutput`.
753
754 :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
755 :type a3m_file: :class:`str`
756
757 :param database: Search database, needs to be the common prefix of the
758 database files
759 :type database: :class:`str`
760
761 :param options: Dictionary of options to *hhblits*, one "-" is added in
762 front of every key. Boolean True values add flag without
763 value. Merged with default options {'cpu': 1, 'n': 1},
764 where 'n' defines the number of iterations.
765 :type options: :class:`dict`
766
767 :param prefix: Prefix to the result file
768 :type prefix: :class:`str`
769
770 :return: The path to the result file
771 :rtype: :class:`str`
772 """
773 opts = {'cpu' : 1, # no. of cpus used
774 'n' : 1} # no. of iterations
775 opts.update(options)
776 opt_cmd, opt_str = _ParseOptions(opts)
777 base = os.path.basename(os.path.splitext(a3m_file)[0])
778 hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
779 hhr_file = os.path.join(self.working_dir, hhr_file)
780 search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
781 self.hhblits_bin,
782 opt_cmd,
783 os.path.abspath(a3m_file),
784 os.path.abspath(hhr_file),
785 os.path.join(os.path.abspath(os.path.split(database)[0]),
786 os.path.split(database)[1]))
787 ost.LogInfo('searching %s' % database)
788 ost.LogVerbose(search_cmd)
789 p = subprocess.run(search_cmd, shell=True, cwd=self.working_dir,
790 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
791 lines = p.stdout.decode().splitlines()
792 for line in lines:
793 ost.LogVerbose(line.strip())
794 lines = p.stderr.decode().splitlines()
795 for line in lines:
796 ost.LogError(line.strip())
797
798 if p.returncode != 0:
799 raise RuntimeError('Sequence search failed')
800
801 if not os.path.exists(hhr_file):
802 raise RuntimeError('Sequence search failed, no output')
803
804 return hhr_file
805
806
808 """
809 :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
810 passed to command ("-" added in front of keys, options
811 separated by space) and opt_str (options separated by "_")
812 can be used for filenames.
813 :param opts: Dictionary of options, one "-" is added in front of every
814 key. Boolean True values add flag without value.
815 """
816 opt_cmd = list()
817 opt_str = list()
818 for k, val in opts.items():
819 if type(val) == type(True):
820 if val == True:
821 opt_cmd.append('-%s' % str(k))
822 opt_str.append(str(k))
823 else:
824 opt_cmd.append('-%s %s' % (str(k), str(val)))
825 opt_str.append('%s%s' % (str(k), str(val)))
826 opt_cmd = ' '.join(opt_cmd)
827 opt_str = '_'.join(opt_str)
828 return opt_cmd, opt_str
829
830
831__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
832 'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
833 'ParseHeaderLine']
834
835# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
836# LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
837# LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
838# LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
839# LocalWords: whitespace whitespaces
__init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob)
Definition hhblits3.py:61
__init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None)
Definition hhblits3.py:484
AssignSSToA3M(self, a3m_file)
Definition hhblits3.py:596
BuildQueryMSA(self, nrdb, options={}, a3m_file=None, assign_ss=True)
Definition hhblits3.py:523
Search(self, a3m_file, database, options={}, prefix='')
Definition hhblits3.py:747
A3MToCS(self, a3m_file, cs_file=None, options={})
Definition hhblits3.py:681
A3MToProfile(self, a3m_file, hhm_file=None)
Definition hhblits3.py:638
void DLLEXPORT_OST_IO SaveSequence(const seq::ConstSequenceHandle &sequence, const String &filename, const String &format="auto")