OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
hhblits.py
Go to the documentation of this file.
1 '''HHblits wrapper classes and functions.
2 '''
3 
4 import subprocess
5 import datetime
6 import os
7 import shutil
8 import tempfile
9 
10 import ost
11 from ost import settings, seq
12 from ost.bindings import utils
13 
14 class HHblitsHit:
15  """
16  A hit found by HHblits
17 
18  .. attribute:: hit_id
19 
20  String identifying the hit
21 
22  :type: :class:`str`
23 
24  .. attribute:: aln
25 
26  Pairwise alignment containing the aligned part between the query and the
27  target. First sequence is the query, the second sequence the target.
28 
29  :type: :class:`~ost.seq.AlignmentHandle`
30 
31  .. attribute:: score
32 
33  The alignment score
34 
35  :type: :class:`float`
36 
37  .. attribute:: ss_score
38 
39  The secondary structure score
40 
41  :type: :class:`float`
42 
43  .. attribute:: evalue
44 
45  The E-value of the alignment
46 
47  :type: :class:`float`
48 
49  .. attribute:: pvalue
50 
51  The P-value of the alignment
52 
53  :type: :class:`float`
54 
55  .. attribute:: prob
56 
57  The probability of the alignment (between 0 and 100)
58 
59  :type: :class:`float`
60  """
61  def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
62  self.hit_id = hit_id
63  self.aln = aln
64  self.score = score
65  self.ss_score = ss_score
66  self.evalue = evalue
67  self.prob = prob
68  self.pvalue = pvalue
69 
71  """Stats from the beginning of search output.
72 
73  .. attribute:: query
74 
75  The name of the query sequence
76 
77  :type: :class:`str`
78 
79  .. attribute:: match_columns
80 
81  Total of aligned Match columns
82 
83  :type: :class:`int`
84 
85  .. attribute:: n_eff
86 
87  Value of the ``-neff`` option
88 
89  :type: :class:`float`
90 
91  .. attribute:: searched_hmms
92 
93  Number of profiles searched
94 
95  :type: :class:`int`
96 
97  .. attribute:: date
98 
99  Execution date
100 
101  :type: :class:`datetime.datetime`
102 
103  .. attribute:: command
104 
105  Command used to run
106 
107  :type: :class:`str`
108  """
109  def __init__(self):
110  self.query = ''
111  self.match_columns = 0
112  self.n_eff = 0
113  self.searched_hmms = 0
114  self.date = None
115  self.command = ''
116 
117 def ParseHeaderLine(line):
118  '''Fetch header content.
119 
120  First, we seek the start of the identifier, that is, the first whitespace
121  after the hit number + 1. Since the identifier may contain whitespaces
122  itself, we cannot split the whole line
123 
124  :param line: Line from the output header.
125  :type line: :class:`str`
126 
127  :return: Hit information and query/template offsets
128  :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
129  '''
130  for i in range(0, len(line)):
131  if line[i].isdigit():
132  break
133  for i in range(i, len(line)):
134  if line[i] == ' ':
135  break
136  assert len(line)-i >= 31 and line[i+1] != ' '
137  hit_id = line[i+1:i+31].strip()
138  fields = line[i+32:].split()
139  prob = float(fields[0])
140  evalue = float(fields[1])
141  pvalue = float(fields[2])
142  score = float(fields[3])
143  ss_score = float(fields[4])
144  offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
145  return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
146  offsets)
147 
148 def ParseHHblitsOutput(output):
149  """
150  Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151  the header of the search results and a list of hits.
152 
153  :param output: Iterable containing the lines of the HHblits output file
154  :type output: iterable (e.g. an open file handle)
155 
156  :return: a tuple of the header of the search results and the hits
157  :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
158  """
159  lines = iter(output)
160  def _ParseHeaderSection(lines):
161  value_start_column = 14
162  date_pattern = '%a %b %d %H:%M:%S %Y'
163  header = HHblitsHeader()
164  line = lines.next()
165  assert line.startswith('Query')
166  header.query = line[value_start_column:].strip()
167  line = lines.next()
168  assert line.startswith('Match_columns')
169  header.match_columns = int(line[value_start_column:].strip())
170 
171  line = lines.next()
172  assert line.startswith('No_of_seqs')
173 
174  line = lines.next()
175  assert line.startswith('Neff')
176  header.n_eff = float(line[value_start_column:].strip())
177 
178  line = lines.next()
179  assert line.startswith('Searched_HMMs')
180  header.searched_hmms = int(line[value_start_column:].strip())
181 
182  line = lines.next()
183  assert line.startswith('Date')
184  value = line[value_start_column:].strip()
185  header.date = datetime.datetime.strptime(value, date_pattern)
186 
187  line = lines.next()
188  assert line.startswith('Command')
189  header.command = line[value_start_column:].strip()
190 
191  line = lines.next()
192  assert len(line.strip()) == 0
193  return header
194 
195  def _ParseTableOfContents(lines):
196  assert lines.next().startswith(' No Hit')
197  hits = []
198  while True:
199  line = lines.next()
200  if len(line.strip()) == 0:
201  return hits
202  hits.append(ParseHeaderLine(line))
203  return hits
204 
205  def _ParseResultBody(query_id, hits, lines):
206  entry_index = None
207  query_str = ''
208  templ_str = ''
209  def _MakeAln(query_id, hit_id, query_string, templ_string,
210  q_offset, t_offset):
211  s1 = seq.CreateSequence(query_id, query_string)
212  s1.offset = q_offset-1
213  s2 = seq.CreateSequence(hit_id, templ_string)
214  s2.offset = t_offset-1
215  return seq.CreateAlignment(s1, s2)
216  try:
217  while True:
218  # Lines which we are interested in:
219  # - "Done!" -> end of list
220  # - "No ..." -> next item in list
221  # - "T <hit_id> <start> <data> <end>"
222  # - "Q <query_id> <start> <data> <end>"
223  # -> rest is to be skipped
224  line = lines.next()
225  if len(line.strip()) == 0:
226  continue
227  if line.startswith('Done!'):
228  if len(query_str) > 0:
229  hits[entry_index][0].aln = _MakeAln(\
230  query_id, hits[entry_index][0].hit_id,
231  query_str, templ_str, *hits[entry_index][1])
232  return [h for h, o in hits]
233  if line.startswith('No '):
234  if len(query_str) > 0:
235  hits[entry_index][0].aln = _MakeAln(\
236  query_id, hits[entry_index][0].hit_id,
237  query_str, templ_str, *hits[entry_index][1])
238  entry_index = int(line[3:].strip())-1
239  hits[entry_index][0].hit_id = lines.next()[1:].strip()
240  query_str = ''
241  templ_str = ''
242  # skip the next line. It doesn't contain information we
243  # don't already know
244  lines.next()
245  continue
246  assert entry_index != None
247  # Skip all "T ..." and "Q ..." lines besides the one we want
248  if line[1:].startswith(' Consensus'):
249  continue
250  if line[1:].startswith(' ss_pred'):
251  continue
252  if line[1:].startswith(' ss_conf'):
253  continue
254  if line[1:].startswith(' ss_dssp'):
255  continue
256  if line.startswith('T '):
257  end_pos = line.find(' ', 22)
258  # this can fail if we didn't skip all other "T ..." lines
259  if end_pos == -1:
260  error_str = "Unparsable line '%s' for entry No %d" \
261  % (line.strip(), entry_index + 1)
262  raise AssertionError(error_str)
263  templ_str += line[22:end_pos]
264  if line.startswith('Q '):
265  end_pos = line.find(' ', 22)
266  # this can fail if we didn't skip all other "Q ..." lines
267  if end_pos == -1:
268  error_str = "Unparsable line '%s' for entry No %d" \
269  % (line.strip(), entry_index + 1)
270  raise AssertionError(error_str)
271  query_str += line[22:end_pos]
272  except StopIteration:
273  if len(query_str) > 0:
274  hits[entry_index][0].aln = _MakeAln(query_id,
275  hits[entry_index][0].hit_id,
276  query_str, templ_str,
277  *hits[entry_index][1])
278  return [h for h, o in hits]
279  header = _ParseHeaderSection(lines)
280  # parse the table of contents. This is neccessary as some of the properties
281  # (i.e. start of alignment) we need are only given there. From the TOC we
282  # create a list of hits that is then further filled with data when we parse
283  # the actual result body
284  hits = _ParseTableOfContents(lines)
285  return header, _ParseResultBody(header.query, hits, lines)
286 
287 def ParseA3M(a3m_file):
288  '''
289  Parse secondary structure information and the multiple sequence alignment
290  out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
291 
292  :param a3m_file: Iterable containing the lines of the A3M file
293  :type a3m_file: iterable (e.g. an open file handle)
294 
295  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
296  (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
297  '''
298  profile_dict = dict()
299  state = 'NONE'
300  pred_seq_txt = ''
301  conf_seq_txt = ''
302  msa_seq = list()
303  msa_head = list()
304  for line in a3m_file:
305  if len(line.rstrip()) == 0:
306  continue
307  elif line.startswith('>ss_pred'):
308  state = 'sspred'
309  continue
310  elif line.startswith('>ss_conf'):
311  state = 'ssconf'
312  continue
313  elif line[0] == '>':
314  if state == 'ssconf' or state == 'msa':
315  msa_seq.append('')
316  msa_head.append(line[1:].rstrip())
317  else:
318  raise IOError('The A3M file is missing the "ss_conf" section')
319  state = 'msa'
320  continue
321 
322  if state == 'sspred':
323  pred_seq_txt += line.rstrip()
324  elif state == 'ssconf':
325  conf_seq_txt += line.rstrip()
326  elif state == 'msa':
327  msa_seq[len(msa_seq)-1] += line.rstrip()
328 
329  profile_dict['ss_pred'] = list()
330  profile_dict['ss_conf'] = list()
331  for i in range(0, len(pred_seq_txt)):
332  profile_dict['ss_pred'].append(pred_seq_txt[i])
333  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
334 
335  # post processing
336  # MSA
337  profile_dict['msa'] = None
338  if len(msa_seq) > 1:
339  t = msa_seq[0]
340  al = seq.AlignmentList()
341  for i in range(1, len(msa_seq)):
342  qs = ''
343  ts = ''
344  k = 0
345  for c in msa_seq[i]:
346  if c.islower():
347  qs += '-'
348  ts += c.upper()
349  else:
350  qs += t[k]
351  ts += c
352  k += 1
353  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
354  seq.CreateSequence(msa_head[i], ts))
355  al.append(nl)
356  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
357  al, seq.CreateSequence(msa_head[0], t))
358  return profile_dict
359 
360 
361 def ParseHHM(profile):
362  '''
363  Parse secondary structure information and the MSA out of an HHM profile as
364  produced by :meth:`HHblits.A3MToProfile`.
365 
366  :param profile: Opened file handle holding the profile.
367  :type profile: :class:`file`
368 
369  :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
370  (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
371  "consensus" (:class:`~ost.seq.SequenceHandle`).
372  '''
373  profile_dict = dict()
374  state = 'NONE'
375  pred_seq_txt = ''
376  conf_seq_txt = ''
377  consensus_txt = ''
378  msa_seq = list()
379  msa_head = list()
380  for line in profile:
381  if len(line.rstrip()) == 0:
382  continue
383  if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
384  state = 'sspred'
385  continue
386  elif line.rstrip() == '>ss_conf PSIPRED confidence values':
387  state = 'ssconf'
388  continue
389  elif line.rstrip() == '>Consensus':
390  state = 'consensus'
391  continue
392  elif line[0] == '>':
393  if state == 'consensus' or state == 'msa':
394  msa_seq.append('')
395  msa_head.append(line[1:].rstrip())
396  else:
397  raise IOError('Profile file "%s" is missing ' % profile.name+
398  'the "Consensus" section')
399  state = 'msa'
400  continue
401  elif line[0] == '#':
402  state = 'NONE'
403  continue
404 
405  if state == 'sspred':
406  pred_seq_txt += line.rstrip()
407  elif state == 'ssconf':
408  conf_seq_txt += line.rstrip()
409  elif state == 'msa':
410  msa_seq[len(msa_seq)-1] += line.rstrip()
411  elif state == 'consensus':
412  consensus_txt += line.rstrip()
413 
414  profile_dict['ss_pred'] = list()
415  profile_dict['ss_conf'] = list()
416  for i in range(0, len(pred_seq_txt)):
417  profile_dict['ss_pred'].append(pred_seq_txt[i])
418  profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
419 
420  # post processing
421  # MSA
422  profile_dict['msa'] = None
423  if len(msa_seq):
424  t = msa_seq[0]
425  al = seq.AlignmentList()
426  for i in range(1, len(msa_seq)):
427  qs = ''
428  ts = ''
429  k = 0
430  for c in msa_seq[i]:
431  if c.islower():
432  qs += '-'
433  ts += c.upper()
434  else:
435  qs += t[k]
436  ts += c
437  k += 1
438  nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
439  seq.CreateSequence(msa_head[i], ts))
440  al.append(nl)
441  profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
442  al, seq.CreateSequence(msa_head[0], t))
443  #print profile_dict['msa'].ToString(80)
444  # Consensus
445  profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
446 
447  return profile_dict
448 
449 
450 class HHblits:
451  """
452  Initialise a new HHblits "search" for the given query. Query may either
453  be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
454  query is the actual query sequence, in the latter case, the query is the
455  filename to the file containing the query.
456 
457  :param query: Query sequence as file or sequence.
458  :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
459  :param hhsuite_root: Path to the top-level directory of your hhsuite
460  installation.
461  :type hhsuite_root: :class:`str`
462  :param hhblits_bin: Name of the hhblits binary. Will only be used if
463  :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
464  :type hhblits_bin: :class:`str`
465  :param working_dir: Directory for temporary files. Will be created if not
466  present but **not** automatically deleted.
467  :type working_dir: :class:`str`
468  """
469  OUTPUT_PREFIX = 'query_hhblits'
470  def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
471  self.query = query
472  self.hhsuite_root = hhsuite_root
473  if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
474  self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
475  self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
476  else:
477  self.hhblits_bin = settings.Locate('hhblits',
478  explicit_file_name=hhblits_bin)
479  self.bin_dir = os.path.dirname(self.hhblits_bin)
480  # guess root folder (note: this may fail in future)
481  self.hhsuite_root = os.path.dirname(self.bin_dir)
482  self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
483  if working_dir:
484  self.needs_cleanup = False
485  self.working_dir = working_dir
486  if not os.path.exists(working_dir):
487  os.mkdir(working_dir)
488  if isinstance(query, str):
489  self.filename = os.path.abspath(os.path.join(
490  self.working_dir, os.path.basename(query)))
491  if self.filename != os.path.abspath(query):
492  shutil.copy(query, self.filename)
493  else:
494  self.filename = os.path.join(self.working_dir,
495  '%s.fasta' % HHblits.OUTPUT_PREFIX)
496  ost.io.SaveSequence(query, self.filename)
497  else:
498  self.needs_cleanup = True
499  if isinstance(query, str):
500  self.working_dir = tempfile.mkdtemp()
501  self.filename = os.path.abspath(os.path.join(
502  self.working_dir, os.path.basename(query)))
503  shutil.copy(query, self.filename)
504  else:
505  tmp_dir = utils.TempDirWithFiles((query,))
506  self.working_dir = tmp_dir.dirname
507  self.filename = tmp_dir.files[0]
508 
509  def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
510  """Builds the MSA for the query sequence.
511 
512  This function directly uses hhblits of hhtools. While in theory it would
513  be possible to do this by PSI-blasting on our own, hhblits is supposed
514  to be faster. Also it is supposed to prevent alignment corruption. The
515  alignment corruption is caused by low-scoring terminal alignments that
516  draw the sequences found by PSI-blast away from the optimum. By removing
517  these low scoring ends, part of the alignment corruption can be
518  suppressed.
519 
520  hhblits does **not** call PSIPRED on the MSA to predict the secondary
521  structure of the query sequence. This is done by addss.pl of hhtools.
522  The predicted secondary structure is stored together with the sequences
523  identified by hhblits.
524 
525  The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
526  already produced, hhblits is not called again and the existing file path
527  is returned.
528 
529  :param nrdb: Database to be align against; has to be an hhblits database
530  :type nrdb: :class:`str`
531 
532  :param options: Dictionary of options to *hhblits*, one "-" is added in
533  front of every key. Boolean True values add flag without
534  value. Merged with default options {'cpu': 1, 'n': 1},
535  where 'n' defines the number of iterations.
536  :type options: :class:`dict`
537 
538  :param a3m_file: a path of a3m_file to be used, optional
539  :type a3m_file: :class:`str`
540 
541  :return: The path to the A3M file containing the MSA
542  :rtype: :class:`str`
543  """
544  if a3m_file is None:
545  a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
546  if os.path.exists(a3m_file):
547  ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
548  return a3m_file
549  ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
550  full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
551  os.path.split(nrdb)[1])
552  # create MSA
553  opts = {'cpu' : 1, # no. of cpus used
554  'n' : 1} # no. of iterations
555  opts.update(options)
556  opt_cmd, _ = _ParseOptions(opts)
557  hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
558  (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
559  opt_cmd)
560  job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
561  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
562  sout, _ = job.communicate()
563  lines = sout.splitlines()
564  for line in lines:
565  ost.LogVerbose(line.strip())
566  if not os.path.exists(a3m_file):
567  ost.LogWarning('Building query profile failed, no output')
568  return a3m_file
569  # add secondary structure annotation
570  addss_cmd = "%s %s" % (os.path.join(self.hhsuite_root,
571  'lib/hh/scripts/addss.pl'),
572  a3m_file)
573  env = dict(os.environ)
574  env.update({'PERL5LIB' : os.path.join(self.hhsuite_root,
575  'lib/hh/scripts'),
576  'HHLIB' : self.hhlib_dir,
577  'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
578  os.environ['PATH'])})
579  job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
580  env=env, stdout=subprocess.PIPE,
581  stderr=subprocess.PIPE)
582  sout, serr = job.communicate()
583  lines = sout.splitlines()
584  for line in lines:
585  if 'error' in line.lower():
586  ost.LogWarning('Predicting secondary structure for MSA '+
587  '(%s) failed, on command: %s' % (a3m_file, line))
588  return a3m_file
589  return a3m_file
590 
591  def A3MToProfile(self, a3m_file, hhm_file=None):
592  """
593  Converts the A3M alignment file to a hhm profile. If hhm_file is not
594  given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
595 
596  The produced A3M file can be parsed by :func:`ParseHHM`.
597 
598  If the file was already produced, the existing file path is returned
599  without recomputing it.
600 
601  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
602  :type a3m_file: :class:`str`
603 
604  :param hhm_file: Desired output file name
605  :type hhm_file: :class:`str`
606 
607  :return: Path to the profile file
608  :rtype: :class:`str`
609  """
610  hhmake = os.path.join(self.bin_dir, 'hhmake')
611  if not hhm_file:
612  hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
613  if os.path.exists(hhm_file):
614  return hhm_file
615  ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
616  os.putenv('HHLIB', self.hhlib_dir)
617  if subprocess.call('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
618  shell=True):
619  raise IOError('could not convert a3m to hhm file')
620  return hhm_file
621 
622  def A3MToCS(self, a3m_file, cs_file=None, options={}):
623  """
624  Converts the A3M alignment file to a column state sequence file. If
625  cs_file is not given, the output file will be set to
626  <:attr:`a3m_file`-basename>.seq219.
627 
628  If the file was already produced, the existing file path is returned
629  without recomputing it.
630 
631  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
632  :type a3m_file: :class:`str`
633 
634  :param cs_file: Output file name (may be omitted)
635  :type cs_file: :class:`str`
636 
637  :param options: Dictionary of options to *cstranslate*, one "-" is added
638  in front of every key. Boolean True values add flag
639  without value.
640  :type options: :class:`dict`
641 
642  :return: Path to the column state sequence file
643  :rtype: :class:`str`
644  """
645  cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
646  if not cs_file:
647  cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
648  if os.path.exists(cs_file):
649  return cs_file
650  opt_cmd, _ = _ParseOptions(options)
651  cs_cmd = '%s -i %s -o %s %s' % (
652  cstranslate,
653  os.path.abspath(a3m_file),
654  os.path.abspath(cs_file),
655  opt_cmd)
656  ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
657  job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dir,
658  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
659  sout, _ = job.communicate()
660  lines = sout.splitlines()
661  for line in lines:
662  if 'Wrote abstract state sequence to' in line:
663  return cs_file
664  ost.LogWarning('Creating column state sequence file (%s) failed' % \
665  cs_file)
666 
667  def Cleanup(self):
668  """Delete temporary data.
669 
670  Delete temporary data if no working dir was given. Controlled by
671  :attr:`needs_cleanup`.
672  """
673  if self.needs_cleanup and os.path.exists(self.working_dir):
674  shutil.rmtree(self.working_dir)
675 
676  def CleanupFailed(self):
677  '''In case something went wrong, call to make sure everything is clean.
678 
679  This will delete the working dir independently of :attr:`needs_cleanup`.
680  '''
681  store_needs_cleanup = self.needs_cleanup
682  self.needs_cleanup = True
683  self.Cleanup()
684  self.needs_cleanup = store_needs_cleanup
685 
686  def Search(self, a3m_file, database, options={}, prefix=''):
687  """
688  Searches for templates in the given database. Before running the search,
689  the hhm file is copied. This makes it possible to launch several hhblits
690  instances at once. Upon success, the filename of the result file is
691  returned. This file may be parsed with :func:`ParseHHblitsOutput`.
692 
693  :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
694  :type a3m_file: :class:`str`
695 
696  :param database: Search database, needs to be the common prefix of the
697  database files
698  :type database: :class:`str`
699 
700  :param options: Dictionary of options to *hhblits*, one "-" is added in
701  front of every key. Boolean True values add flag without
702  value. Merged with default options {'cpu': 1, 'n': 1},
703  where 'n' defines the number of iterations.
704  :type options: :class:`dict`
705 
706  :param prefix: Prefix to the result file
707  :type prefix: :class:`str`
708 
709  :return: The path to the result file
710  :rtype: :class:`str`
711  """
712  opts = {'cpu' : 1, # no. of cpus used
713  'n' : 1} # no. of iterations
714  opts.update(options)
715  opt_cmd, opt_str = _ParseOptions(opts)
716  base = os.path.basename(os.path.splitext(a3m_file)[0])
717  hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
718  hhr_file = os.path.join(self.working_dir, hhr_file)
719  search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
720  self.hhblits_bin,
721  opt_cmd,
722  os.path.abspath(a3m_file),
723  hhr_file,
724  os.path.join(os.path.abspath(os.path.split(database)[0]),
725  os.path.split(database)[1]))
726  ost.LogInfo('searching %s' % database)
727  ost.LogVerbose(search_cmd)
728  job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dir,
729  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
730  sout, serr = job.communicate()
731  if job.returncode != 0:
732  lines = sout.splitlines()
733  for line in lines:
734  ost.LogError(line.strip())
735  lines = serr.splitlines()
736  for line in lines:
737  ost.LogError(line.strip())
738  return None
739  return hhr_file
740 
741 
742 def _ParseOptions(opts):
743  """
744  :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
745  passed to command ("-" added in front of keys, options
746  separated by space) and opt_str (options separated by "_")
747  can be used for filenames.
748  :param opts: Dictionary of options, one "-" is added in front of every
749  key. Boolean True values add flag without value.
750  """
751  opt_cmd = list()
752  opt_str = list()
753  for k, val in opts.iteritems():
754  if type(val) == type(True):
755  if val == True:
756  opt_cmd.append('-%s' % str(k))
757  opt_str.append(str(k))
758  else:
759  opt_cmd.append('-%s %s' % (str(k), str(val)))
760  opt_str.append('%s%s' % (str(k), str(val)))
761  opt_cmd = ' '.join(opt_cmd)
762  opt_str = '_'.join(opt_str)
763  return opt_cmd, opt_str
764 
765 
766 __all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
767  'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
768  'ParseHeaderLine']
769 
770 # LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
771 # LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
772 # LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
773 # LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
774 # LocalWords: whitespace whitespaces
void DLLEXPORT_OST_IO SaveSequence(const seq::ConstSequenceHandle &sequence, const String &filename, const String &format="auto")