00001 '''HHblits wrapper.
00002 '''
00003
00004 import subprocess
00005 import datetime
00006 import os
00007 import shutil
00008 import tempfile
00009
00010 import ost
00011 from ost import settings, seq
00012 from ost.bindings import utils
00013
00014 class HHblitsHit:
00015 """
00016 A hit found by HHblits
00017
00018 .. attribute:: hit_id
00019
00020 String identifying the hit
00021
00022 :type: :class:`str`
00023
00024 .. attribute:: aln
00025
00026 Pairwise alignment containing the aligned part between the query and the
00027 target. First sequence is the query, the second sequence the target.
00028
00029 :type: :class:`~ost.seq.AlignmentHandle`
00030
00031 .. attribute:: score
00032
00033 The alignment score
00034
00035 :type: :class:`float`
00036
00037 .. attribute:: ss_score
00038
00039 The secondary structure score
00040
00041 :type: :class:`float`
00042
00043 .. attribute:: evalue
00044
00045 The E-value of the alignment
00046
00047 :type: :class:`float`
00048
00049 .. attribute:: pvalue
00050
00051 The P-value of the alignment
00052
00053 :type: :class:`float`
00054
00055 .. attribute:: prob
00056
00057 The probability of the alignment (between 0 and 100)
00058
00059 :type: :class:`float`
00060 """
00061 def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
00062 self.hit_id = hit_id
00063 self.aln = aln
00064 self.score = score
00065 self.ss_score = ss_score
00066 self.evalue = evalue
00067 self.prob = prob
00068 self.pvalue = pvalue
00069
00070 class HHblitsHeader:
00071 """Stats from the beginning of search output.
00072
00073 .. attribute:: query
00074
00075 The name of the query sequence
00076
00077 :type: :class:`str`
00078
00079 .. attribute:: match_columns
00080
00081 Total of aligned Match columns
00082
00083 :type: :class:`int`
00084
00085 .. attribute:: n_eff
00086
00087 Value of the ``-neff`` option
00088
00089 :type: :class:`float`
00090
00091 .. attribute:: searched_hmms
00092
00093 Number of profiles searched
00094
00095 :type: :class:`int`
00096
00097 .. attribute:: date
00098
00099 Execution date
00100
00101 :type: :class:`datetime.datetime`
00102
00103 .. attribute:: command
00104
00105 Command used to run
00106
00107 :type: :class:`str`
00108 """
00109 def __init__(self):
00110 self.query = ''
00111 self.match_columns = 0
00112 self.n_eff = 0
00113 self.searched_hmms = 0
00114 self.date = None
00115 self.command = ''
00116
00117 def ParseHeaderLine(line):
00118 '''Fetch header content.
00119
00120 First, we seek the start of the identifier, that is, the first whitespace
00121 after the hit number + 1. Since the identifier may contain whitespaces
00122 itself, we cannot split the whole line
00123
00124 :param line: Line from the output header.
00125 :type line: :class:`str`
00126
00127 :return: Hit information
00128 :rtype: :class:`HHblitsHit`
00129 '''
00130 for i in range(0, len(line)):
00131 if line[i].isdigit():
00132 break
00133 for i in range(i, len(line)):
00134 if line[i] == ' ':
00135 break
00136 assert len(line)-i >= 31 and line[i+1] != ' '
00137 hit_id = line[i+1:i+31].strip()
00138 fields = line[i+32:].split()
00139 prob = float(fields[0])
00140 evalue = float(fields[1])
00141 pvalue = float(fields[2])
00142 score = float(fields[3])
00143 ss_score = float(fields[4])
00144 offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
00145 return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
00146 offsets)
00147
00148 def ParseHHblitsOutput(output):
00149 """
00150 Parses the HHblits output and returns a tuple of :class:`HHblitsHeader` and
00151 a list of :class:`HHblitsHit` instances.
00152
00153 :param output: output of a :meth:`HHblits.Search`, needs to be iteratable,
00154 e.g. an open file handle
00155 :type output: :class:`file`/ iteratable
00156
00157 :return: a tuple of the header of the search results and the hits
00158 :rtype: (:class:`HHblitsHeader`, :class:`HHblitsHit`)
00159 """
00160 lines = iter(output)
00161 def _ParseHeaderSection(lines):
00162 value_start_column = 14
00163 date_pattern = '%a %b %d %H:%M:%S %Y'
00164 header = HHblitsHeader()
00165 line = lines.next()
00166 assert line.startswith('Query')
00167 header.query = line[value_start_column:].strip()
00168 line = lines.next()
00169 assert line.startswith('Match_columns')
00170 header.match_columns = int(line[value_start_column:].strip())
00171
00172 line = lines.next()
00173 assert line.startswith('No_of_seqs')
00174
00175 line = lines.next()
00176 assert line.startswith('Neff')
00177 header.n_eff = float(line[value_start_column:].strip())
00178
00179 line = lines.next()
00180 assert line.startswith('Searched_HMMs')
00181 header.searched_hmms = int(line[value_start_column:].strip())
00182
00183 line = lines.next()
00184 assert line.startswith('Date')
00185 value = line[value_start_column:].strip()
00186 header.date = datetime.datetime.strptime(value, date_pattern)
00187
00188 line = lines.next()
00189 assert line.startswith('Command')
00190 header.command = line[value_start_column:].strip()
00191
00192 line = lines.next()
00193 assert len(line.strip()) == 0
00194 return header
00195
00196 def _ParseTableOfContents(lines):
00197 assert lines.next().startswith(' No Hit')
00198 hits = []
00199 while True:
00200 line = lines.next()
00201 if len(line.strip()) == 0:
00202 return hits
00203 hits.append(ParseHeaderLine(line))
00204 return hits
00205
00206 def _ParseResultBody(query_id, hits, lines):
00207 entry_index = None
00208 query_str = ''
00209 templ_str = ''
00210 def _MakeAln(query_id, hit_id, query_string, templ_string,
00211 q_offset, t_offset):
00212 s1 = seq.CreateSequence(query_id, query_string)
00213 s1.offset = q_offset-1
00214 s2 = seq.CreateSequence(hit_id, templ_string)
00215 s2.offset = t_offset-1
00216 return seq.CreateAlignment(s1, s2)
00217 try:
00218 while True:
00219 line = lines.next()
00220 if len(line.strip()) == 0:
00221 continue
00222 if line.startswith('Done!'):
00223 if len(query_str) > 0:
00224 hits[entry_index][0].aln = _MakeAln(\
00225 query_id, hits[entry_index][0].hit_id,
00226 query_str, templ_str, *hits[entry_index][1])
00227 return [h for h, o in hits]
00228 if line.startswith('No '):
00229 if len(query_str) > 0:
00230 hits[entry_index][0].aln = _MakeAln(\
00231 query_id, hits[entry_index][0].hit_id,
00232 query_str, templ_str, *hits[entry_index][1])
00233 entry_index = int(line[3:].strip())-1
00234 hits[entry_index][0].hit_id = lines.next()[1:].strip()
00235 query_str = ''
00236 templ_str = ''
00237
00238
00239 lines.next()
00240 continue
00241 assert entry_index != None
00242 if line[1:].startswith(' Consensus'):
00243 continue
00244 if line[1:].startswith(' ss_pred'):
00245 continue
00246 if line[1:].startswith(' ss_conf'):
00247 continue
00248 if line.startswith('T '):
00249 end_pos = line.find(' ', 22)
00250 assert end_pos != -1
00251 templ_str += line[22:end_pos]
00252 if line.startswith('Q '):
00253 end_pos = line.find(' ', 22)
00254 assert end_pos != -1
00255 query_str += line[22:end_pos]
00256 except StopIteration:
00257 if len(query_str) > 0:
00258 hits[entry_index][0].aln = _MakeAln(query_id,
00259 hits[entry_index][0].hit_id,
00260 query_str, templ_str,
00261 *hits[entry_index][1])
00262 return [h for h, o in hits]
00263 header = _ParseHeaderSection(lines)
00264
00265
00266
00267
00268 hits = _ParseTableOfContents(lines)
00269 return header, _ParseResultBody(header.query, hits, lines)
00270
00271 def ParseA3M(a3m_file):
00272 '''
00273 Parse secondary structure information and the multiple sequence alignment
00274 out of an A3M file.
00275
00276 :param a3m_file: Iteratable containing the lines of the A3M file
00277 :type a3m_file: iteratable, e.g. an opened file
00278
00279 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
00280 (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
00281 '''
00282 profile_dict = dict()
00283 state = 'NONE'
00284 pred_seq_txt = ''
00285 conf_seq_txt = ''
00286 msa_seq = list()
00287 msa_head = list()
00288 for line in a3m_file:
00289 if len(line.rstrip()) == 0:
00290 continue
00291 elif line.startswith('>ss_pred'):
00292 state = 'sspred'
00293 continue
00294 elif line.startswith('>ss_conf'):
00295 state = 'ssconf'
00296 continue
00297 elif line[0] == '>':
00298 if state == 'ssconf' or state == 'msa':
00299 msa_seq.append('')
00300 msa_head.append(line[1:].rstrip())
00301 else:
00302 raise IOError('The A3M file is missing the "ss_conf" section')
00303 state = 'msa'
00304 continue
00305
00306 if state == 'sspred':
00307 pred_seq_txt += line.rstrip()
00308 elif state == 'ssconf':
00309 conf_seq_txt += line.rstrip()
00310 elif state == 'msa':
00311 msa_seq[len(msa_seq)-1] += line.rstrip()
00312
00313 profile_dict['ss_pred'] = list()
00314 profile_dict['ss_conf'] = list()
00315 for i in range(0, len(pred_seq_txt)):
00316 profile_dict['ss_pred'].append(pred_seq_txt[i])
00317 profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
00318
00319
00320
00321 profile_dict['msa'] = None
00322 if len(msa_seq) > 1:
00323 t = msa_seq[0]
00324 al = seq.AlignmentList()
00325 for i in range(1, len(msa_seq)):
00326 qs = ''
00327 ts = ''
00328 k = 0
00329 for c in msa_seq[i]:
00330 if c.islower():
00331 qs += '-'
00332 ts += c.upper()
00333 else:
00334 qs += t[k]
00335 ts += c
00336 k += 1
00337 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
00338 seq.CreateSequence(msa_head[i], ts))
00339 al.append(nl)
00340 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
00341 al,
00342 seq.CreateSequence(msa_head[0],
00343 t))
00344 return profile_dict
00345
00346
00347 def ParseHHM(profile):
00348 '''Parse secondary structure information and the MSA out of an HHM profile.
00349
00350 :param profile: Opened file handle holding the profile.
00351 :type profile: :class:`file`
00352
00353 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
00354 (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
00355 "consensus" (~ost.seq.SequenceHandle).
00356 '''
00357 profile_dict = dict()
00358 state = 'NONE'
00359 pred_seq_txt = ''
00360 conf_seq_txt = ''
00361 consensus_txt = ''
00362 msa_seq = list()
00363 msa_head = list()
00364 for line in profile:
00365 if len(line.rstrip()) == 0:
00366 continue
00367 if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
00368 state = 'sspred'
00369 continue
00370 elif line.rstrip() == '>ss_conf PSIPRED confidence values':
00371 state = 'ssconf'
00372 continue
00373 elif line.rstrip() == '>Consensus':
00374 state = 'consensus'
00375 continue
00376 elif line[0] == '>':
00377 if state == 'consensus' or state == 'msa':
00378 msa_seq.append('')
00379 msa_head.append(line[1:].rstrip())
00380 else:
00381 raise IOError('Profile file "%s" is missing ' % profile.name+
00382 'the "Consensus" section')
00383 state = 'msa'
00384 continue
00385 elif line[0] == '#':
00386 state = 'NONE'
00387 continue
00388
00389 if state == 'sspred':
00390 pred_seq_txt += line.rstrip()
00391 elif state == 'ssconf':
00392 conf_seq_txt += line.rstrip()
00393 elif state == 'msa':
00394 msa_seq[len(msa_seq)-1] += line.rstrip()
00395 elif state == 'consensus':
00396 consensus_txt += line.rstrip()
00397
00398 profile_dict['ss_pred'] = list()
00399 profile_dict['ss_conf'] = list()
00400 for i in range(0, len(pred_seq_txt)):
00401 profile_dict['ss_pred'].append(pred_seq_txt[i])
00402 profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
00403
00404
00405
00406 profile_dict['msa'] = None
00407 if len(msa_seq):
00408 t = msa_seq[0]
00409 al = seq.AlignmentList()
00410 for i in range(1, len(msa_seq)):
00411 qs = ''
00412 ts = ''
00413 k = 0
00414 for c in msa_seq[i]:
00415 if c.islower():
00416 qs += '-'
00417 ts += c.upper()
00418 else:
00419 qs += t[k]
00420 ts += c
00421 k += 1
00422 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
00423 seq.CreateSequence(msa_head[i], ts))
00424 al.append(nl)
00425 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
00426 al,
00427 seq.CreateSequence(msa_head[0], t))
00428
00429
00430 profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
00431
00432 return profile_dict
00433
00434 def EstimateMemConsumption():
00435 """
00436 Estimate the memory needed by HHblits. By default it uses not more than 3G.
00437 Also for small sequences it already uses quite some memnmory (46AA, 1.48G).
00438 And since the memory consumption could depend on the iterative search runs,
00439 how many hits are found in each step, we just go with 4G, here.
00440
00441 :return: Assumed memory consumtion
00442 :rtype: (:class:`float`, :class:`str`)
00443 """
00444 return 4.0, 'G'
00445
00446 class HHblits:
00447 """
00448 Initialise a new HHblits "search" for the given query. Query may either
00449 be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
00450 query is the actual query sequence, in the latter case, the query is the
00451 filename to the file containing the query.
00452
00453 :param query: Query sequence as file or sequence.
00454 :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
00455 :param hhsuite_root: Path to the top-level directory of your hhsuite
00456 installation.
00457 :type hhsuite_root: :class:`str`
00458 :param hhblits_bin: Name of the hhblits binary. Will only be used if
00459 :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
00460 :type hhblits_bin: :class:`str`
00461 :param working_dir: Directory for temporary files. Will be created if not
00462 present but **not** automatically deleted.
00463 :type working_dir: :class:`str`
00464
00465 """
00466 OUTPUT_PREFIX = 'query_hhblits'
00467 def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
00468 self.query = query
00469 self.hhsuite_root = hhsuite_root
00470 if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
00471 self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
00472 self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
00473 else:
00474 self.hhblits_bin = settings.Locate('hhblits',
00475 explicit_file_name=hhblits_bin)
00476 self.bin_dir = os.path.dirname(self.hhblits_bin)
00477 self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
00478 if working_dir:
00479 self.needs_cleanup = False
00480 self.working_dir = working_dir
00481 if not os.path.exists(working_dir):
00482 os.mkdir(working_dir)
00483 if isinstance(query, str):
00484 self.filename = os.path.abspath(os.path.join(
00485 self.working_dir, os.path.basename(query)))
00486 if self.filename != os.path.abspath(query):
00487 shutil.copy(query, self.filename)
00488 else:
00489 self.filename = os.path.join(self.working_dir,
00490 '%s.fasta' % HHblits.OUTPUT_PREFIX)
00491 ost.io.SaveSequence(query, self.filename)
00492 else:
00493 self.needs_cleanup = True
00494 if isinstance(query, str):
00495 self.working_dir = tempfile.mkdtemp()
00496 self.filename = os.path.abspath(os.path.join(
00497 self.working_dir, os.path.basename(query)))
00498 shutil.copy(query, self.filename)
00499 else:
00500 tmp_dir = utils.TempDirWithFiles((query,))
00501 self.working_dir = tmp_dir.dirname
00502 self.filename = tmp_dir.files[0]
00503
00504 def Cleanup(self):
00505 """Delete temporary data.
00506
00507 Delete temporary data if no working dir was given. Controlled by
00508 :attr:`needs_cleanup`.
00509 """
00510 if self.needs_cleanup and os.path.exists(self.working_dir):
00511 shutil.rmtree(self.working_dir)
00512
00513 def BuildQueryMSA(self, nrdb, iterations=1, mact=None, cpu=1):
00514 """Builds the MSA for the query sequence
00515
00516 This function directly uses hhblits of hhtools. While in theory it
00517 would be possible to do this by PSI-blasting on our own, hhblits is
00518 supposed to be faster. Also it is supposed to prevent alignment
00519 corruption. The alignment corruption is caused by low-scoring terminal
00520 alignments that draw the sequences found by PSI-blast away from the
00521 optimum. By removing these low scoring ends, part of the alignment
00522 corruption can be suppressed. hhblits does **not** call PSIPRED on the
00523 MSA to predict the secondary structure of the query sequence. This is
00524 done by addss.pl of hhtools. The predicted secondary structure is
00525 stored together with the sequences identified by hhblits.
00526
00527 :param nrdb: Database to be align against; has to be an hhblits database
00528 :type nrdb: :class:`str`
00529
00530 :param iterations: Number of hhblits iterations
00531 :type iterations: :class:`int`
00532
00533 :param mact: ``-mact`` of hhblits
00534 :type mact: :class:`float`
00535
00536 :param cpu: ``-cpu`` of hhblits
00537 :type cpu: :class:`int`
00538
00539 :return: the path to the MSA file
00540 :rtype: :class:`str`
00541 """
00542 a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
00543 ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
00544 full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
00545 os.path.split(nrdb)[1])
00546
00547 hhblits_cmd = '%s -e 0.001 -cpu %d -i %s -oa3m %s -d %s -n %d' % \
00548 (self.hhblits_bin, cpu, self.filename, a3m_file,
00549 full_nrdb, iterations)
00550 if mact:
00551 hhblits_cmd += '-mact %f' % mact
00552 job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
00553 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00554 sout, _ = job.communicate()
00555
00556
00557
00558
00559
00560
00561 if not os.path.exists(a3m_file):
00562 ost.LogWarning('Building query profile failed, no output')
00563 return a3m_file
00564
00565 addss_cmd = "%s %s" % (os.path.join(self.hhsuite_root,
00566 'lib/hh/scripts/addss.pl'),
00567 a3m_file)
00568 env = dict(os.environ)
00569 env.update({'PERL5LIB' : os.path.join(self.hhsuite_root,
00570 'lib/hh/scripts'),
00571 'HHLIB' : self.hhlib_dir,
00572 'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
00573 os.environ['PATH'])})
00574 job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
00575 env=env, stdout=subprocess.PIPE,
00576 stderr=subprocess.PIPE)
00577 sout, serr = job.communicate()
00578 lines = sout.splitlines()
00579 for line in lines:
00580 if 'error' in line.lower():
00581 ost.LogWarning('Predicting secondary structure for MSA '+
00582 '(%s) failed, on command: %s' % (a3m_file, line))
00583 return a3m_file
00584 return a3m_file
00585
00586 def A3MToProfile(self, a3m_file, hhm_file=None):
00587 """
00588 Converts the A3M alignment file to a hhm profile. If hhm_file is not
00589 given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
00590
00591 :param a3m_file: input MSA
00592 :type a3m_file: :class:`str`
00593
00594 :param hhm_file: output file name
00595 :type hhm_file: :class:`str`
00596
00597 :return: the path to the profile
00598 :rtype: :class:`str`
00599 """
00600 hhmake = os.path.join(self.bin_dir, 'hhmake')
00601 if not hhm_file:
00602 hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
00603 if os.path.exists(hhm_file):
00604 return hhm_file
00605 ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
00606 os.putenv('HHLIB', self.hhlib_dir)
00607 if subprocess.call('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
00608 shell=True):
00609 raise IOError('could not convert a3m to hhm file')
00610 return hhm_file
00611
00612
00613 def A3MToCS(self, a3m_file, cs_file=None, options={}):
00614 """
00615 Converts the A3M alignment file to a column state sequence file. If
00616 cs_file is not given, the output file will be set to
00617 <:attr:`a3m_file`-basename>.seq219.
00618
00619 :param a3m_file: A3M file to be converted
00620 :type a3m_file: :class:`str`
00621
00622 :param cs_file: output file name (may be omitted)
00623 :type cs_file: :class:`str`
00624
00625 :param options: dictionary of options to *cstranslate*, must come with
00626 the right amount of '-' in front.
00627 :type options: :class:`dict`
00628
00629 :return: the path to the column state sequence file
00630 :rtype: :class:`str`
00631 """
00632 cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
00633 if not cs_file:
00634 cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
00635 if os.path.exists(cs_file):
00636 return cs_file
00637 opt_cmd = list()
00638 for k, val in options.iteritems():
00639 if type(val) == type(True):
00640 if val == True:
00641 opt_cmd.append('%s' % str(k))
00642 else:
00643 opt_cmd.append('%s %s' % (str(k), str(val)))
00644 opt_cmd = ' '.join(opt_cmd)
00645 cs_cmd = '%s -i %s -o %s %s' % (cstranslate, a3m_file, cs_file, opt_cmd)
00646 ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
00647 job = subprocess.Popen(cs_cmd, shell=True,
00648 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00649 sout, _ = job.communicate()
00650
00651
00652
00653 lines = sout.splitlines()
00654 for line in lines:
00655 if line in 'Wrote abstract state sequence to %s' % cs_file:
00656 return cs_file
00657 ost.LogWarning('Creating column state sequence file (%s) failed' % \
00658 cs_file)
00659
00660 def CleanupFailed(self):
00661 '''In case something went wrong, call to make sure everything is clean.
00662 '''
00663 store_needs_cleanup = self.needs_cleanup
00664 self.needs_cleanup = True
00665 self.Cleanup()
00666 self.needs_cleanup = store_needs_cleanup
00667
00668 def Search(self, a3m_file, database, options={}, prefix=''):
00669 """
00670 Searches for templates in the given database. Before running the
00671 search, the hhm file is copied. This makes it possible to launch
00672 several hhblits instances at once. Upon success, the filename of the
00673 result file is returned. This file may be parsed with
00674 :func:`ParseHHblitsOutput`.
00675
00676 :param a3m_file: input MSA file
00677 :type a3m_file: :class:`str`
00678
00679 :param database: search database, needs to be the common prefix of the
00680 database files
00681 :type database: :class:`str`
00682
00683 :param options: dictionary of options, must come with the right amount
00684 of '-' in front.
00685 :type options: :class:`dict`
00686
00687 :param prefix: prefix to the result file
00688 :type prefix: :class:`str`
00689
00690 :return: the path to the result file
00691 :rtype: :class:`str`
00692 """
00693 opts = {'cpu' : 1,
00694 'n' : 1}
00695 opts.update(options)
00696 opt_cmd = []
00697 opt_str = []
00698 for k, val in opts.iteritems():
00699 if type(val) == type(True):
00700 if val == True:
00701 opt_cmd.append('-%s' % str(k))
00702 opt_str.append(str(k))
00703 else:
00704 opt_cmd.append('-%s %s' % (str(k), str(val)))
00705 opt_str.append('%s%s' % (str(k), str(val)))
00706 opt_cmd = ' '.join(opt_cmd)
00707 opt_str = '_'.join(opt_str)
00708 base = os.path.basename(os.path.splitext(a3m_file)[0])
00709 hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
00710 hhr_file = os.path.join(self.working_dir, hhr_file)
00711 search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s'%(
00712 self.hhblits_bin,
00713 opt_cmd, os.path.abspath(a3m_file),
00714 hhr_file,
00715 os.path.join(os.path.abspath(os.path.split(database)[0]),
00716 os.path.split(database)[1]))
00717 ost.LogInfo('searching %s' % database)
00718 ost.LogVerbose(search_cmd)
00719 job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dir,
00720 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00721 sout, serr = job.communicate()
00722 if job.returncode != 0:
00723 lines = sout.splitlines()
00724 for line in lines:
00725 print line.strip()
00726 lines = serr.splitlines()
00727 for line in lines:
00728 print line.strip()
00729 return None
00730 return hhr_file
00731
00732
00733 __all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader', 'ParseHeaderLine',
00734 'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
00735 'EstimateMemConsumption']
00736
00737
00738
00739
00740
00741