OpenStructure: stage/lib64/python2.7/site-packages/ost/bindings/hhblits.py Source File

00001 '''HHblits wrapper.
00002 '''
00003 
00004 import subprocess
00005 import datetime
00006 import os
00007 import shutil
00008 import tempfile
00009 
00010 import ost
00011 from ost import settings, seq
00012 from ost.bindings import utils
00013 
00014 class HHblitsHit:
00015     """
00016     A hit found by HHblits
00017 
00018     .. attribute:: hit_id
00019 
00020       String identifying the hit
00021 
00022       :type: :class:`str`
00023 
00024     .. attribute:: aln
00025 
00026       Pairwise alignment containing the aligned part between the query and the
00027       target. First sequence is the query, the second sequence the target.
00028 
00029       :type: :class:`~ost.seq.AlignmentHandle`
00030 
00031     .. attribute:: score
00032 
00033       The alignment score
00034 
00035       :type: :class:`float`
00036 
00037     .. attribute:: ss_score
00038 
00039       The secondary structure score
00040 
00041       :type: :class:`float`
00042 
00043     .. attribute:: evalue
00044 
00045       The E-value of the alignment
00046 
00047       :type: :class:`float`
00048 
00049     .. attribute:: pvalue
00050 
00051       The P-value of the alignment
00052 
00053       :type: :class:`float`
00054 
00055     .. attribute:: prob
00056 
00057       The probability of the alignment (between 0 and 100)
00058 
00059       :type: :class:`float`
00060     """
00061     def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
00062         self.hit_id = hit_id
00063         self.aln = aln
00064         self.score = score
00065         self.ss_score = ss_score
00066         self.evalue = evalue
00067         self.prob = prob
00068         self.pvalue = pvalue
00069 
00070 class HHblitsHeader:
00071     """Stats from the beginning of search output.
00072 
00073     .. attribute:: query
00074 
00075       The name of the query sequence
00076 
00077       :type: :class:`str`
00078 
00079     .. attribute:: match_columns
00080 
00081       Total of aligned Match columns
00082 
00083       :type: :class:`int`
00084 
00085     .. attribute:: n_eff
00086 
00087       Value of the ``-neff`` option
00088 
00089       :type: :class:`float`
00090 
00091     .. attribute:: searched_hmms
00092 
00093       Number of profiles searched
00094 
00095       :type: :class:`int`
00096 
00097     .. attribute:: date
00098 
00099       Execution date
00100 
00101       :type: :class:`datetime.datetime`
00102 
00103     .. attribute:: command
00104 
00105       Command used to run
00106 
00107       :type: :class:`str`
00108     """
00109     def __init__(self):
00110         self.query = ''
00111         self.match_columns = 0
00112         self.n_eff = 0
00113         self.searched_hmms = 0
00114         self.date = None
00115         self.command = ''
00116 
00117 def ParseHeaderLine(line):
00118     '''Fetch header content.
00119 
00120     First, we seek the start of the identifier, that is, the first whitespace
00121     after the hit number + 1. Since the identifier may contain whitespaces
00122     itself, we cannot split the whole line
00123 
00124     :param line: Line from the output header.
00125     :type line: :class:`str`
00126 
00127     :return: Hit information
00128     :rtype: :class:`HHblitsHit`
00129     '''
00130     for i in range(0, len(line)):
00131         if line[i].isdigit():
00132             break
00133     for i in range(i, len(line)):
00134         if line[i] == ' ':
00135             break
00136     assert len(line)-i >= 31 and line[i+1] != ' '
00137     hit_id = line[i+1:i+31].strip()
00138     fields = line[i+32:].split()
00139     prob = float(fields[0])
00140     evalue = float(fields[1])
00141     pvalue = float(fields[2])
00142     score = float(fields[3])
00143     ss_score = float(fields[4])
00144     offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
00145     return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
00146             offsets)
00147 
00148 def ParseHHblitsOutput(output):
00149     """
00150     Parses the HHblits output and returns a tuple of :class:`HHblitsHeader` and
00151     a list of :class:`HHblitsHit` instances.
00152 
00153     :param output: output of a :meth:`HHblits.Search`, needs to be iteratable,
00154                    e.g. an open file handle
00155     :type output: :class:`file`/ iteratable
00156 
00157     :return: a tuple of the header of the search results and the hits
00158     :rtype: (:class:`HHblitsHeader`, :class:`HHblitsHit`)
00159     """
00160     lines = iter(output)
00161     def _ParseHeaderSection(lines):
00162         value_start_column = 14
00163         date_pattern = '%a %b %d %H:%M:%S %Y'
00164         header = HHblitsHeader()
00165         line = lines.next()
00166         assert line.startswith('Query')
00167         header.query = line[value_start_column:].strip()
00168         line = lines.next()
00169         assert line.startswith('Match_columns')
00170         header.match_columns = int(line[value_start_column:].strip())
00171 
00172         line = lines.next()
00173         assert line.startswith('No_of_seqs')
00174 
00175         line = lines.next()
00176         assert line.startswith('Neff')
00177         header.n_eff = float(line[value_start_column:].strip())
00178 
00179         line = lines.next()
00180         assert line.startswith('Searched_HMMs')
00181         header.searched_hmms = int(line[value_start_column:].strip())
00182 
00183         line = lines.next()
00184         assert line.startswith('Date')
00185         value = line[value_start_column:].strip()
00186         header.date = datetime.datetime.strptime(value, date_pattern)
00187 
00188         line = lines.next()
00189         assert line.startswith('Command')
00190         header.command = line[value_start_column:].strip()
00191 
00192         line = lines.next()
00193         assert len(line.strip()) == 0
00194         return header
00195 
00196     def _ParseTableOfContents(lines):
00197         assert lines.next().startswith(' No Hit')
00198         hits = []
00199         while True:
00200             line = lines.next()
00201             if len(line.strip()) == 0:
00202                 return hits
00203             hits.append(ParseHeaderLine(line))
00204         return hits
00205 
00206     def _ParseResultBody(query_id, hits, lines):
00207         entry_index = None
00208         query_str = ''
00209         templ_str = ''
00210         def _MakeAln(query_id, hit_id, query_string, templ_string,
00211                      q_offset, t_offset):
00212             s1 = seq.CreateSequence(query_id, query_string)
00213             s1.offset = q_offset-1
00214             s2 = seq.CreateSequence(hit_id, templ_string)
00215             s2.offset = t_offset-1
00216             return seq.CreateAlignment(s1, s2)
00217         try:
00218             while True:
00219                 line = lines.next()
00220                 if len(line.strip()) == 0:
00221                     continue
00222                 if line.startswith('Done!'):
00223                     if len(query_str) > 0:
00224                         hits[entry_index][0].aln = _MakeAln(\
00225                             query_id, hits[entry_index][0].hit_id,
00226                             query_str, templ_str, *hits[entry_index][1])
00227                     return [h for h, o in hits]
00228                 if line.startswith('No '):
00229                     if len(query_str) > 0:
00230                         hits[entry_index][0].aln = _MakeAln(\
00231                             query_id, hits[entry_index][0].hit_id,
00232                             query_str, templ_str, *hits[entry_index][1])
00233                     entry_index = int(line[3:].strip())-1
00234                     hits[entry_index][0].hit_id = lines.next()[1:].strip()
00235                     query_str = ''
00236                     templ_str = ''
00237                     # skip the next line. It doesn't contain information we
00238                     # don't already know
00239                     lines.next()
00240                     continue
00241                 assert entry_index != None
00242                 if line[1:].startswith(' Consensus'):
00243                     continue
00244                 if line[1:].startswith(' ss_pred'):
00245                     continue
00246                 if line[1:].startswith(' ss_conf'):
00247                     continue
00248                 if line.startswith('T '):
00249                     end_pos = line.find(' ', 22)
00250                     assert end_pos != -1
00251                     templ_str += line[22:end_pos]
00252                 if line.startswith('Q '):
00253                     end_pos = line.find(' ', 22)
00254                     assert end_pos != -1
00255                     query_str += line[22:end_pos]
00256         except StopIteration:
00257             if len(query_str) > 0:
00258                 hits[entry_index][0].aln = _MakeAln(query_id,
00259                                                     hits[entry_index][0].hit_id,
00260                                                     query_str, templ_str,
00261                                                     *hits[entry_index][1])
00262             return [h for h, o in hits]
00263     header = _ParseHeaderSection(lines)
00264     # parse the table of contents. This is neccessary as some of the properties
00265     # (i.e. start of alignment) we need are only given there. From the TOC we
00266     # create a list of hits that is then further filled with data when we parse
00267     # the actual result body
00268     hits = _ParseTableOfContents(lines)
00269     return header, _ParseResultBody(header.query, hits, lines)
00270 
00271 def ParseA3M(a3m_file):
00272     '''
00273     Parse secondary structure information and the multiple sequence alignment 
00274     out of an A3M file.
00275     
00276     :param a3m_file: Iteratable containing the lines of the A3M file
00277     :type a3m_file: iteratable, e.g. an opened file
00278     
00279     :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
00280              (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
00281     '''
00282     profile_dict = dict()
00283     state = 'NONE'
00284     pred_seq_txt = ''
00285     conf_seq_txt = ''
00286     msa_seq = list()
00287     msa_head = list()
00288     for line in a3m_file:
00289         if len(line.rstrip()) == 0:
00290             continue
00291         elif line.startswith('>ss_pred'):
00292             state = 'sspred'
00293             continue
00294         elif line.startswith('>ss_conf'):
00295             state = 'ssconf'
00296             continue
00297         elif line[0] == '>':
00298             if state == 'ssconf' or state == 'msa':
00299                 msa_seq.append('')
00300                 msa_head.append(line[1:].rstrip())
00301             else:
00302                 raise IOError('The A3M file is missing the "ss_conf" section')
00303             state = 'msa'
00304             continue
00305 
00306         if state == 'sspred':
00307             pred_seq_txt += line.rstrip()
00308         elif state == 'ssconf':
00309             conf_seq_txt += line.rstrip()
00310         elif state == 'msa':
00311             msa_seq[len(msa_seq)-1] += line.rstrip()
00312 
00313     profile_dict['ss_pred'] = list()
00314     profile_dict['ss_conf'] = list()
00315     for i in range(0, len(pred_seq_txt)):
00316         profile_dict['ss_pred'].append(pred_seq_txt[i])
00317         profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
00318     
00319     # post processing
00320     # MSA
00321     profile_dict['msa'] = None
00322     if len(msa_seq) > 1:
00323         t = msa_seq[0]
00324         al = seq.AlignmentList()
00325         for i in range(1, len(msa_seq)):
00326           qs = ''
00327           ts = ''
00328           k = 0
00329           for c in msa_seq[i]:
00330             if c.islower():
00331               qs += '-'
00332               ts += c.upper()
00333             else:
00334               qs += t[k]
00335               ts += c
00336               k += 1
00337           nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs), 
00338                                  seq.CreateSequence(msa_head[i], ts))
00339           al.append(nl)
00340         profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
00341                                                 al,
00342                                                 seq.CreateSequence(msa_head[0],
00343                                                                    t))
00344     return profile_dict
00345 
00346 
00347 def ParseHHM(profile):
00348     '''Parse secondary structure information and the MSA out of an HHM profile.
00349 
00350     :param profile: Opened file handle holding the profile.
00351     :type profile: :class:`file`
00352 
00353     :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
00354              (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
00355              "consensus" (~ost.seq.SequenceHandle).
00356     '''
00357     profile_dict = dict()
00358     state = 'NONE'
00359     pred_seq_txt = ''
00360     conf_seq_txt = ''
00361     consensus_txt = ''
00362     msa_seq = list()
00363     msa_head = list()
00364     for line in profile:
00365         if len(line.rstrip()) == 0:
00366             continue
00367         if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
00368             state = 'sspred'
00369             continue
00370         elif line.rstrip() == '>ss_conf PSIPRED confidence values':
00371             state = 'ssconf'
00372             continue
00373         elif line.rstrip() == '>Consensus':
00374             state = 'consensus'
00375             continue
00376         elif line[0] == '>':
00377             if state == 'consensus' or state == 'msa':
00378                 msa_seq.append('')
00379                 msa_head.append(line[1:].rstrip())
00380             else:
00381                 raise IOError('Profile file "%s" is missing ' % profile.name+
00382                               'the "Consensus" section')
00383             state = 'msa'
00384             continue
00385         elif line[0] == '#':
00386             state = 'NONE'
00387             continue
00388 
00389         if state == 'sspred':
00390             pred_seq_txt += line.rstrip()
00391         elif state == 'ssconf':
00392             conf_seq_txt += line.rstrip()
00393         elif state == 'msa':
00394             msa_seq[len(msa_seq)-1] += line.rstrip()
00395         elif state == 'consensus':
00396             consensus_txt += line.rstrip()
00397 
00398     profile_dict['ss_pred'] = list()
00399     profile_dict['ss_conf'] = list()
00400     for i in range(0, len(pred_seq_txt)):
00401         profile_dict['ss_pred'].append(pred_seq_txt[i])
00402         profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
00403 
00404     # post processing
00405     # MSA
00406     profile_dict['msa'] = None
00407     if len(msa_seq):
00408         t = msa_seq[0]
00409         al = seq.AlignmentList()
00410         for i in range(1, len(msa_seq)):
00411             qs = ''
00412             ts = ''
00413             k = 0
00414             for c in msa_seq[i]:
00415                 if c.islower():
00416                     qs += '-'
00417                     ts += c.upper()
00418                 else:
00419                     qs += t[k]
00420                     ts += c
00421                     k += 1
00422             nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
00423                                      seq.CreateSequence(msa_head[i], ts))
00424             al.append(nl)
00425         profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
00426                                             al,
00427                                             seq.CreateSequence(msa_head[0], t))
00428       #print profile_dict['msa'].ToString(80)
00429     # Consensus
00430     profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
00431 
00432     return profile_dict
00433 
00434 def EstimateMemConsumption():
00435     """
00436     Estimate the memory needed by HHblits. By default it uses not more than 3G.
00437     Also for small sequences it already uses quite some memnmory (46AA, 1.48G).
00438     And since the memory consumption could depend on the iterative search runs,
00439     how many hits are found in each step, we just go with 4G, here.
00440 
00441     :return: Assumed memory consumtion
00442     :rtype: (:class:`float`, :class:`str`)
00443     """
00444     return 4.0, 'G'
00445 
00446 class HHblits:
00447     """
00448     Initialise a new HHblits "search" for the given query. Query may either
00449     be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
00450     query is the actual query sequence, in the latter case, the query is the
00451     filename to the file containing the query.
00452 
00453     :param query: Query sequence as file or sequence.
00454     :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
00455     :param hhsuite_root: Path to the top-level directory of your hhsuite
00456                          installation.
00457     :type hhsuite_root: :class:`str`
00458     :param hhblits_bin: Name of the hhblits binary. Will only be used if
00459                         :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
00460     :type hhblits_bin: :class:`str`
00461     :param working_dir: Directory for temporary files. Will be created if not
00462                         present but **not** automatically deleted.
00463     :type working_dir: :class:`str`
00464 
00465     """
00466     OUTPUT_PREFIX = 'query_hhblits'
00467     def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
00468         self.query = query
00469         self.hhsuite_root = hhsuite_root
00470         if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
00471             self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
00472             self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
00473         else:
00474             self.hhblits_bin = settings.Locate('hhblits',
00475                                                explicit_file_name=hhblits_bin)
00476             self.bin_dir = os.path.dirname(self.hhblits_bin)
00477         self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
00478         if working_dir:
00479             self.needs_cleanup = False
00480             self.working_dir = working_dir
00481             if not os.path.exists(working_dir):
00482                 os.mkdir(working_dir)
00483             if isinstance(query, str):
00484                 self.filename = os.path.abspath(os.path.join(
00485                     self.working_dir, os.path.basename(query)))
00486                 if self.filename != os.path.abspath(query):
00487                     shutil.copy(query, self.filename)
00488             else:
00489                 self.filename = os.path.join(self.working_dir,
00490                                              '%s.fasta' % HHblits.OUTPUT_PREFIX)
00491                 ost.io.SaveSequence(query, self.filename)
00492         else:
00493             self.needs_cleanup = True
00494             if isinstance(query, str):
00495                 self.working_dir = tempfile.mkdtemp()
00496                 self.filename = os.path.abspath(os.path.join(
00497                     self.working_dir, os.path.basename(query)))
00498                 shutil.copy(query, self.filename)
00499             else:
00500                 tmp_dir = utils.TempDirWithFiles((query,))
00501                 self.working_dir = tmp_dir.dirname
00502                 self.filename = tmp_dir.files[0]
00503 
00504     def Cleanup(self):
00505         """Delete temporary data.
00506 
00507         Delete temporary data if no working dir was given. Controlled by
00508         :attr:`needs_cleanup`.
00509         """
00510         if self.needs_cleanup and os.path.exists(self.working_dir):
00511             shutil.rmtree(self.working_dir)
00512 
00513     def BuildQueryMSA(self, nrdb, iterations=1, mact=None, cpu=1):
00514         """Builds the MSA for the query sequence
00515 
00516         This function directly uses hhblits of hhtools. While in theory it
00517         would be possible to do this by PSI-blasting on our own, hhblits is
00518         supposed to be faster. Also it is supposed to prevent alignment
00519         corruption. The alignment corruption is caused by low-scoring terminal
00520         alignments that draw the sequences found by PSI-blast away from the
00521         optimum. By removing these low scoring ends, part of the alignment
00522         corruption can be suppressed. hhblits does **not** call PSIPRED on the
00523         MSA to predict the secondary structure of the query sequence. This is
00524         done by addss.pl of hhtools. The predicted secondary structure is
00525         stored together with the sequences identified by hhblits.
00526 
00527         :param nrdb: Database to be align against; has to be an hhblits database
00528         :type nrdb: :class:`str`
00529 
00530         :param iterations: Number of hhblits iterations
00531         :type iterations: :class:`int`
00532 
00533         :param mact: ``-mact`` of hhblits
00534         :type mact: :class:`float`
00535 
00536         :param cpu: ``-cpu`` of hhblits
00537         :type cpu: :class:`int`
00538 
00539         :return: the path to the MSA file
00540         :rtype: :class:`str`
00541         """
00542         a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
00543         ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
00544         full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
00545                                  os.path.split(nrdb)[1])
00546         # create MSA
00547         hhblits_cmd = '%s -e 0.001 -cpu %d -i %s -oa3m %s -d %s -n %d' % \
00548                       (self.hhblits_bin, cpu, self.filename, a3m_file,
00549                        full_nrdb, iterations)
00550         if mact:
00551             hhblits_cmd += '-mact %f' % mact
00552         job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
00553                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00554         sout, _ = job.communicate()
00555         #lines = sout.splitlines()
00556         #for l in lines:
00557         #    print l.strip()
00558         #lines = serr.splitlines()
00559         #for l in lines:
00560         #    print l.strip()
00561         if not os.path.exists(a3m_file):
00562             ost.LogWarning('Building query profile failed, no output')
00563             return a3m_file
00564         # add secondary structure annotation
00565         addss_cmd = "%s %s" % (os.path.join(self.hhsuite_root,
00566                                             'lib/hh/scripts/addss.pl'),
00567                                a3m_file)
00568         env = dict(os.environ)
00569         env.update({'PERL5LIB' : os.path.join(self.hhsuite_root,
00570                                               'lib/hh/scripts'),
00571                     'HHLIB' : self.hhlib_dir,
00572                     'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
00573                                         os.environ['PATH'])})
00574         job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
00575                                env=env, stdout=subprocess.PIPE,
00576                                stderr=subprocess.PIPE)
00577         sout, serr = job.communicate()
00578         lines = sout.splitlines()
00579         for line in lines:
00580             if 'error' in line.lower():
00581                 ost.LogWarning('Predicting secondary structure for MSA '+
00582                                '(%s) failed, on command: %s' % (a3m_file, line))
00583             return a3m_file
00584         return a3m_file
00585 
00586     def A3MToProfile(self, a3m_file, hhm_file=None):
00587         """
00588         Converts the A3M alignment file to a hhm profile. If hhm_file is not
00589         given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
00590 
00591         :param a3m_file: input MSA
00592         :type a3m_file: :class:`str`
00593 
00594         :param hhm_file: output file name 
00595         :type hhm_file: :class:`str`
00596 
00597         :return: the path to the profile
00598         :rtype: :class:`str`
00599         """
00600         hhmake = os.path.join(self.bin_dir, 'hhmake')
00601         if not hhm_file:
00602             hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
00603         if os.path.exists(hhm_file):
00604             return hhm_file
00605         ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
00606         os.putenv('HHLIB', self.hhlib_dir)
00607         if subprocess.call('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
00608                            shell=True):
00609             raise IOError('could not convert a3m to hhm file')
00610         return hhm_file
00611 
00612 
00613     def A3MToCS(self, a3m_file, cs_file=None, options={}):
00614         """
00615         Converts the A3M alignment file to a column state sequence file. If
00616         cs_file is not given, the output file will be set to
00617         <:attr:`a3m_file`-basename>.seq219.
00618 
00619         :param a3m_file: A3M file to be converted
00620         :type a3m_file: :class:`str`
00621 
00622         :param cs_file: output file name (may be omitted)
00623         :type cs_file: :class:`str`
00624 
00625         :param options: dictionary of options to *cstranslate*, must come with
00626                         the right amount of '-' in front.
00627         :type options: :class:`dict`
00628 
00629         :return: the path to the column state sequence file
00630         :rtype: :class:`str`
00631         """
00632         cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
00633         if not cs_file:
00634             cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
00635         if os.path.exists(cs_file):
00636             return cs_file
00637         opt_cmd = list()
00638         for k, val in options.iteritems():
00639             if type(val) == type(True):
00640                 if val == True:
00641                     opt_cmd.append('%s' % str(k))
00642             else:
00643                 opt_cmd.append('%s %s' % (str(k), str(val)))
00644         opt_cmd = ' '.join(opt_cmd)
00645         cs_cmd = '%s -i %s -o %s %s' % (cstranslate, a3m_file, cs_file, opt_cmd)
00646         ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
00647         job = subprocess.Popen(cs_cmd, shell=True,
00648                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00649         sout, _ = job.communicate()
00650         #lines = serr.splitlines()
00651         #for l in lines:
00652         #    print l
00653         lines = sout.splitlines()
00654         for line in lines:
00655             if line in 'Wrote abstract state sequence to %s' % cs_file:
00656                 return cs_file
00657         ost.LogWarning('Creating column state sequence file (%s) failed' % \
00658                        cs_file)
00659 
00660     def CleanupFailed(self):
00661         '''In case something went wrong, call to make sure everything is clean.
00662         '''
00663         store_needs_cleanup = self.needs_cleanup
00664         self.needs_cleanup = True
00665         self.Cleanup()
00666         self.needs_cleanup = store_needs_cleanup
00667 
00668     def Search(self, a3m_file, database, options={}, prefix=''):
00669         """
00670         Searches for templates in the given database. Before running the
00671         search, the hhm file is copied. This makes it possible to launch
00672         several hhblits instances at once. Upon success, the filename of the
00673         result file is returned. This file may be parsed with
00674         :func:`ParseHHblitsOutput`.
00675 
00676         :param a3m_file: input MSA file
00677         :type a3m_file: :class:`str`
00678 
00679         :param database: search database, needs to be the common prefix of the
00680                          database files
00681         :type database: :class:`str`
00682 
00683         :param options: dictionary of options, must come with the right amount
00684                         of '-' in front.
00685         :type options: :class:`dict`
00686 
00687         :param prefix: prefix to the result file
00688         :type prefix: :class:`str`
00689 
00690         :return: the path to the result file
00691         :rtype: :class:`str`
00692         """
00693         opts = {'cpu' : 1, # no. of cpus used
00694                 'n'   : 1}   # no. of iterations
00695         opts.update(options)
00696         opt_cmd = []
00697         opt_str = []
00698         for k, val in opts.iteritems():
00699             if type(val) == type(True):
00700                 if val == True:
00701                     opt_cmd.append('-%s' % str(k))
00702                     opt_str.append(str(k))
00703             else:
00704                 opt_cmd.append('-%s %s' % (str(k), str(val)))
00705                 opt_str.append('%s%s' % (str(k), str(val)))
00706         opt_cmd = ' '.join(opt_cmd)
00707         opt_str = '_'.join(opt_str)
00708         base = os.path.basename(os.path.splitext(a3m_file)[0])
00709         hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
00710         hhr_file = os.path.join(self.working_dir, hhr_file)
00711         search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s'%(
00712             self.hhblits_bin,
00713             opt_cmd, os.path.abspath(a3m_file),
00714             hhr_file,
00715             os.path.join(os.path.abspath(os.path.split(database)[0]),
00716                          os.path.split(database)[1]))
00717         ost.LogInfo('searching %s' % database)
00718         ost.LogVerbose(search_cmd)
00719         job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dir,
00720                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00721         sout, serr = job.communicate()
00722         if job.returncode != 0:
00723             lines = sout.splitlines()
00724             for line in lines:
00725                 print line.strip()
00726             lines = serr.splitlines()
00727             for line in lines:
00728                 print line.strip()
00729             return None
00730         return hhr_file
00731 
00732 
00733 __all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader', 'ParseHeaderLine',
00734            'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
00735            'EstimateMemConsumption']
00736 
00737 #  LocalWords:  HHblits MSA hhblits hhtools PSIPRED addss param nrdb str mact
00738 #  LocalWords:  cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
00739 #  LocalWords:  attr basename rtype cstranslate tuple HHblitsHeader meth aln
00740 #  LocalWords:  HHblitsHit iteratable evalue pvalue neff hmms datetime
00741 #  LocalWords:  whitespace whitespaces