118 '''Fetch header content.
120 First, we seek the start of the identifier, i.e. the position one past the
121 first whitespace following the hit number. Since the identifier may itself
122 contain whitespace, we cannot simply split the whole line.
124 :param line: Line from the output header.
125 :type line: :class:`str`
127 :return: Hit information and query/template offsets
128 :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
130 for i
in range(0, len(line)):
131 if line[i].isdigit():
133 for i
in range(i, len(line)):
136 assert len(line)-i >= 31
and line[i+1] !=
' '
137 hit_id = line[i+1:i+31].strip()
138 fields = line[i+32:].split()
139 prob = float(fields[0])
140 evalue = float(fields[1])
141 pvalue = float(fields[2])
142 score = float(fields[3])
143 ss_score = float(fields[4])
144 offsets = (int(fields[6].split(
'-')[0]), int(fields[7].split(
'-')[0]))
145 return (
HHblitsHit(hit_id,
None, score, ss_score, evalue, pvalue, prob),
150 Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
151 the header of the search results and a list of hits.
153 :param output: Iterable containing the lines of the HHblits output file
154 :type output: iterable (e.g. an open file handle)
156 :return: a tuple of the header of the search results and the hits
157 :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
160 def _ParseHeaderSection(lines):
161 value_start_column = 14
162 date_pattern =
'%a %b %d %H:%M:%S %Y'
165 assert line.startswith(
'Query')
166 header.query = line[value_start_column:].strip()
168 assert line.startswith(
'Match_columns')
169 header.match_columns = int(line[value_start_column:].strip())
172 assert line.startswith(
'No_of_seqs')
175 assert line.startswith(
'Neff')
176 header.n_eff = float(line[value_start_column:].strip())
179 assert line.startswith(
'Searched_HMMs')
180 header.searched_hmms = int(line[value_start_column:].strip())
183 assert line.startswith(
'Date')
184 value = line[value_start_column:].strip()
185 header.date = datetime.datetime.strptime(value, date_pattern)
188 assert line.startswith(
'Command')
189 header.command = line[value_start_column:].strip()
192 assert len(line.strip()) == 0
195 def _ParseTableOfContents(lines):
197 assert line.startswith(
' No Hit')
201 if len(line.strip()) == 0:
206 def _ParseResultBody(query_id, hits, lines):
210 def _MakeAln(query_id, hit_id, query_string, templ_string,
212 s1 = seq.CreateSequence(query_id, query_string)
213 s1.offset = q_offset-1
214 s2 = seq.CreateSequence(hit_id, templ_string)
215 s2.offset = t_offset-1
216 return seq.CreateAlignment(s1, s2)
226 if len(line.strip()) == 0:
228 if line.startswith(
'Done!'):
229 if len(query_str) > 0:
230 hits[entry_index][0].aln = _MakeAln(\
231 query_id, hits[entry_index][0].hit_id,
232 query_str, templ_str, *hits[entry_index][1])
233 return [h
for h, o
in hits]
234 if line.startswith(
'No '):
235 if len(query_str) > 0:
236 hits[entry_index][0].aln = _MakeAln(\
237 query_id, hits[entry_index][0].hit_id,
238 query_str, templ_str, *hits[entry_index][1])
239 entry_index = int(line[3:].strip())-1
241 hits[entry_index][0].hit_id = line[1:].strip()
248 assert entry_index !=
None
250 if line[1:].startswith(
' Consensus'):
252 if line[1:].startswith(
' ss_pred'):
254 if line[1:].startswith(
' ss_conf'):
256 if line[1:].startswith(
' ss_dssp'):
258 if line.startswith(
'T '):
259 for start_pos
in range(22, len(line)):
260 if line[start_pos].isalpha()
or line[start_pos] ==
'-':
262 end_pos = line.find(
' ', start_pos)
265 error_str =
"Unparsable line '%s' for entry No %d" \
266 % (line.strip(), entry_index + 1)
267 raise AssertionError(error_str)
268 templ_str += line[start_pos:end_pos]
269 if line.startswith(
'Q '):
270 for start_pos
in range(22, len(line)):
271 if line[start_pos].isalpha()
or line[start_pos] ==
'-':
273 end_pos = line.find(
' ', start_pos)
276 error_str =
"Unparsable line '%s' for entry No %d" \
277 % (line.strip(), entry_index + 1)
278 raise AssertionError(error_str)
279 query_str += line[start_pos:end_pos]
280 except StopIteration:
281 if len(query_str) > 0:
282 hits[entry_index][0].aln = _MakeAln(query_id,
283 hits[entry_index][0].hit_id,
284 query_str, templ_str,
285 *hits[entry_index][1])
286 return [h
for h, o
in hits]
287 header = _ParseHeaderSection(lines)
292 hits = _ParseTableOfContents(lines)
293 return header, _ParseResultBody(header.query, hits, lines)
297 Parse secondary structure information and the multiple sequence alignment
298 out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
300 :param a3m_file: Iterable containing the lines of the A3M file
301 :type a3m_file: iterable (e.g. an open file handle)
303 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
304 (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
305 If not available, "ss_pred" and "ss_conf" entries are set to None.
307 profile_dict = dict()
313 for line
in a3m_file:
314 if len(line.rstrip()) == 0:
316 elif line.startswith(
'>ss_pred'):
319 elif line.startswith(
'>ss_conf'):
324 msa_head.append(line[1:].rstrip())
328 if state ==
'sspred':
329 pred_seq_txt += line.rstrip()
330 elif state ==
'ssconf':
331 conf_seq_txt += line.rstrip()
333 msa_seq[len(msa_seq)-1] += line.rstrip()
335 if len(pred_seq_txt) > 0:
336 profile_dict[
'ss_pred'] = list()
337 profile_dict[
'ss_conf'] = list()
338 for i
in range(0, len(pred_seq_txt)):
339 profile_dict[
'ss_pred'].append(pred_seq_txt[i])
340 profile_dict[
'ss_conf'].append(int(conf_seq_txt[i]))
342 profile_dict[
'ss_pred'] =
None
343 profile_dict[
'ss_conf'] =
None
347 profile_dict[
'msa'] =
None
350 al = seq.AlignmentList()
351 for i
in range(1, len(msa_seq)):
363 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
364 seq.CreateSequence(msa_head[i], ts))
366 profile_dict[
'msa'] = seq.alg.MergePairwiseAlignments(\
367 al, seq.CreateSequence(msa_head[0], t))
373 Parse secondary structure information and the MSA out of an HHM profile as
374 produced by :meth:`HHblits.A3MToProfile`.
376 :param profile: Opened file handle holding the profile.
377 :type profile: :class:`file`
379 :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
380 (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
381 "consensus" (:class:`~ost.seq.SequenceHandle`).
382 If not available, "ss_pred" and "ss_conf" entries are set to None.
384 profile_dict = dict()
392 if len(line.rstrip()) == 0:
394 if line.rstrip() ==
'>ss_pred PSIPRED predicted secondary structure':
397 elif line.rstrip() ==
'>ss_conf PSIPRED confidence values':
400 elif line.rstrip() ==
'>Consensus':
404 if state ==
'consensus' or state ==
'msa':
406 msa_head.append(line[1:].rstrip())
408 raise IOError(
'Profile file "%s" is missing ' % profile.name+
409 'the "Consensus" section')
416 if state ==
'sspred':
417 pred_seq_txt += line.rstrip()
418 elif state ==
'ssconf':
419 conf_seq_txt += line.rstrip()
421 msa_seq[len(msa_seq)-1] += line.rstrip()
422 elif state ==
'consensus':
423 consensus_txt += line.rstrip()
425 if len(pred_seq_txt) > 0:
426 profile_dict[
'ss_pred'] = list()
427 profile_dict[
'ss_conf'] = list()
428 for i
in range(0, len(pred_seq_txt)):
429 profile_dict[
'ss_pred'].append(pred_seq_txt[i])
430 profile_dict[
'ss_conf'].append(int(conf_seq_txt[i]))
432 profile_dict[
'ss_pred'] =
None
433 profile_dict[
'ss_conf'] =
None
437 profile_dict[
'msa'] =
None
440 al = seq.AlignmentList()
441 for i
in range(1, len(msa_seq)):
453 nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
454 seq.CreateSequence(msa_head[i], ts))
456 profile_dict[
'msa'] = seq.alg.MergePairwiseAlignments(\
457 al, seq.CreateSequence(msa_head[0], t))
459 profile_dict[
'consensus'] = seq.CreateSequence(
'Consensus', consensus_txt)