OpenStructure
clustalw.py
Go to the documentation of this file.
1 from ost.bindings import utils
2 from ost import settings, io, seq, LogError
3 import os
4 import subprocess
5 
def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False,
             clustalw_option_string=False):
    """
    Align sequences by running an external clustalw executable.

    The input is either two sequence handles, two plain sequence strings
    (wrapped into handles named "seq1" and "seq2"), or a single sequence list
    passed as *seq1* with *seq2* left at None.

    :param seq1: A seq.SequenceHandle, a str, or a seq.SequenceList.
    :param seq2: A seq.SequenceHandle or a str matching the type of *seq1*;
                 None when *seq1* is a seq.SequenceList.
    :param clustalw: Forwarded to settings.Locate() as explicit_file_name
                     when looking up the 'clustalw' / 'clustalw2' executable.
    :param keep_files: If True, the temporary directory is not cleaned up.
    :param nopgap: If True, "-nopgap" is appended to the command line.
    :param clustalw_option_string: Extra command line options appended
                                   verbatim; any falsy value (False, None,
                                   "") means no extra options.
    :returns: The alignment loaded from the clustalw FASTA output, with the
              sequence offsets and attached views of the input sequences
              carried over to the alignment sequences of the same name.
              None if the input arguments are not one of the supported
              combinations (an error is logged in that case).
    """
    clustalw_path = settings.Locate(('clustalw', 'clustalw2'),
                                    explicit_file_name=clustalw)

    if seq2 is not None:
        if (isinstance(seq1, seq.SequenceHandle)
                and isinstance(seq2, seq.SequenceHandle)):
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seq1)
            seq_list.AddSequence(seq2)
        elif isinstance(seq1, str) and isinstance(seq2, str):
            seqh1 = seq.CreateSequence("seq1", seq1)
            seqh2 = seq.CreateSequence("seq2", seq2)
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seqh1)
            seq_list.AddSequence(seqh2)
        else:
            LogError("WARNING: Specify at least two Sequences")
            return None
    elif isinstance(seq1, seq.SequenceList):
        seq_list = seq1
    else:
        LogError("WARNING: Specify either two SequenceHandles or one SequenceList")
        return None

    temp_dir = utils.TempDirWithFiles((seq_list,))
    try:
        out = os.path.join(temp_dir.dirname, 'out.fasta')
        # NOTE(review): the command is a shell string (shell=True); the
        # caller-supplied option string is appended verbatim, so it must come
        # from a trusted source.
        command = '%s -infile="%s" -output=fasta -outfile="%s"' % (
            clustalw_path, temp_dir.files[0], out)
        if nopgap:
            command += " -nopgap"
        if clustalw_option_string:
            # see useful flags:
            # http://toolkit.tuebingen.mpg.de/clustalw/help_params
            command += " " + clustalw_option_string

        ps = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        # communicate() drains stdout AND waits for the process to exit, so
        # the child is reaped and the output file is complete before loading.
        ps.communicate()
        if ps.returncode != 0:
            LogError("WARNING: clustalw exited with status %d" % ps.returncode)
        aln = io.LoadAlignment(out)

        for sequence in seq_list:
            for seq_num, aln_seq in enumerate(aln.sequences):
                if aln_seq.GetName() == sequence.GetName():
                    break
            else:
                # No sequence of that name in the alignment: skip instead of
                # applying offset/view to an unrelated sequence.
                continue
            aln.SetSequenceOffset(seq_num, sequence.offset)
            if sequence.HasAttachedView():
                aln.AttachView(seq_num, sequence.GetAttachedView().Copy())
    finally:
        # Clean up even when the run or the loading step raised.
        if not keep_files:
            temp_dir.Cleanup()

    return aln