OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
tmtools.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2009 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Wrappers for the tmalign and tmscore utilities.
21 
22 References:
23 
24 tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
25 tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
26 
27 
28 Authors: Pascal Benkert, Marco Biasini
29 """
30 
31 import subprocess, os, tempfile, platform
32 from ost import settings, io, geom, seq
33 
34 def _SetupFiles(models):
35  # create temporary directory
36  tmp_dir_name=tempfile.mkdtemp()
37  dia = 'PDB'
38  for index, model in enumerate(models):
39  for chain in model.chains:
40  if len(chain.name) > 1:
41  dia = 'CHARMM'
42  break;
43  for res in chain.residues:
44  if len(res.name) > 3:
45  dia = 'CHARMM'
46  break;
47  io.SavePDB(model, os.path.join(tmp_dir_name, 'model%02d.pdb' % (index+1)), dialect=dia)
48  return tmp_dir_name
49 
50 def _CleanupFiles(dir_name):
51  import shutil
52  shutil.rmtree(dir_name)
53 
55  """
56  Holds the result of running TMalign
57 
58  .. attribute:: rmsd
59 
60  The RMSD of the common Calpha atoms of both structures
61 
62  .. attribute:: transform
63 
64  The transform that superposes the model onto the reference structure.
65 
66  :type: :class:`~ost.geom.Mat4`
67 
68  .. attribute:: alignment
69 
70  The alignment of the structures, that is the pairing of Calphas of both
71  structures. Since the programs only read ATOM records, residues consisting
72  of HETATMs (MSE) are not included in the alignment.
73 
74  :type: :class:`~ost.seq.AlignmentHandle`
75 
76  .. attribute:: tm_score
77 
78  The TM-score of the structural superposition
79 
80  """
81  def __init__(self, rmsd, tm_score, aligned_length, transform,
82  ref_sequence, alignment):
83 
84  self.rmsd=rmsd
85  self.tm_score=tm_score
86  self.aligned_length=aligned_length
87  self.transform=transform
88  self.ref_sequence =ref_sequence
89  self.alignment=alignment
90 
91 def _ParseTmAlign(lines):
92  info_line=lines[11].split(',')
93  aln_length=float(info_line[0].split('=')[1].strip())
94  rmsd=float(info_line[1].split('=')[1].strip())
95  tm_score=float(info_line[2].split('=')[1].strip())
96  tf1=[float(i.strip()) for i in lines[15].split()]
97  tf2=[float(i.strip()) for i in lines[16].split()]
98  tf3=[float(i.strip()) for i in lines[17].split()]
99  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
100  tf2[4], tf3[2], tf3[3], tf3[4])
101  tf=geom.Mat4(rot)
102  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
103  seq1 = seq.CreateSequence("1",lines[26].strip())
104  seq2 = seq.CreateSequence("2",lines[28].strip())
105  alignment = seq.CreateAlignment()
106  alignment.AddSequence(seq2)
107  alignment.AddSequence(seq1)
108  return TMAlignResult(rmsd, tm_score, aln_length, tf, seq2, alignment)
109 
110 def _RunTmAlign(tmalign, tmp_dir):
111  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
112  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
113  if platform.system() == "Windows":
114  tmalign_path=settings.Locate('tmalign.exe', explicit_file_name=tmalign)
115  command="\"%s\" %s %s" %(os.path.normpath(tmalign_path), model1_filename, model2_filename)
116  else:
117  tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
118  command="\"%s\" \"%s\" \"%s\"" %(tmalign_path, model1_filename, model2_filename)
119  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
120  ps.wait()
121  lines=ps.stdout.readlines()
122  if (len(lines))<22:
123  _CleanupFiles(tmp_dir)
124  raise RuntimeError("tmalign superposition failed")
125  return _ParseTmAlign(lines)
126 
128  def __init__(self, rmsd, tm_score, aligned_length, transform, ref_sequence, alignment):
129  self.rmsd=rmsd
130  self.tm_score=tm_score
131  self.aligned_length=aligned_length
132  self.transform=transform
133  self.ref_sequence =ref_sequence
134  self.alignment=alignment
135 
136 def _ParseMmAlign(lines):
137  info_line=lines[10].split(',')
138  aln_length=float(info_line[0].split('=')[1].strip())
139  rmsd=float(info_line[1].split('=')[1].strip())
140  tm_score=float(info_line[2].split('=')[1].strip())
141  tf1=[float(i.strip()) for i in lines[14].split()]
142  tf2=[float(i.strip()) for i in lines[15].split()]
143  tf3=[float(i.strip()) for i in lines[16].split()]
144  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
145  tf2[4], tf3[2], tf3[3], tf3[4])
146  tf=geom.Mat4(rot)
147  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
148  seq1 = seq.CreateSequence("1",lines[19].strip())
149  seq2 = seq.CreateSequence("2",lines[21].strip())
150  alignment = seq.CreateAlignment()
151  alignment.AddSequence(seq2)
152  alignment.AddSequence(seq1)
153  return MMAlignResult(rmsd, aln_length, tm_score, tf, seq2, alignment)
154 
155 def _RunMmAlign(mmalign, tmp_dir):
156  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
157  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
158  if platform.system() == "Windows":
159  mmalign_path=settings.Locate('mmalign.exe', explicit_file_name=mmalign)
160  command="\"%s\" %s %s" %(os.path.normpath(mmalign_path), model1_filename, model2_filename)
161  else:
162  mmalign_path=settings.Locate('MMalign', explicit_file_name=mmalign)
163  command="\"%s\" \"%s\" \"%s\"" %(mmalign_path, model1_filename, model2_filename)
164  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
165  ps.wait()
166  lines=ps.stdout.readlines()
167  if (len(lines))<22:
168  _CleanupFiles(tmp_dir)
169  raise RuntimeError("mmalign superposition failed")
170  return _ParseMmAlign(lines)
171 
173  """
174  Holds the result of running TMscore
175 
176  .. attribute:: rmsd_common
177 
178  The RMSD of the common Calpha atoms of both structures
179 
180  .. attribute:: rmsd_below_five
181 
182  The RMSD of all Calpha atoms that can be superposed below five Angstroem
183 
184  .. attribute:: tm_score
185 
186  The TM-score of the structural superposition
187 
188  .. attribute:: transform
189 
190  The transform that superposes the model onto the reference structure.
191 
192  :type: :class:`~ost.geom.Mat4`
193 
194  .. attribute:: gdt_ha
195 
196  The GDT_HA of the model to the reference structure.
197 
198  .. attribute:: gdt_ts
199 
200  The GDT_TS of the model to the reference structure.
201 
202  """
203  def __init__(self, rmsd_common, tm_score, max_sub,
204  gdt_ts, gdt_ha, rmsd_below_five, transform):
205  self.rmsd_common=rmsd_common
206  self.tm_score=tm_score
207  self.max_sub=max_sub
208  self.gdt_ts=gdt_ts
209  self.gdt_ha=gdt_ha
210  self.rmsd_below_five=rmsd_below_five
211  self.transform=transform
212 
213 def _ParseTmScore(lines):
214  tf1=[float(i.strip()) for i in lines[23].split()]
215  tf2=[float(i.strip()) for i in lines[24].split()]
216  tf3=[float(i.strip()) for i in lines[25].split()]
217  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
218  tf2[4], tf3[2], tf3[3], tf3[4])
219  tf=geom.Mat4(rot)
220  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
221  result=TMScoreResult(float(lines[14].split()[-1].strip()),
222  float(lines[16].split()[2].strip()),
223  float(lines[17].split()[1].strip()),
224  float(lines[18].split()[1].strip()),
225  float(lines[19].split()[1].strip()),
226  float(lines[27].split()[-1].strip()),
227  tf)
228  return result
229 
230 def _RunTmScore(tmscore, tmp_dir):
231  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
232  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
233  if platform.system() == "Windows":
234  tmscore_path=settings.Locate('tmscore.exe', explicit_file_name=tmscore)
235  command="\"%s\" %s %s" %(os.path.normpath(tmscore_path), model1_filename,
236  model2_filename)
237  else:
238  tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
239  command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
240  model2_filename)
241  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
242  ps.wait()
243  lines=ps.stdout.readlines()
244  if (len(lines))<22:
245  _CleanupFiles(tmp_dir)
246  raise RuntimeError("tmscore superposition failed")
247  return _ParseTmScore(lines)
248 
249 
250 def TMAlign(model1, model2, tmalign=None):
251  """
252  Performs a sequence independent superposition of model1 onto model2, the
253  reference.
254 
255 
256  :param model1: The model structure. If the superposition is successful, will
257  be superposed onto the reference structure
258  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
259  :param model2: The reference structure
260  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
261  :param tmalign: If not None, the path to the tmalign executable.
262  :returns: The result of the tmscore superposition
263  :rtype: :class:`TMAlignResult`
264 
265  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
266  :raises: :class:`RuntimeError` if the superposition failed
267  """
268  tmp_dir_name=_SetupFiles((model1, model2))
269  result=_RunTmAlign(tmalign, tmp_dir_name)
270  model1.handle.EditXCS().ApplyTransform(result.transform)
271  _CleanupFiles(tmp_dir_name)
272  return result
273 
274 def MMAlign(model1, model2, mmalign=None):
275  """
276  Run tmalign on two protein structures
277  """
278  tmp_dir_name=_SetupFiles((model1, model2))
279  result=_RunMmAlign(mmalign, tmp_dir_name)
280  model1.handle.EditXCS().ApplyTransform(result.transform)
281  _CleanupFiles(tmp_dir_name)
282  return result
283 
284 def TMScore(model1, model2, tmscore=None):
285  """
286  Performs a sequence dependent superposition of model1 onto model2,
287  the reference.
288 
289  :param model1: The model structure. If the superposition is successful, will
290  be superposed onto the reference structure
291  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
292  :param model2: The reference structure
293  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
294  :param tmscore: If not None, the path to the tmscore executable.
295  :returns: The result of the tmscore superposition
296  :rtype: :class:`TMScoreResult`
297 
298  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
299  :raises: :class:`RuntimeError` if the superposition failed
300  """
301  tmp_dir_name=_SetupFiles((model1, model2))
302  result=_RunTmScore(tmscore, tmp_dir_name)
303  model1.handle.EditXCS().ApplyTransform(result.transform)
304  _CleanupFiles(tmp_dir_name)
305  return result