OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
tmtools.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2015 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Wrappers for the tmalign and tmscore utilities.
21 
22 References:
23 
24 tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
25 tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
26 
27 
28 Authors: Pascal Benkert, Marco Biasini
29 """
30 
31 import subprocess, os, tempfile, platform
32 from ost import settings, io, geom, seq
33 
34 def _SetupFiles(models):
35  # create temporary directory
36  tmp_dir_name=tempfile.mkdtemp()
37  dia = 'PDB'
38  for index, model in enumerate(models):
39  for chain in model.chains:
40  if len(chain.name) > 1:
41  dia = 'CHARMM'
42  break;
43  for res in chain.residues:
44  if len(res.name) > 3:
45  dia = 'CHARMM'
46  break;
47  io.SavePDB(model, os.path.join(tmp_dir_name, 'model%02d.pdb' % (index+1)), dialect=dia)
48  return tmp_dir_name
49 
50 def _CleanupFiles(dir_name):
51  import shutil
52  shutil.rmtree(dir_name)
53 
55  """
56  Holds the result of running TMalign
57 
58  .. attribute:: rmsd
59 
60  The RMSD of the common Calpha atoms of both structures
61 
62  .. attribute:: transform
63 
64  The transform that superposes the model onto the reference structure.
65 
66  :type: :class:`~ost.geom.Mat4`
67 
68  .. attribute:: alignment
69 
70  The alignment of the structures, that is the pairing of Calphas of both
71  structures. Since the programs only read ATOM records, residues consisting
72  of HETATMs (MSE) are not included in the alignment.
73 
74  :type: :class:`~ost.seq.AlignmentHandle`
75 
76  .. attribute:: tm_score
77 
78  The TM-score of the structural superposition
79 
80  """
81  def __init__(self, rmsd, tm_score, aligned_length, transform,
82  ref_sequence, alignment):
83 
84  self.rmsd=rmsd
85  self.tm_score=tm_score
86  self.aligned_length=aligned_length
87  self.transform=transform
88  self.ref_sequence =ref_sequence
89  self.alignment=alignment
90 
91 def _ParseTmAlign(lines,lines_matrix):
92  info_line=lines[12].split(',')
93  aln_length=float(info_line[0].split('=')[1].strip())
94  rmsd=float(info_line[1].split('=')[1].strip())
95  tm_score=float(lines[14].split('=')[1].split('(')[0].strip())
96  tf1=[float(i.strip()) for i in lines_matrix[2].split()]
97  tf2=[float(i.strip()) for i in lines_matrix[3].split()]
98  tf3=[float(i.strip()) for i in lines_matrix[4].split()]
99  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
100  tf2[4], tf3[2], tf3[3], tf3[4])
101  tf=geom.Mat4(rot)
102  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
103  seq1 = seq.CreateSequence("1",lines[18].strip())
104  seq2 = seq.CreateSequence("2",lines[20].strip())
105  alignment = seq.CreateAlignment()
106  alignment.AddSequence(seq2)
107  alignment.AddSequence(seq1)
108  return TMAlignResult(rmsd, tm_score, aln_length, tf, seq2, alignment)
109 
110 def _RunTmAlign(tmalign, tmp_dir):
111  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
112  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
113  if platform.system() == "Windows":
114  tmalign_path=settings.Locate('tmalign.exe', explicit_file_name=tmalign)
115  command="\"%s\" %s %s -m %s" %(os.path.normpath(tmalign_path), model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
116  else:
117  tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
118  command="\"%s\" \"%s\" \"%s\" -m \"%s\"" %(tmalign_path, model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
119  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
120  ps.wait()
121  lines=ps.stdout.readlines()
122  if (len(lines))<22:
123  _CleanupFiles(tmp_dir)
124  raise RuntimeError("tmalign superposition failed")
125  matrix_file=open(os.path.join(tmp_dir,'matrix.txt'))
126  lines_matrix=matrix_file.readlines()
127  matrix_file.close()
128  return _ParseTmAlign(lines,lines_matrix)
129 
131  def __init__(self, rmsd, aligned_length, tm_score, transform, alignment):
132  self.rmsd=rmsd
133  self.tm_score=tm_score
134  self.aligned_length=aligned_length
135  self.transform=transform
136  self.alignment=alignment
137 
138 def _ParseMmAlign(lines):
139  info_line=lines[10].split(',')
140  aln_length=float(info_line[0].split('=')[1].strip())
141  rmsd=float(info_line[1].split('=')[1].strip())
142  tm_score=float(info_line[2].split('=')[1].strip())
143  tf1=[float(i.strip()) for i in lines[14].split()]
144  tf2=[float(i.strip()) for i in lines[15].split()]
145  tf3=[float(i.strip()) for i in lines[16].split()]
146  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
147  tf2[4], tf3[2], tf3[3], tf3[4])
148  tf=geom.Mat4(rot)
149  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
150  seq1 = seq.CreateSequence("1",lines[19].strip())
151  seq2 = seq.CreateSequence("2",lines[21].strip())
152  alignment = seq.CreateAlignment()
153  alignment.AddSequence(seq2)
154  alignment.AddSequence(seq1)
155 
156  return MMAlignResult(rmsd, tm_score, aln_length, tf, seq2, alignment)
157 
158 def _RunMmAlign(mmalign, tmp_dir):
159  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
160  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
161  if platform.system() == "Windows":
162  mmalign_path=settings.Locate('mmalign.exe', explicit_file_name=mmalign)
163  command="\"%s\" %s %s" %(os.path.normpath(mmalign_path), model1_filename, model2_filename)
164  else:
165  mmalign_path=settings.Locate('MMalign', explicit_file_name=mmalign)
166  command="\"%s\" \"%s\" \"%s\"" %(mmalign_path, model1_filename, model2_filename)
167  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
168  ps.wait()
169  lines=ps.stdout.readlines()
170  if (len(lines))<22:
171  _CleanupFiles(tmp_dir)
172  raise RuntimeError("mmalign superposition failed")
173  return _ParseMmAlign(lines)
174 
176  """
177  Holds the result of running TMscore
178 
179  .. attribute:: rmsd_common
180 
181  The RMSD of the common Calpha atoms of both structures
182 
183  .. attribute:: rmsd_below_five
184 
185  The RMSD of all Calpha atoms that can be superposed below five Angstroem
186 
187  .. attribute:: tm_score
188 
189  The TM-score of the structural superposition
190 
191  .. attribute:: transform
192 
193  The transform that superposes the model onto the reference structure.
194 
195  :type: :class:`~ost.geom.Mat4`
196 
197  .. attribute:: gdt_ha
198 
199  The GDT_HA of the model to the reference structure.
200 
201  .. attribute:: gdt_ts
202 
203  The GDT_TS of the model to the reference structure.
204 
205  """
206  def __init__(self, rmsd_common, tm_score, max_sub,
207  gdt_ts, gdt_ha, rmsd_below_five, transform):
208  self.rmsd_common=rmsd_common
209  self.tm_score=tm_score
210  self.max_sub=max_sub
211  self.gdt_ts=gdt_ts
212  self.gdt_ha=gdt_ha
213  self.rmsd_below_five=rmsd_below_five
214  self.transform=transform
215 
216 def _ParseTmScore(lines):
217  tf1=[float(i.strip()) for i in lines[23].split()]
218  tf2=[float(i.strip()) for i in lines[24].split()]
219  tf3=[float(i.strip()) for i in lines[25].split()]
220  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
221  tf2[4], tf3[2], tf3[3], tf3[4])
222  tf=geom.Mat4(rot)
223  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
224  result=TMScoreResult(float(lines[14].split()[-1].strip()),
225  float(lines[16].split()[2].strip()),
226  float(lines[17].split()[1].strip()),
227  float(lines[18].split()[1].strip()),
228  float(lines[19].split()[1].strip()),
229  float(lines[27].split()[-1].strip()),
230  tf)
231  return result
232 
233 def _RunTmScore(tmscore, tmp_dir):
234  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
235  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
236  if platform.system() == "Windows":
237  tmscore_path=settings.Locate('tmscore.exe', explicit_file_name=tmscore)
238  command="\"%s\" %s %s" %(os.path.normpath(tmscore_path), model1_filename,
239  model2_filename)
240  else:
241  tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
242  command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
243  model2_filename)
244  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
245  ps.wait()
246  lines=ps.stdout.readlines()
247  if (len(lines))<22:
248  _CleanupFiles(tmp_dir)
249  raise RuntimeError("tmscore superposition failed")
250  return _ParseTmScore(lines)
251 
252 
253 def TMAlign(model1, model2, tmalign=None):
254  """
255  Performs a sequence independent superposition of model1 onto model2, the
256  reference.
257 
258 
259  :param model1: The model structure. If the superposition is successful, will
260  be superposed onto the reference structure
261  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
262  :param model2: The reference structure
263  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
264  :param tmalign: If not None, the path to the tmalign executable.
265  :returns: The result of the tmscore superposition
266  :rtype: :class:`TMAlignResult`
267 
268  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
269  :raises: :class:`RuntimeError` if the superposition failed
270  """
271  tmp_dir_name=_SetupFiles((model1, model2))
272  result=_RunTmAlign(tmalign, tmp_dir_name)
273  model1.handle.EditXCS().ApplyTransform(result.transform)
274  _CleanupFiles(tmp_dir_name)
275  return result
276 
277 def MMAlign(model1, model2, mmalign=None):
278  """
279  Run tmalign on two protein structures
280  """
281  tmp_dir_name=_SetupFiles((model1, model2))
282  result=_RunMmAlign(mmalign, tmp_dir_name)
283  model1.handle.EditXCS().ApplyTransform(result.transform)
284  _CleanupFiles(tmp_dir_name)
285  return result
286 
287 def TMScore(model1, model2, tmscore=None):
288  """
289  Performs a sequence dependent superposition of model1 onto model2,
290  the reference.
291 
292  :param model1: The model structure. If the superposition is successful, will
293  be superposed onto the reference structure
294  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
295  :param model2: The reference structure
296  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
297  :param tmscore: If not None, the path to the tmscore executable.
298  :returns: The result of the tmscore superposition
299  :rtype: :class:`TMScoreResult`
300 
301  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
302  :raises: :class:`RuntimeError` if the superposition failed
303  """
304  tmp_dir_name=_SetupFiles((model1, model2))
305  result=_RunTmScore(tmscore, tmp_dir_name)
306  model1.handle.EditXCS().ApplyTransform(result.transform)
307  _CleanupFiles(tmp_dir_name)
308  return result
Three dimensional vector class, using Real precision.
Definition: vec3.hh:42