OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
tmtools.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2020 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Wrappers for the tmalign and tmscore utilities.
21 
22 References:
23 
24 tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
25 tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
26 
27 
28 Authors: Pascal Benkert, Marco Biasini
29 """
30 
31 import subprocess, os, tempfile, platform
32 import ost
33 from ost import settings, io, geom, seq
34 
35 def _SetupFiles(models, custom_chain_mapping = None):
36  # create temporary directory
37  tmp_dir_name=tempfile.mkdtemp()
38  dia = 'PDB'
39  for index, model in enumerate(models):
40  for chain in model.chains:
41  if len(chain.name) > 1:
42  dia = 'CHARMM'
43  break;
44  for res in chain.residues:
45  if len(res.name) > 3:
46  dia = 'CHARMM'
47  break;
48  io.SavePDB(model, os.path.join(tmp_dir_name, 'model%02d.pdb' % (index+1)), dialect=dia)
49  if custom_chain_mapping is not None:
50  with open(os.path.join(tmp_dir_name, "custom_mapping.txt"), 'w') as fh:
51  fh.write('\n'.join([f"{mdl_ch}\t{ref_ch}" for ref_ch, mdl_ch in custom_chain_mapping.items()]))
52  return tmp_dir_name
53 
54 def _CleanupFiles(dir_name):
55  import shutil
56  shutil.rmtree(dir_name)
57 
58 def _ParseTmAlign(lines,lines_matrix):
59  info_line=lines[12].split(',')
60  aln_length=int(info_line[0].split('=')[1].strip())
61  rmsd=float(info_line[1].split('=')[1].strip())
62  tm_score_swapped=float(lines[13].split('=')[1].split('(')[0].strip())
63  tm_score=float(lines[14].split('=')[1].split('(')[0].strip())
64  tf1=[float(i.strip()) for i in lines_matrix[2].split()]
65  tf2=[float(i.strip()) for i in lines_matrix[3].split()]
66  tf3=[float(i.strip()) for i in lines_matrix[4].split()]
67  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
68  tf2[4], tf3[2], tf3[3], tf3[4])
69  tf=geom.Mat4(rot)
70  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
71  seq1 = seq.CreateSequence("1",lines[18].strip())
72  seq2 = seq.CreateSequence("2",lines[20].strip())
73  alignment = seq.CreateAlignment()
74  alignment.AddSequence(seq2)
75  alignment.AddSequence(seq1)
76  return ost.bindings.TMAlignResult(rmsd, tm_score, tm_score_swapped,
77  aln_length, tf, alignment)
78 
79 def _ParseUSAlign(lines,lines_matrix):
80 
81  # stuff that is immediately parsed
82  rmsd = None
83  tm_score = None
84  tm_score_swapped = None
85  aligned_length = None
86 
87  # first goes into intermediate data structures
88  aln_data = list()
89  mapping_data1 = list()
90  mapping_data2 = list()
91  in_aln = False
92 
93  for line in lines:
94  if in_aln:
95  if len(line.strip()) == 0:
96  in_aln = False
97  else:
98  aln_data.append(line.strip('*'))
99  elif line.startswith("Name of Structure_1:"):
100  tmp = [item.strip() for item in line.split()[3].split(':')[1:]]
101  for item in tmp:
102  if len(item) > 0:
103  mapping_data1.append(item.split(',')[1])
104  else:
105  mapping_data1.append("")
106  elif line.startswith("Name of Structure_2:"):
107  tmp = [item.strip() for item in line.split()[3].split(':')[1:]]
108  for item in tmp:
109  if len(item) > 0:
110  mapping_data2.append(item.split(',')[1])
111  else:
112  mapping_data2.append("")
113  elif line.startswith("Aligned length="):
114  data = [item.strip() for item in line.split(',')]
115  for item in data:
116  if item.startswith("Aligned length="):
117  aligned_length = int(item.split("=")[1])
118  elif item.startswith("RMSD="):
119  rmsd = float(item.split("=")[1])
120  elif line.startswith("TM-score="):
121  if "(normalized by length of Structure_1" in line:
122  tm_score_swapped = float(line.split('(')[0].split('=')[1].strip())
123  elif "(normalized by length of Structure_2" in line:
124  tm_score = float(line.split('(')[0].split('=')[1].strip())
125  elif line.startswith("(\":\" denotes residue pairs of"):
126  in_aln = True
127 
128  assert(len(aln_data)==3)
129  aln_sequences1 = aln_data[0].split('*')
130  aln_sequences2 = aln_data[2].split('*')
131 
132  # do mapping/aln data
133  alns = ost.seq.AlignmentList()
134  ent1_mapped_chains = ost.StringList()
135  ent2_mapped_chains = ost.StringList()
136  assert(len(mapping_data1) == len(mapping_data2))
137  assert(len(aln_sequences1) == len(aln_sequences2))
138  assert(len(mapping_data1) == len(aln_sequences1))
139  for a, b, c, d in zip(mapping_data1, mapping_data2,
140  aln_sequences1, aln_sequences2):
141  if len(a) > 0 and len(b) > 0:
142  ent1_mapped_chains.append(a)
143  ent2_mapped_chains.append(b)
144  assert(len(c) == len(d))
145  aln = seq.CreateAlignment()
146  aln.AddSequence(seq.CreateSequence(a, c))
147  aln.AddSequence(seq.CreateSequence(b, d))
148  alns.append(aln)
149 
150  # parse transformation matrix
151  tf1=[float(i.strip()) for i in lines_matrix[2].split()[1:]]
152  tf2=[float(i.strip()) for i in lines_matrix[3].split()[1:]]
153  tf3=[float(i.strip()) for i in lines_matrix[4].split()[1:]]
154  mat = geom.Mat4(tf1[1], tf1[2], tf1[3], tf1[0],
155  tf2[1], tf2[2], tf2[3], tf2[0],
156  tf3[1], tf3[2], tf3[3], tf3[0],
157  0.0, 0.0, 0.0, 1.0)
158 
159  return ost.bindings.MMAlignResult(rmsd, tm_score, tm_score_swapped,
160  aligned_length, mat, alns,
161  ent1_mapped_chains, ent2_mapped_chains)
162 
163 def _RunTmAlign(tmalign, tmp_dir):
164  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
165  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
166  if platform.system() == "Windows":
167  tmalign_path=settings.Locate('tmalign.exe', explicit_file_name=tmalign)
168  command="\"%s\" %s %s -m %s" %(os.path.normpath(tmalign_path), model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
169  else:
170  tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
171  command="\"%s\" \"%s\" \"%s\" -m \"%s\"" %(tmalign_path, model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
172  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
173  stdout,_=ps.communicate()
174  lines=stdout.decode().splitlines()
175  if (len(lines))<22:
176  _CleanupFiles(tmp_dir)
177  raise RuntimeError("tmalign superposition failed")
178  matrix_file=open(os.path.join(tmp_dir,'matrix.txt'))
179  lines_matrix=matrix_file.readlines()
180  matrix_file.close()
181  return _ParseTmAlign(lines,lines_matrix)
182 
183 def _RunUSAlign(usalign, tmp_dir):
184  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
185  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
186  mat_filename = os.path.join(tmp_dir, "mat.txt")
187  usalign_path=settings.Locate('USalign', explicit_file_name=usalign)
188  command = f"{usalign_path} {model1_filename} {model2_filename} -mm 1 -ter 0 -m {mat_filename}"
189  custom_mapping = os.path.join(tmp_dir, "custom_mapping.txt")
190  if os.path.exists(custom_mapping):
191  command += f" -chainmap {custom_mapping}"
192  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
193  stdout,_=ps.communicate()
194  lines=stdout.decode().splitlines()
195  if (len(lines))<22:
196  _CleanupFiles(tmp_dir)
197  raise RuntimeError("USalign superposition failed")
198  with open(mat_filename) as fh:
199  lines_matrix = fh.readlines()
200  return _ParseUSAlign(lines,lines_matrix)
201 
203  """
204  Holds the result of running TMscore
205 
206  .. attribute:: rmsd_common
207 
208  The RMSD of the common Calpha atoms of both structures
209 
210  .. attribute:: rmsd_below_five
211 
212  The RMSD of all Calpha atoms that can be superposed below five Angstroem
213 
214  .. attribute:: tm_score
215 
216  The TM-score of the structural superposition
217 
218  .. attribute:: transform
219 
220  The transform that superposes the model onto the reference structure.
221 
222  :type: :class:`~ost.geom.Mat4`
223 
224  .. attribute:: gdt_ha
225 
226  The GDT_HA of the model to the reference structure.
227 
228  .. attribute:: gdt_ts
229 
230  The GDT_TS of the model to the reference structure.
231 
232  """
233  def __init__(self, rmsd_common, tm_score, max_sub,
234  gdt_ts, gdt_ha, rmsd_below_five, transform):
235  self.rmsd_common=rmsd_common
236  self.tm_score=tm_score
237  self.max_sub=max_sub
238  self.gdt_ts=gdt_ts
239  self.gdt_ha=gdt_ha
240  self.rmsd_below_five=rmsd_below_five
241  self.transform=transform
242 
243 def _ParseTmScore(lines):
244  tf1=[float(i.strip()) for i in lines[23].split()]
245  tf2=[float(i.strip()) for i in lines[24].split()]
246  tf3=[float(i.strip()) for i in lines[25].split()]
247  rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
248  tf2[4], tf3[2], tf3[3], tf3[4])
249  tf=geom.Mat4(rot)
250  tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
251  result=TMScoreResult(float(lines[14].split()[-1].strip()),
252  float(lines[16].split()[2].strip()),
253  float(lines[17].split()[1].strip()),
254  float(lines[18].split()[1].strip()),
255  float(lines[19].split()[1].strip()),
256  float(lines[27].split()[-1].strip()),
257  tf)
258  return result
259 
260 def _RunTmScore(tmscore, tmp_dir):
261  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
262  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
263  if platform.system() == "Windows":
264  tmscore_path=settings.Locate('tmscore.exe', explicit_file_name=tmscore)
265  command="\"%s\" %s %s" %(os.path.normpath(tmscore_path), model1_filename,
266  model2_filename)
267  else:
268  tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
269  command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
270  model2_filename)
271  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
272  stdout,_=ps.communicate()
273  lines=stdout.decode().splitlines()
274  if (len(lines))<22:
275  _CleanupFiles(tmp_dir)
276  raise RuntimeError("tmscore superposition failed")
277  return _ParseTmScore(lines)
278 
279 
280 def TMAlign(model1, model2, tmalign=None):
281  """
282  Performs a sequence independent superposition of model1 onto model2, the
283  reference.
284 
285 
286  :param model1: The model structure. If the superposition is successful, will
287  be superposed onto the reference structure
288  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
289  :param model2: The reference structure
290  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
291  :param tmalign: If not None, the path to the tmalign executable.
292  :returns: The result of the tmscore superposition
293  :rtype: :class:`ost.bindings.TMAlignResult`
294 
295  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
296  :raises: :class:`RuntimeError` if the superposition failed
297  """
298  tmp_dir_name=_SetupFiles((model1, model2))
299  result=_RunTmAlign(tmalign, tmp_dir_name)
300  model1.handle.EditXCS().ApplyTransform(result.transform)
301  _CleanupFiles(tmp_dir_name)
302  return result
303 
304 def TMScore(model1, model2, tmscore=None):
305  """
306  Performs a sequence dependent superposition of model1 onto model2,
307  the reference.
308 
309  :param model1: The model structure. If the superposition is successful, will
310  be superposed onto the reference structure
311  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
312  :param model2: The reference structure
313  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
314  :param tmscore: If not None, the path to the tmscore executable.
315  :returns: The result of the tmscore superposition
316  :rtype: :class:`TMScoreResult`
317 
318  :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
319  :raises: :class:`RuntimeError` if the superposition failed
320  """
321  tmp_dir_name=_SetupFiles((model1, model2))
322  result=_RunTmScore(tmscore, tmp_dir_name)
323  model1.handle.EditXCS().ApplyTransform(result.transform)
324  _CleanupFiles(tmp_dir_name)
325  return result
326 
327 
328 def USAlign(model1, model2, usalign=None, custom_chain_mapping=None):
329  """
330  Performs a sequence independent superposition of model1 onto model2, the
331  reference. Can deal with multimeric complexes and RNA.
332 
333  Creates temporary model files on disk and runs USalign with:
334  ``USalign model1.pdb model2.pdb -mm 1 -ter 0 -m rotmat.txt``
335 
336  :param model1: The model structure. If the superposition is successful, will
337  be superposed onto the reference structure
338  :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
339  :param model2: The reference structure
340  :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
341  :param usalign: If not None, the path to the USalign executable. Searches
342  for executable with name ``USalign`` in PATH if not given.
343  :param custom_chain_mapping: Custom chain mapping that is passed as -chainmap
344  to USalign executable. Raises an error is this
345  is not supported by the USalign executable you're
346  using (introduced in July 2023).
347  It's a dict with reference chain names as key
348  (model2) and model chain names as values
349  (model1).
350  :type custom_chain_mapping: :class:`dict`
351  :returns: The result of the superposition
352  :rtype: :class:`ost.bindings.MMAlignResult`
353 
354  :raises: :class:`~ost.settings.FileNotFound` if executable could not be located.
355  :raises: :class:`RuntimeError` if the superposition failed
356  """
357  tmp_dir_name=_SetupFiles((model1, model2),
358  custom_chain_mapping=custom_chain_mapping)
359  result=_RunUSAlign(usalign, tmp_dir_name)
360  model1.handle.EditXCS().ApplyTransform(result.transform)
361  _CleanupFiles(tmp_dir_name)
362  return result
std::vector< AlignmentHandle > AlignmentList
Three dimensional vector class, using Real precision.
Definition: vec3.hh:43