OpenStructure
Loading...
Searching...
No Matches
tmtools.py
Go to the documentation of this file.
1#------------------------------------------------------------------------------
2# This file is part of the OpenStructure project <www.openstructure.org>
3#
4# Copyright (C) 2008-2020 by the OpenStructure authors
5#
6# This library is free software; you can redistribute it and/or modify it under
7# the terms of the GNU Lesser General Public License as published by the Free
8# Software Foundation; either version 3.0 of the License, or (at your option)
9# any later version.
10# This library is distributed in the hope that it will be useful, but WITHOUT
11# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13# details.
14#
15# You should have received a copy of the GNU Lesser General Public License
16# along with this library; if not, write to the Free Software Foundation, Inc.,
17# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18#------------------------------------------------------------------------------
19"""
20Wrappers for the tmalign and tmscore utilities.
21
22References:
23
24tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
25tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
26
27
28Authors: Pascal Benkert, Marco Biasini
29"""
30
31import subprocess, os, tempfile, platform
32import ost
33from ost import settings, io, geom, seq
34
35def _SetupFiles(models, custom_chain_mapping = None):
36 # create temporary directory
37 tmp_dir_name=tempfile.mkdtemp()
38 dia = 'PDB'
39 for index, model in enumerate(models):
40 for chain in model.chains:
41 if len(chain.name) > 1:
42 dia = 'CHARMM'
43 break;
44 for res in chain.residues:
45 if len(res.name) > 3:
46 dia = 'CHARMM'
47 break;
48 io.SavePDB(model, os.path.join(tmp_dir_name, 'model%02d.pdb' % (index+1)), dialect=dia)
49 if custom_chain_mapping is not None:
50 with open(os.path.join(tmp_dir_name, "custom_mapping.txt"), 'w') as fh:
51 fh.write('\n'.join([f"{mdl_ch}\t{ref_ch}" for ref_ch, mdl_ch in custom_chain_mapping.items()]))
52 return tmp_dir_name
53
54def _CleanupFiles(dir_name):
55 import shutil
56 shutil.rmtree(dir_name)
57
58def _ParseTmAlign(lines,lines_matrix):
59 info_line=lines[12].split(',')
60 aln_length=int(info_line[0].split('=')[1].strip())
61 rmsd=float(info_line[1].split('=')[1].strip())
62 tm_score_swapped=float(lines[13].split('=')[1].split('(')[0].strip())
63 tm_score=float(lines[14].split('=')[1].split('(')[0].strip())
64 tf1=[float(i.strip()) for i in lines_matrix[2].split()]
65 tf2=[float(i.strip()) for i in lines_matrix[3].split()]
66 tf3=[float(i.strip()) for i in lines_matrix[4].split()]
67 rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
68 tf2[4], tf3[2], tf3[3], tf3[4])
69 tf=geom.Mat4(rot)
70 tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
71 seq1 = seq.CreateSequence("1",lines[18].strip())
72 seq2 = seq.CreateSequence("2",lines[20].strip())
73 alignment = seq.CreateAlignment()
74 alignment.AddSequence(seq2)
75 alignment.AddSequence(seq1)
76 return ost.bindings.TMAlignResult(rmsd, tm_score, tm_score_swapped,
77 aln_length, tf, alignment)
78
79def _ParseUSAlign(lines,lines_matrix):
80
81 # stuff that is immediately parsed
82 rmsd = None
83 tm_score = None
84 tm_score_swapped = None
85 aligned_length = None
86
87 # first goes into intermediate data structures
88 aln_data = list()
89 mapping_data1 = list()
90 mapping_data2 = list()
91 in_aln = False
92
93 for line in lines:
94 if in_aln:
95 if len(line.strip()) == 0:
96 in_aln = False
97 else:
98 aln_data.append(line.strip('*'))
99 elif line.startswith("Name of Structure_1:"):
100 tmp = [item.strip() for item in line.split()[3].split(':')[1:]]
101 for item in tmp:
102 if len(item) > 0:
103 mapping_data1.append(item.split(',')[1])
104 else:
105 mapping_data1.append("")
106 elif line.startswith("Name of Structure_2:"):
107 tmp = [item.strip() for item in line.split()[3].split(':')[1:]]
108 for item in tmp:
109 if len(item) > 0:
110 mapping_data2.append(item.split(',')[1])
111 else:
112 mapping_data2.append("")
113 elif line.startswith("Aligned length="):
114 data = [item.strip() for item in line.split(',')]
115 for item in data:
116 if item.startswith("Aligned length="):
117 aligned_length = int(item.split("=")[1])
118 elif item.startswith("RMSD="):
119 rmsd = float(item.split("=")[1])
120 elif line.startswith("TM-score="):
121 if "(normalized by length of Structure_1" in line:
122 tm_score_swapped = float(line.split('(')[0].split('=')[1].strip())
123 elif "(normalized by length of Structure_2" in line:
124 tm_score = float(line.split('(')[0].split('=')[1].strip())
125 elif line.startswith("(\":\" denotes residue pairs of"):
126 in_aln = True
127
128 assert(len(aln_data)==3)
129 aln_sequences1 = aln_data[0].split('*')
130 aln_sequences2 = aln_data[2].split('*')
131
132 # do mapping/aln data
133 alns = ost.seq.AlignmentList()
134 ent1_mapped_chains = ost.StringList()
135 ent2_mapped_chains = ost.StringList()
136 assert(len(mapping_data1) == len(mapping_data2))
137 assert(len(aln_sequences1) == len(aln_sequences2))
138 assert(len(mapping_data1) == len(aln_sequences1))
139 for a, b, c, d in zip(mapping_data1, mapping_data2,
140 aln_sequences1, aln_sequences2):
141 if len(a) > 0 and len(b) > 0:
142 ent1_mapped_chains.append(a)
143 ent2_mapped_chains.append(b)
144 assert(len(c) == len(d))
145 aln = seq.CreateAlignment()
146 aln.AddSequence(seq.CreateSequence(a, c))
147 aln.AddSequence(seq.CreateSequence(b, d))
148 alns.append(aln)
149
150 # parse transformation matrix
151 tf1=[float(i.strip()) for i in lines_matrix[2].split()[1:]]
152 tf2=[float(i.strip()) for i in lines_matrix[3].split()[1:]]
153 tf3=[float(i.strip()) for i in lines_matrix[4].split()[1:]]
154 mat = geom.Mat4(tf1[1], tf1[2], tf1[3], tf1[0],
155 tf2[1], tf2[2], tf2[3], tf2[0],
156 tf3[1], tf3[2], tf3[3], tf3[0],
157 0.0, 0.0, 0.0, 1.0)
158
159 return ost.bindings.MMAlignResult(rmsd, tm_score, tm_score_swapped,
160 aligned_length, mat, alns,
161 ent1_mapped_chains, ent2_mapped_chains)
162
163def _RunTmAlign(tmalign, tmp_dir):
164 model1_filename=os.path.join(tmp_dir, 'model01.pdb')
165 model2_filename=os.path.join(tmp_dir, 'model02.pdb')
166 if platform.system() == "Windows":
167 tmalign_path=settings.Locate('tmalign.exe', explicit_file_name=tmalign)
168 command="\"%s\" %s %s -m %s" %(os.path.normpath(tmalign_path), model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
169 else:
170 tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
171 command="\"%s\" \"%s\" \"%s\" -m \"%s\"" %(tmalign_path, model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
172 ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
173 stdout,_=ps.communicate()
174 lines=stdout.decode().splitlines()
175 if (len(lines))<22:
176 _CleanupFiles(tmp_dir)
177 raise RuntimeError("tmalign superposition failed")
178 matrix_file=open(os.path.join(tmp_dir,'matrix.txt'))
179 lines_matrix=matrix_file.readlines()
180 matrix_file.close()
181 return _ParseTmAlign(lines,lines_matrix)
182
183def _RunUSAlign(usalign, tmp_dir):
184 model1_filename=os.path.join(tmp_dir, 'model01.pdb')
185 model2_filename=os.path.join(tmp_dir, 'model02.pdb')
186 mat_filename = os.path.join(tmp_dir, "mat.txt")
187 usalign_path=settings.Locate('USalign', explicit_file_name=usalign)
188 command = f"{usalign_path} {model1_filename} {model2_filename} -mm 1 -ter 0 -m {mat_filename}"
189 custom_mapping = os.path.join(tmp_dir, "custom_mapping.txt")
190 if os.path.exists(custom_mapping):
191 command += f" -chainmap {custom_mapping}"
192 ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
193 stdout,_=ps.communicate()
194 lines=stdout.decode().splitlines()
195 if (len(lines))<22:
196 _CleanupFiles(tmp_dir)
197 raise RuntimeError("USalign superposition failed")
198 with open(mat_filename) as fh:
199 lines_matrix = fh.readlines()
200 return _ParseUSAlign(lines,lines_matrix)
201
203 """
204 Holds the result of running TMscore
205
206 .. attribute:: rmsd_common
207
208 The RMSD of the common Calpha atoms of both structures
209
210 .. attribute:: rmsd_below_five
211
212 The RMSD of all Calpha atoms that can be superposed below five Angstroem
213
214 .. attribute:: tm_score
215
216 The TM-score of the structural superposition
217
218 .. attribute:: transform
219
220 The transform that superposes the model onto the reference structure.
221
222 :type: :class:`~ost.geom.Mat4`
223
224 .. attribute:: gdt_ha
225
226 The GDT_HA of the model to the reference structure.
227
228 .. attribute:: gdt_ts
229
230 The GDT_TS of the model to the reference structure.
231
232 """
233 def __init__(self, rmsd_common, tm_score, max_sub,
234 gdt_ts, gdt_ha, rmsd_below_five, transform):
235 self.rmsd_common=rmsd_common
236 self.tm_score=tm_score
237 self.max_sub=max_sub
238 self.gdt_ts=gdt_ts
239 self.gdt_ha=gdt_ha
240 self.rmsd_below_five=rmsd_below_five
241 self.transform=transform
242
243def _ParseTmScore(lines):
244 tf1=[float(i.strip()) for i in lines[23].split()]
245 tf2=[float(i.strip()) for i in lines[24].split()]
246 tf3=[float(i.strip()) for i in lines[25].split()]
247 rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
248 tf2[4], tf3[2], tf3[3], tf3[4])
249 tf=geom.Mat4(rot)
250 tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
251 result=TMScoreResult(float(lines[14].split()[-1].strip()),
252 float(lines[16].split()[2].strip()),
253 float(lines[17].split()[1].strip()),
254 float(lines[18].split()[1].strip()),
255 float(lines[19].split()[1].strip()),
256 float(lines[27].split()[-1].strip()),
257 tf)
258 return result
259
260def _RunTmScore(tmscore, tmp_dir):
261 model1_filename=os.path.join(tmp_dir, 'model01.pdb')
262 model2_filename=os.path.join(tmp_dir, 'model02.pdb')
263 if platform.system() == "Windows":
264 tmscore_path=settings.Locate('tmscore.exe', explicit_file_name=tmscore)
265 command="\"%s\" %s %s" %(os.path.normpath(tmscore_path), model1_filename,
266 model2_filename)
267 else:
268 tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
269 command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
270 model2_filename)
271 ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
272 stdout,_=ps.communicate()
273 lines=stdout.decode().splitlines()
274 if (len(lines))<22:
275 _CleanupFiles(tmp_dir)
276 raise RuntimeError("tmscore superposition failed")
277 return _ParseTmScore(lines)
278
279
280def TMAlign(model1, model2, tmalign=None):
281 """
282 Performs a sequence independent superposition of model1 onto model2, the
283 reference.
284
285
286 :param model1: The model structure. If the superposition is successful, will
287 be superposed onto the reference structure
288 :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
289 :param model2: The reference structure
290 :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
291 :param tmalign: If not None, the path to the tmalign executable.
292 :returns: The result of the tmscore superposition
293 :rtype: :class:`ost.bindings.TMAlignResult`
294
295 :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
296 :raises: :class:`RuntimeError` if the superposition failed
297 """
298 tmp_dir_name=_SetupFiles((model1, model2))
299 result=_RunTmAlign(tmalign, tmp_dir_name)
300 model1.handle.EditXCS().ApplyTransform(result.transform)
301 _CleanupFiles(tmp_dir_name)
302 return result
303
304def TMScore(model1, model2, tmscore=None):
305 """
306 Performs a sequence dependent superposition of model1 onto model2,
307 the reference.
308
309 :param model1: The model structure. If the superposition is successful, will
310 be superposed onto the reference structure
311 :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
312 :param model2: The reference structure
313 :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
314 :param tmscore: If not None, the path to the tmscore executable.
315 :returns: The result of the tmscore superposition
316 :rtype: :class:`TMScoreResult`
317
318 :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
319 :raises: :class:`RuntimeError` if the superposition failed
320 """
321 tmp_dir_name=_SetupFiles((model1, model2))
322 result=_RunTmScore(tmscore, tmp_dir_name)
323 model1.handle.EditXCS().ApplyTransform(result.transform)
324 _CleanupFiles(tmp_dir_name)
325 return result
326
327
328def USAlign(model1, model2, usalign=None, custom_chain_mapping=None):
329 """
330 Performs a sequence independent superposition of model1 onto model2, the
331 reference. Can deal with multimeric complexes and RNA.
332
333 Creates temporary model files on disk and runs USalign with:
334 ``USalign model1.pdb model2.pdb -mm 1 -ter 0 -m rotmat.txt``
335
336 :param model1: The model structure. If the superposition is successful, will
337 be superposed onto the reference structure
338 :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
339 :param model2: The reference structure
340 :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
341 :param usalign: If not None, the path to the USalign executable. Searches
342 for executable with name ``USalign`` in PATH if not given.
343 :param custom_chain_mapping: Custom chain mapping that is passed as -chainmap
344 to USalign executable. Raises an error is this
345 is not supported by the USalign executable you're
346 using (introduced in July 2023).
347 It's a dict with reference chain names as key
348 (model2) and model chain names as values
349 (model1).
350 :type custom_chain_mapping: :class:`dict`
351 :returns: The result of the superposition
352 :rtype: :class:`ost.bindings.MMAlignResult`
353
354 :raises: :class:`~ost.settings.FileNotFound` if executable could not be located.
355 :raises: :class:`RuntimeError` if the superposition failed
356 """
357 tmp_dir_name=_SetupFiles((model1, model2),
358 custom_chain_mapping=custom_chain_mapping)
359 result=_RunUSAlign(usalign, tmp_dir_name)
360 model1.handle.EditXCS().ApplyTransform(result.transform)
361 _CleanupFiles(tmp_dir_name)
362 return result
Three dimensional vector class, using Real precision.
Definition vec3.hh:48
__init__(self, rmsd_common, tm_score, max_sub, gdt_ts, gdt_ha, rmsd_below_five, transform)
Definition tmtools.py:234
_RunTmAlign(tmalign, tmp_dir)
Definition tmtools.py:163
USAlign(model1, model2, usalign=None, custom_chain_mapping=None)
Definition tmtools.py:328
_RunTmScore(tmscore, tmp_dir)
Definition tmtools.py:260
_CleanupFiles(dir_name)
Definition tmtools.py:54
TMAlign(model1, model2, tmalign=None)
Definition tmtools.py:280
_SetupFiles(models, custom_chain_mapping=None)
Definition tmtools.py:35
_ParseUSAlign(lines, lines_matrix)
Definition tmtools.py:79
TMScore(model1, model2, tmscore=None)
Definition tmtools.py:304
_ParseTmAlign(lines, lines_matrix)
Definition tmtools.py:58
_RunUSAlign(usalign, tmp_dir)
Definition tmtools.py:183
std::vector< AlignmentHandle > AlignmentList