OpenStructure
Loading...
Searching...
No Matches
cadscore.py
Go to the documentation of this file.
1#------------------------------------------------------------------------------
2# This file is part of the OpenStructure project <www.openstructure.org>
3#
4# Copyright (C) 2008-2020 by the OpenStructure authors
5#
6# This library is free software; you can redistribute it and/or modify it under
7# the terms of the GNU Lesser General Public License as published by the Free
8# Software Foundation; either version 3.0 of the License, or (at your option)
9# any later version.
10# This library is distributed in the hope that it will be useful, but WITHOUT
11# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13# details.
14#
15# You should have received a copy of the GNU Lesser General Public License
16# along with this library; if not, write to the Free Software Foundation, Inc.,
17# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18#------------------------------------------------------------------------------
19"""
20Wrapper for the CAD score.
21
22References:
23
24Olechnovic K, Kulberkyte E, Venclovas C., CAD-score: A new contact area
25difference-based function for evaluation of protein structural models
26Proteins. 2012 Aug 30. [Epub ahead of print]
27
28Authors: Valerio Mariani, Alessandro Barbato
29"""
30
31import subprocess, os, tempfile, platform, re
32from ost import settings, io, mol
33
34def _SetupFiles(model, reference, chain_mapping):
35
36 if chain_mapping is not None:
37 model_handle = model
38 if isinstance(model_handle, mol.EntityView):
39 model_handle = mol.CreateEntityFromView(model_handle, False)
40 mapped_model = mol.CreateEntity()
41 ed = mapped_model.EditXCS()
42 for k,v in chain_mapping.items():
43 if v is not None:
44 ed.InsertChain(v, model_handle.FindChain(k), deep=True)
45 model = mapped_model
46
47 # create temporary directory
48 tmp_dir_name=tempfile.mkdtemp()
49 dia = 'PDB'
50 for chain in model.chains:
51 if chain.name==" ":
52 raise RuntimeError("One of the chains in the model has no name. Cannot "
53 "calculate CAD score")
54 if len(chain.name) > 1:
55 dia = 'CHARMM'
56 break;
57 for res in chain.residues:
58 if len(res.name) > 3:
59 dia = 'CHARMM'
60 break;
61 io.SavePDB(model, os.path.join(tmp_dir_name, 'model.pdb'), dialect=dia)
62 dia = 'PDB'
63 for chain in reference.chains:
64 if chain.name==" ":
65 raise RuntimeError("One of the chains in the reference has no name. "
66 "Cannot calculate CAD score")
67 if len(chain.name) > 1:
68 dia = 'CHARMM'
69 break;
70 for res in chain.residues:
71 if len(res.name) > 3:
72 dia = 'CHARMM'
73 break;
74 io.SavePDB(reference, os.path.join(tmp_dir_name, 'reference.pdb'),dialect=dia)
75
76 return tmp_dir_name
77
78def _CleanupFiles(dir_name):
79 import shutil
80 shutil.rmtree(dir_name)
81
83 """
84 Holds the result of running CAD
85
86 .. attribute:: globalAA
87
88 The global CAD's atom-atom (AA) score
89
90 .. attribute:: localAA
91
92 Dictionary containing local CAD's atom-atom (AA) scores.
93
94 :type: dictionary (key: tuple(chain, resnum) (e.g.:
95 ("A", ost.mol.ResNum(24)), value: CAD local AA score
96 (see CAD Documentation online)
97 """
98 def __init__(self, globalAA, localAA):
99 self.globalAA=globalAA
100 self.localAA=localAA
101
103 header = lines[0].split()
104 aa_idx = header.index("AA")
105 aa_score=float(lines[1].split()[aa_idx])
106 return aa_score
107
108def _ParseCADLocal(lines):
109 local_scores_idx = None
110 for line_idx in range(len(lines)):
111 if "local_scores" in lines[line_idx]:
112 local_scores_idx = line_idx
113 break
114 if local_scores_idx == None:
115 raise RuntimeError("Failed to parse local cadscores")
116 local_aa_dict={}
117 for line_idx in range(local_scores_idx+2, len(lines)):
118 items=lines[line_idx].split()
119 local_aa = float(items[2])
120 if local_aa < 0.0:
121 continue # invalid CAD score
122 key = (items[0], mol.ResNum(int(items[1])))
123 local_aa_dict[key] = local_aa
124
125 return local_aa_dict
126
128 return float(lines[0].split()[4])
129
131 local_aa_dict={}
132 chain_name_regex = r'c<.+?>'
133 resnum_regex = r'r<\d+>'
134 insertion_code_regex = r'i<\D>'
135 for line in lines:
136 local_aa = float(line.split()[-1])
137 if local_aa < 0.0:
138 continue # invalid CAD score
139 chain_data = re.findall(chain_name_regex, line)
140 resnum_data = re.findall(resnum_regex, line)
141 insertion_code_data = re.findall(insertion_code_regex, line)
142 resnum = None
143 if len(insertion_code_data) == 0:
144 resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')))
145 else:
146 resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')),
147 insertion_code_data[0][1:].strip('><'))
148 key = (chain_data[0][1:].strip('><'), resnum)
149 local_aa_dict[key] = local_aa
150 return local_aa_dict
151
152def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime):
153
154 model_filename=os.path.join(tmp_dir, 'model.pdb')
155 reference_filename=os.path.join(tmp_dir, 'reference.pdb')
156 globalAA = None
157 localAA = None
158
159 if platform.system() == "Windows":
160 raise RuntimeError('CAD score not available on Windows')
161
162 if mode == "classic":
163 cad_calc_path = None
164 cad_read_g_path = None
165 cad_read_l_path = None
166 if cad_bin_path:
167 cad_calc_path = settings.Locate('CADscore_calc.bash',
168 search_paths=[cad_bin_path],
169 search_system_paths=False)
170 cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash',
171 search_paths=[cad_bin_path],
172 search_system_paths=False)
173 cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash',
174 search_paths=[cad_bin_path],
175 search_system_paths=False)
176 # also try to locate the actual executable that is called from the
177 # bash scripts
178 executable_path = settings.Locate('voroprot2',
179 search_paths=[cad_bin_path],
180 search_system_paths=False)
181 else:
182 cad_calc_path = settings.Locate('CADscore_calc.bash')
183 cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash')
184 cad_read_l_path = settings.Locate('CADscore_read_local_scores.bash')
185 # also try to locate the actual executable that is called from the
186 # bash scripts
187 executable_path = settings.Locate('voroprot2')
188 command1="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path,
189 model_filename,
190 reference_filename,
191 os.path.join(tmp_dir,
192 "cadtemp"))
193 command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,
194 "cadtemp"))
195 command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path,
196 model_filename,
197 reference_filename,
198 os.path.join(tmp_dir,
199 "cadtemp"))
200
201 ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE)
202 ps1.communicate()
203 ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE)
204 stdout,_ = ps2.communicate()
205 lines=stdout.decode().splitlines()
206 try:
207 globalAA=_ParseCADGlobal(lines)
208 except:
209 raise RuntimeError("CAD calculation failed")
210 ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE)
211 stdout,_ = ps3.communicate()
212 lines=stdout.decode().splitlines()
213 try:
214 localAA=_ParseCADLocal(lines)
215 except:
216 raise RuntimeError("CAD calculation failed")
217
218 elif mode == "voronota":
219 local_score_filename = os.path.join(tmp_dir, "local_scores.txt")
220 voronota_cadscore_path = None
221 if cad_bin_path:
222 voronota_cadscore_path = settings.Locate("voronota-cadscore",
223 search_paths=[cad_bin_path],
224 search_system_paths=False)
225 # also try to locate the actual executable that is called from the
226 # bash script
227 executable_path = settings.Locate("voronota",
228 search_paths=[cad_bin_path],
229 search_system_paths=False)
230 else:
231 voronota_cadscore_path = settings.Locate("voronota-cadscore")
232 # also try to locate the actual executable that is called from the
233 # bash script
234 executable_path = settings.Locate("voronota")
235 cmd = [voronota_cadscore_path, '-m', model_filename, '-t',
236 reference_filename, '--contacts-query-by-code AA',
237 '--output-residue-scores', local_score_filename]
238 if old_regime:
239 cmd.append("--old-regime")
240 cmd = ' '.join(cmd)
241 ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
242 stdout, _ = ps.communicate()
243 try:
244 globalAA = _ParseVoronotaGlobal(stdout.decode().splitlines())
245 except:
246 raise RuntimeError("CAD calculation failed")
247 try:
248 with open(local_score_filename) as f:
249 localAA = _ParseVoronotaLocal(f.readlines())
250 except:
251 raise RuntimeError("CAD calculation failed")
252
253 else:
254 raise RuntimeError("Invalid CAD mode! Allowed are: "
255 "[\"classic\", \"voronota\"]")
256
257 return CADResult(globalAA,localAA)
258
259def _HasInsertionCodes(model, reference):
260 for r in model.residues:
261 if r.GetNumber().GetInsCode() != "\0":
262 return True
263 for r in reference.residues:
264 if r.GetNumber().GetInsCode() != "\0":
265 return True
266 return False
267
268def _MapLabels(model, cad_results, label, chain_mapping):
269
270 if chain_mapping is None:
271 for k,v in cad_results.localAA.items():
272 r = model.FindResidue(k[0], k[1])
273 if r.IsValid():
274 r.SetFloatProp(label, v)
275 else:
276 # chain_mapping has mdl chains as key and target chains as values
277 # the raw CAD results refer to the target chains => reverse mapping
278 rev_mapping = {v:k for k,v in chain_mapping.items()}
279 for k,v in cad_results.localAA.items():
280 cname = k[0]
281 rnum = k[1]
282 if cname in rev_mapping:
283 r = model.FindResidue(rev_mapping[cname], rnum)
284 if r.IsValid():
285 r.SetFloatProp(label, v)
286
287def CADScore(model, reference, mode = "voronota", label = "localcad",
288 old_regime = False, cad_bin_path = None, chain_mapping=None):
289 """
290 Calculates global and local atom-atom (AA) CAD Scores.
291
292 You can either access the original implementation available from
293 https://bitbucket.org/kliment/cadscore/downloads/
294 or the new implementation which is part of the Voronota package
295 available from https://bitbucket.org/kliment/voronota/downloads/.
296
297 The scores of the two implementations differ but strongly correlate
298 as the contacts between atoms are estimated differently. When using
299 the "voronota" *mode* you can minimize those discrepancies by
300 setting the *old_regime* flag to True.
301
302 Furthermore, the "voronota" *mode* generates per-residue scores that
303 are inverted when compared to the classical implementation
304 (0.0: bad, 1.0 good).
305
306 :param model: The model structure.
307 :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
308 :param reference: The reference structure
309 :type reference: :class:`~ost.mol.EntityView` or
310 :class:`~ost.mol.EntityHandle`
311 :param mode: What CAD score implementation to use, must be one in
312 ["classic", "voronota"]
313 :param label: Local CAD scores will be mapped on residues of model as
314 float property with this name
315 :type label: :class:`str`
316 :param old_regime: Only has an effect if *mode* is "voronota". If set to true,
317 the discrepancies between the two modes is minimized but
318 the behaviour of inverted scores persists.
319 :type old_regime: :class:`bool`
320 :param cad_bin_path: Path to search for the required executables
321 (["CADscore_calc.bash",
322 "CADscore_read_global_scores.bash",
323 "CADscore_read_local_scores.bash"] for "classic" *mode*
324 or ["voronota-cadscore"] for "voronota" *mode*). If not
325 set, the env path is searched.
326 :type cad_bin_path: :class:`str`
327 :param chain_mapping: Provide custom chain mapping in case of oligomers
328 (only supported for "voronota" *mode*). Provided as
329 :class:`dict` with model chain name as key and target
330 chain name as value. If set, scoring happens on a
331 substructure of model that is stripped to chains with
332 valid mapping.
333 :type chain_mapping: :class:`dict`
334 :returns: The result of the CAD score calculation
335 :rtype: :class:`CADResult`
336
337 :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
338 executables could not be located.
339 :raises: :class:`RuntimeError` if the calculation failed
340 """
341 if mode == "classic" and _HasInsertionCodes(model, reference):
342 raise RuntimeError("The classic CAD score implementation does not support "
343 "insertion codes in residues")
344
345 if chain_mapping is not None:
346 if model == "classic":
347 raise RuntimeError("The classic CAD score implementation does not "
348 "support custom chain mappings")
349
350 # do consistency checks of custom chain mapping
351 mdl_cnames = [ch.GetName() for ch in model.chains]
352 ref_cnames = [ch.GetName() for ch in reference.chains]
353
354 # check that each model chain name in the mapping is actually there
355 for cname in chain_mapping.keys():
356 if cname not in mdl_cnames:
357 raise RuntimeError(f"Model chain name \"{cname}\" provided in "
358 f"custom chain mapping is not present in provided "
359 f"model structure.")
360
361 # check that each target chain name in the mapping is actually there
362 for cname in chain_mapping.values():
363 if cname not in ref_cnames:
364 raise RuntimeError(f"Reference chain name \"{cname}\" provided in "
365 f"custom chain mapping is not present in provided "
366 f"reference structure.")
367
368 tmp_dir_name=_SetupFiles(model, reference, chain_mapping)
369 result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime)
370 _CleanupFiles(tmp_dir_name)
371 _MapLabels(model, result, label, chain_mapping)
372 return result
__init__(self, globalAA, localAA)
Definition cadscore.py:98
definition of EntityView
_HasInsertionCodes(model, reference)
Definition cadscore.py:259
_CleanupFiles(dir_name)
Definition cadscore.py:78
CADScore(model, reference, mode="voronota", label="localcad", old_regime=False, cad_bin_path=None, chain_mapping=None)
Definition cadscore.py:288
_RunCAD(tmp_dir, mode, cad_bin_path, old_regime)
Definition cadscore.py:152
_SetupFiles(model, reference, chain_mapping)
Definition cadscore.py:34
_MapLabels(model, cad_results, label, chain_mapping)
Definition cadscore.py:268