20 Wrapper for the CAD score.
24 Olechnovic K, Kulberkyte E, Venclovas C., CAD-score: A new contact area
25 difference-based function for evaluation of protein structural models
26 Proteins. 2012 Aug 30. [Epub ahead of print]
28 Authors: Valerio Mariani, Alessandro Barbato
31 import subprocess, os, tempfile, platform, re
32 from ost
import settings, io, mol
34 def _SetupFiles(model, reference, chain_mapping):
36 if chain_mapping
is not None:
39 model_handle = mol.CreateEntityFromView(model_handle,
False)
40 mapped_model = mol.CreateEntity()
41 ed = mapped_model.EditXCS()
42 for k,v
in chain_mapping.items():
44 ed.InsertChain(v, model_handle.FindChain(k), deep=
True)
48 tmp_dir_name=tempfile.mkdtemp()
50 for chain
in model.chains:
52 raise RuntimeError(
"One of the chains in the model has no name. Cannot "
53 "calculate CAD score")
54 if len(chain.name) > 1:
57 for res
in chain.residues:
61 io.SavePDB(model, os.path.join(tmp_dir_name,
'model.pdb'), dialect=dia)
63 for chain
in reference.chains:
65 raise RuntimeError(
"One of the chains in the reference has no name. "
66 "Cannot calculate CAD score")
67 if len(chain.name) > 1:
70 for res
in chain.residues:
74 io.SavePDB(reference, os.path.join(tmp_dir_name,
'reference.pdb'),dialect=dia)
78 def _CleanupFiles(dir_name):
80 shutil.rmtree(dir_name)
84 Holds the result of running CAD
86 .. attribute:: globalAA
88 The global CAD's atom-atom (AA) score
90 .. attribute:: localAA
92 Dictionary containing local CAD's atom-atom (AA) scores.
94 :type: dictionary (key: tuple(chain, resnum) (e.g.:
95 ("A", ost.mol.ResNum(24)), value: CAD local AA score
96 (see CAD Documentation online)
102 def _ParseCADGlobal(lines):
103 header = lines[0].split()
104 aa_idx = header.index(
"AA")
105 aa_score=float(lines[1].split()[aa_idx])
108 def _ParseCADLocal(lines):
109 local_scores_idx =
None
110 for line_idx
in range(len(lines)):
111 if "local_scores" in lines[line_idx]:
112 local_scores_idx = line_idx
114 if local_scores_idx ==
None:
115 raise RuntimeError(
"Failed to parse local cadscores")
117 for line_idx
in range(local_scores_idx+2, len(lines)):
118 items=lines[line_idx].split()
119 local_aa = float(items[2])
123 local_aa_dict[key] = local_aa
127 def _ParseVoronotaGlobal(lines):
128 return float(lines[0].split()[4])
130 def _ParseVoronotaLocal(lines):
132 chain_name_regex =
r'c<.+?>'
133 resnum_regex =
r'r<\d+>'
134 insertion_code_regex =
r'i<\D>'
136 local_aa = float(line.split()[-1])
139 chain_data = re.findall(chain_name_regex, line)
140 resnum_data = re.findall(resnum_regex, line)
141 insertion_code_data = re.findall(insertion_code_regex, line)
143 if len(insertion_code_data) == 0:
144 resnum =
mol.ResNum(int(resnum_data[0][1:].strip(
'><')))
146 resnum =
mol.ResNum(int(resnum_data[0][1:].strip(
'><')),
147 insertion_code_data[0][1:].strip(
'><'))
148 key = (chain_data[0][1:].strip(
'><'), resnum)
149 local_aa_dict[key] = local_aa
152 def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime):
154 model_filename=os.path.join(tmp_dir,
'model.pdb')
155 reference_filename=os.path.join(tmp_dir,
'reference.pdb')
159 if platform.system() ==
"Windows":
160 raise RuntimeError(
'CAD score not available on Windows')
162 if mode ==
"classic":
164 cad_read_g_path =
None
165 cad_read_l_path =
None
167 cad_calc_path = settings.Locate(
'CADscore_calc.bash',
168 search_paths=[cad_bin_path],
169 search_system_paths=
False)
170 cad_read_g_path = settings.Locate(
'CADscore_read_global_scores.bash',
171 search_paths=[cad_bin_path],
172 search_system_paths=
False)
173 cad_read_l_path=settings.Locate(
'CADscore_read_local_scores.bash',
174 search_paths=[cad_bin_path],
175 search_system_paths=
False)
178 executable_path = settings.Locate(
'voroprot2',
179 search_paths=[cad_bin_path],
180 search_system_paths=
False)
182 cad_calc_path = settings.Locate(
'CADscore_calc.bash')
183 cad_read_g_path = settings.Locate(
'CADscore_read_global_scores.bash')
184 cad_read_l_path = settings.Locate(
'CADscore_read_local_scores.bash')
187 executable_path = settings.Locate(
'voroprot2')
188 command1=
"\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path,
191 os.path.join(tmp_dir,
193 command2=
"\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,
195 command3=
"\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path,
198 os.path.join(tmp_dir,
201 ps1=subprocess.Popen(command1, shell=
True, stdout=subprocess.PIPE)
203 ps2=subprocess.Popen(command2, shell=
True, stdout=subprocess.PIPE)
204 stdout,_ = ps2.communicate()
205 lines=stdout.decode().splitlines()
207 globalAA=_ParseCADGlobal(lines)
209 raise RuntimeError(
"CAD calculation failed")
210 ps3=subprocess.Popen(command3, shell=
True, stdout=subprocess.PIPE)
211 stdout,_ = ps3.communicate()
212 lines=stdout.decode().splitlines()
214 localAA=_ParseCADLocal(lines)
216 raise RuntimeError(
"CAD calculation failed")
218 elif mode ==
"voronota":
219 local_score_filename = os.path.join(tmp_dir,
"local_scores.txt")
220 voronota_cadscore_path =
None
222 voronota_cadscore_path = settings.Locate(
"voronota-cadscore",
223 search_paths=[cad_bin_path],
224 search_system_paths=
False)
227 executable_path = settings.Locate(
"voronota",
228 search_paths=[cad_bin_path],
229 search_system_paths=
False)
231 voronota_cadscore_path = settings.Locate(
"voronota-cadscore")
234 executable_path = settings.Locate(
"voronota")
235 cmd = [voronota_cadscore_path,
'-m', model_filename,
'-t',
236 reference_filename,
'--contacts-query-by-code AA',
237 '--output-residue-scores', local_score_filename]
239 cmd.append(
"--old-regime")
241 ps = subprocess.Popen(cmd, shell=
True, stdout=subprocess.PIPE)
242 stdout, _ = ps.communicate()
244 globalAA = _ParseVoronotaGlobal(stdout.decode().splitlines())
246 raise RuntimeError(
"CAD calculation failed")
248 with open(local_score_filename)
as f:
249 localAA = _ParseVoronotaLocal(f.readlines())
251 raise RuntimeError(
"CAD calculation failed")
254 raise RuntimeError(
"Invalid CAD mode! Allowed are: "
255 "[\"classic\", \"voronota\"]")
259 def _HasInsertionCodes(model, reference):
260 for r
in model.residues:
261 if r.GetNumber().GetInsCode() !=
"\0":
263 for r
in reference.residues:
264 if r.GetNumber().GetInsCode() !=
"\0":
268 def _MapLabels(model, cad_results, label, chain_mapping):
270 if chain_mapping
is None:
271 for k,v
in cad_results.localAA.items():
272 r = model.FindResidue(k[0], k[1])
274 r.SetFloatProp(label, v)
278 rev_mapping = {v:k
for k,v
in chain_mapping.items()}
279 for k,v
in cad_results.localAA.items():
282 if cname
in rev_mapping:
283 r = model.FindResidue(rev_mapping[cname], rnum)
285 r.SetFloatProp(label, v)
287 def CADScore(model, reference, mode = "voronota", label = "localcad",
288 old_regime =
False, cad_bin_path =
None, chain_mapping=
None):
290 Calculates global and local atom-atom (AA) CAD Scores.
292 You can either access the original implementation available from
293 https://bitbucket.org/kliment/cadscore/downloads/
294 or the new implementation which is part of the Voronota package
295 available from https://bitbucket.org/kliment/voronota/downloads/.
297 The scores of the two implementations differ but strongly correlate
298 as the contacts between atoms are estimated differently. When using
299 the "voronota" *mode* you can minimize those discrepancies by
300 setting the *old_regime* flag to True.
302 Furthermore, the "voronota" *mode* generates per-residue scores that
303 are inverted when compared to the classical implementation
304 (0.0: bad, 1.0 good).
306 :param model: The model structure.
307 :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
308 :param reference: The reference structure
309 :type reference: :class:`~ost.mol.EntityView` or
310 :class:`~ost.mol.EntityHandle`
311 :param mode: What CAD score implementation to use, must be one in
312 ["classic", "voronota"]
313 :param label: Local CAD scores will be mapped on residues of model as
314 float property with this name
315 :type label: :class:`str`
316 :param old_regime: Only has an effect if *mode* is "voronota". If set to true,
317 the discrepancies between the two modes is minimized but
318 the behaviour of inverted scores persists.
319 :type old_regime: :class:`bool`
320 :param cad_bin_path: Path to search for the required executables
321 (["CADscore_calc.bash",
322 "CADscore_read_global_scores.bash",
323 "CADscore_read_local_scores.bash"] for "classic" *mode*
324 or ["voronota-cadscore"] for "voronota" *mode*). If not
325 set, the env path is searched.
326 :type cad_bin_path: :class:`str`
327 :param chain_mapping: Provide custom chain mapping in case of oligomers
328 (only supported for "voronota" *mode*). Provided as
329 :class:`dict` with model chain name as key and target
330 chain name as value. If set, scoring happens on a
331 substructure of model that is stripped to chains with
333 :type chain_mapping: :class:`dict`
334 :returns: The result of the CAD score calculation
335 :rtype: :class:`CADResult`
337 :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
338 executables could not be located.
339 :raises: :class:`RuntimeError` if the calculation failed
341 if mode ==
"classic" and _HasInsertionCodes(model, reference):
342 raise RuntimeError(
"The classic CAD score implementation does not support "
343 "insertion codes in residues")
345 if chain_mapping
is not None:
346 if model ==
"classic":
347 raise RuntimeError(
"The classic CAD score implementation does not "
348 "support custom chain mappings")
351 mdl_cnames = [ch.GetName()
for ch
in model.chains]
352 ref_cnames = [ch.GetName()
for ch
in reference.chains]
355 for cname
in chain_mapping.keys():
356 if cname
not in mdl_cnames:
357 raise RuntimeError(f
"Model chain name \"{cname}\" provided in "
358 f
"custom chain mapping is not present in provided "
362 for cname
in chain_mapping.values():
363 if cname
not in ref_cnames:
364 raise RuntimeError(f
"Reference chain name \"{cname}\" provided in "
365 f
"custom chain mapping is not present in provided "
366 f
"reference structure.")
368 tmp_dir_name=_SetupFiles(model, reference, chain_mapping)
369 result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime)
370 _CleanupFiles(tmp_dir_name)
371 _MapLabels(model, result, label, chain_mapping)