20 Wrapper for the CAD score.
24 Olechnovic K, Kulberkyte E, Venclovas C., CAD-score: A new contact area
25 difference-based function for evaluation of protein structural models
26 Proteins. 2012 Aug 30. [Epub ahead of print]
28 Authors: Valerio Mariani, Alessandro Barbato
31 import subprocess, os, tempfile, platform, re
32 from ost
import settings, io, mol
34 def _SetupFiles(model,reference):
36 tmp_dir_name=tempfile.mkdtemp()
38 for chain
in model.chains:
40 raise RuntimeError(
"One of the chains in the model has no name. Cannot "
41 "calculate CAD score")
42 if len(chain.name) > 1:
45 for res
in chain.residues:
49 io.SavePDB(model, os.path.join(tmp_dir_name,
'model.pdb'), dialect=dia)
51 for chain
in reference.chains:
53 raise RuntimeError(
"One of the chains in the reference has no name. "
54 "Cannot calculate CAD score")
55 if len(chain.name) > 1:
58 for res
in chain.residues:
62 io.SavePDB(reference, os.path.join(tmp_dir_name,
'reference.pdb'),dialect=dia)
65 def _CleanupFiles(dir_name):
67 shutil.rmtree(dir_name)
71 Holds the result of running CAD
73 .. attribute:: globalAA
75 The global CAD's atom-atom (AA) score
77 .. attribute:: localAA
79 Dictionary containing local CAD's atom-atom (AA) scores.
81 :type: dictionary (key: tuple(chain, resnum) (e.g.:
82 ("A", ost.mol.ResNum(24)), value: CAD local AA score
83 (see CAD Documentation online)
89 def _ParseCADGlobal(lines):
90 header = lines[0].split()
91 aa_idx = header.index(
"AA")
92 aa_score=float(lines[1].split()[aa_idx])
95 def _ParseCADLocal(lines):
96 local_scores_idx =
None
97 for line_idx
in range(len(lines)):
98 if "local_scores" in lines[line_idx]:
99 local_scores_idx = line_idx
101 if local_scores_idx ==
None:
102 raise RuntimeError(
"Failed to parse local cadscores")
104 for line_idx
in range(local_scores_idx+2, len(lines)):
105 items=lines[line_idx].split()
106 local_aa = float(items[2])
110 local_aa_dict[key] = local_aa
114 def _ParseVoronotaGlobal(lines):
115 return float(lines[0].split()[4])
117 def _ParseVoronotaLocal(lines):
119 chain_name_regex =
r'c<\D+>'
120 resnum_regex =
r'r<\d+>'
121 insertion_code_regex =
r'i<\D>'
123 local_aa = float(line.split()[-1])
126 chain_data = re.findall(chain_name_regex, line)
127 resnum_data = re.findall(resnum_regex, line)
128 insertion_code_data = re.findall(insertion_code_regex, line)
130 if len(insertion_code_data) == 0:
131 resnum =
mol.ResNum(int(resnum_data[0][1:].strip(
'><')))
133 resnum =
mol.ResNum(int(resnum_data[0][1:].strip(
'><')),
134 insertion_code_data[0][1:].strip(
'><'))
135 key = (chain_data[0][1:].strip(
'><'), resnum)
136 local_aa_dict[key] = local_aa
139 def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime):
141 model_filename=os.path.join(tmp_dir,
'model.pdb')
142 reference_filename=os.path.join(tmp_dir,
'reference.pdb')
146 if platform.system() ==
"Windows":
147 raise RuntimeError(
'CAD score not available on Windows')
149 if mode ==
"classic":
151 cad_read_g_path =
None
152 cad_read_l_path =
None
154 cad_calc_path = settings.Locate(
'CADscore_calc.bash',
155 search_paths=[cad_bin_path],
156 search_system_paths=
False)
157 cad_read_g_path = settings.Locate(
'CADscore_read_global_scores.bash',
158 search_paths=[cad_bin_path],
159 search_system_paths=
False)
160 cad_read_l_path=settings.Locate(
'CADscore_read_local_scores.bash',
161 search_paths=[cad_bin_path],
162 search_system_paths=
False)
165 executable_path = settings.Locate(
'voroprot2',
166 search_paths=[cad_bin_path],
167 search_system_paths=
False)
169 cad_calc_path = settings.Locate(
'CADscore_calc.bash')
170 cad_read_g_path = settings.Locate(
'CADscore_read_global_scores.bash')
171 cad_read_l_path = settings.Locate(
'CADscore_read_local_scores.bash')
174 executable_path = settings.Locate(
'voroprot2')
175 command1=
"\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path,
178 os.path.join(tmp_dir,
180 command2=
"\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,
182 command3=
"\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path,
185 os.path.join(tmp_dir,
188 ps1=subprocess.Popen(command1, shell=
True, stdout=subprocess.PIPE)
190 ps2=subprocess.Popen(command2, shell=
True, stdout=subprocess.PIPE)
192 lines=ps2.stdout.readlines()
194 globalAA=_ParseCADGlobal(lines)
196 raise RuntimeError(
"CAD calculation failed")
197 ps3=subprocess.Popen(command3, shell=
True, stdout=subprocess.PIPE)
199 lines=ps3.stdout.readlines()
201 localAA=_ParseCADLocal(lines)
203 raise RuntimeError(
"CAD calculation failed")
205 elif mode ==
"voronota":
206 local_score_filename = os.path.join(tmp_dir,
"local_scores.txt")
207 voronota_cadscore_path =
None
209 voronota_cadscore_path = settings.Locate(
"voronota-cadscore",
210 search_paths=[cad_bin_path],
211 search_system_paths=
False)
214 executable_path = settings.Locate(
"voronota",
215 search_paths=[cad_bin_path],
216 search_system_paths=
False)
218 voronota_cadscore_path = settings.Locate(
"voronota-cadscore")
221 executable_path = settings.Locate(
"voronota")
222 cmd = [voronota_cadscore_path,
'-m', model_filename,
'-t',
223 reference_filename,
'--contacts-query-by-code AA',
224 '--output-residue-scores', local_score_filename]
226 cmd.append(
"--old-regime")
228 ps = subprocess.Popen(cmd, shell=
True, stdout=subprocess.PIPE)
231 globalAA = _ParseVoronotaGlobal(ps.stdout.readlines())
233 raise RuntimeError(
"CAD calculation failed")
235 localAA = _ParseVoronotaLocal(open(local_score_filename).readlines())
237 raise RuntimeError(
"CAD calculation failed")
240 raise RuntimeError(
"Invalid CAD mode! Allowed are: "
241 "[\"classic\", \"voronota\"]")
246 def _HasInsertionCodes(model, reference):
247 for r
in model.residues:
248 if r.GetNumber().GetInsCode() !=
"\0":
251 for r
in reference.residues:
252 if r.GetNumber().GetInsCode() !=
"\0":
257 def _MapLabels(model, cad_results, label):
258 for k,v
in cad_results.localAA.iteritems():
259 r = model.FindResidue(k[0], k[1])
261 raise RuntimeError(
"Failed to map cadscore on residues: " +
262 "CAD score estimated for residue in chain \"" +
263 k[0] +
"\" with ResNum " + str(k[1]) +
". Residue " +
264 "could not be found in model.")
265 r.SetFloatProp(label, v)
267 def CADScore(model, reference, mode = "classic", label = "localcad",
268 old_regime =
False, cad_bin_path =
None):
270 Calculates global and local atom-atom (AA) CAD Scores.
272 You can either access the original implementation available from
273 https://bitbucket.org/kliment/cadscore/downloads/
274 or the new implementation which is part of the Voronota package
275 available from https://bitbucket.org/kliment/voronota/downloads/.
277 The scores of the two implementations differ but strongly correlate
278 as the contacts between atoms are estimated differently. When using
279 the "voronota" *mode* you can minimize those discrepancies by
280 setting the *old_regime* flag to True.
282 Furthermore, the "voronota" *mode* generates per-residue scores that
283 are inverted when compared to the classical implementation
284 (0.0: bad, 1.0 good).
286 :param model: The model structure.
287 :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
288 :param reference: The reference structure
289 :type reference: :class:`~ost.mol.EntityView` or
290 :class:`~ost.mol.EntityHandle`
291 :param mode: What CAD score implementation to use, must be one in
292 ["classic", "voronota"]
293 :param label: Local CAD scores will be mapped on residues of model as
294 float property with this name
295 :type label: :class:`str`
296 :param old_regime: Only has an effect if *mode* is "voronota". If set to true,
297 the discrepancies between the two modes is minimized but
298 the behaviour of inverted scores persists.
299 :type old_regime: :class:`bool`
300 :param cad_bin_path: Path to search for the required executables
301 (["CADscore_calc.bash",
302 "CADscore_read_global_scores.bash",
303 "CADscore_read_local_scores.bash"] for "classic" *mode*
304 or ["voronota-cadscore"] for "voronota" *mode*). If not
305 set, the env path is searched.
306 :type cad_bin_path: :class:`str`
307 :returns: The result of the CAD score calculation
308 :rtype: :class:`CADResult`
310 :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
311 executables could not be located.
312 :raises: :class:`RuntimeError` if the calculation failed
314 if mode ==
"classic" and _HasInsertionCodes(model, reference):
315 raise RuntimeError(
"The classic CAD score implementation does not support "
316 "insertion codes in residues")
317 tmp_dir_name=_SetupFiles(model, reference)
318 result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime)
319 _CleanupFiles(tmp_dir_name)
320 _MapLabels(model, result, label)