OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
cadscore.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2020 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Wrapper for the CAD score.
21 
22 References:
23 
24 Olechnovic K, Kulberkyte E, Venclovas C., CAD-score: A new contact area
25 difference-based function for evaluation of protein structural models
26 Proteins. 2012 Aug 30. [Epub ahead of print]
27 
28 Authors: Valerio Mariani, Alessandro Barbato
29 """
30 
31 import subprocess, os, tempfile, platform, re
32 from ost import settings, io, mol
33 
34 def _SetupFiles(model, reference, chain_mapping):
35 
36  if chain_mapping is not None:
37  model_handle = model
38  if isinstance(model_handle, mol.EntityView):
39  model_handle = mol.CreateEntityFromView(model_handle, False)
40  mapped_model = mol.CreateEntity()
41  ed = mapped_model.EditXCS()
42  for k,v in chain_mapping.items():
43  if v is not None:
44  ed.InsertChain(v, model_handle.FindChain(k), deep=True)
45  model = mapped_model
46 
47  # create temporary directory
48  tmp_dir_name=tempfile.mkdtemp()
49  dia = 'PDB'
50  for chain in model.chains:
51  if chain.name==" ":
52  raise RuntimeError("One of the chains in the model has no name. Cannot "
53  "calculate CAD score")
54  if len(chain.name) > 1:
55  dia = 'CHARMM'
56  break;
57  for res in chain.residues:
58  if len(res.name) > 3:
59  dia = 'CHARMM'
60  break;
61  io.SavePDB(model, os.path.join(tmp_dir_name, 'model.pdb'), dialect=dia)
62  dia = 'PDB'
63  for chain in reference.chains:
64  if chain.name==" ":
65  raise RuntimeError("One of the chains in the reference has no name. "
66  "Cannot calculate CAD score")
67  if len(chain.name) > 1:
68  dia = 'CHARMM'
69  break;
70  for res in chain.residues:
71  if len(res.name) > 3:
72  dia = 'CHARMM'
73  break;
74  io.SavePDB(reference, os.path.join(tmp_dir_name, 'reference.pdb'),dialect=dia)
75 
76  return tmp_dir_name
77 
78 def _CleanupFiles(dir_name):
79  import shutil
80  shutil.rmtree(dir_name)
81 
82 class CADResult:
83  """
84  Holds the result of running CAD
85 
86  .. attribute:: globalAA
87 
88  The global CAD's atom-atom (AA) score
89 
90  .. attribute:: localAA
91 
92  Dictionary containing local CAD's atom-atom (AA) scores.
93 
94  :type: dictionary (key: tuple(chain, resnum) (e.g.:
95  ("A", ost.mol.ResNum(24)), value: CAD local AA score
96  (see CAD Documentation online)
97  """
98  def __init__(self, globalAA, localAA):
99  self.globalAA=globalAA
100  self.localAA=localAA
101 
102 def _ParseCADGlobal(lines):
103  header = lines[0].split()
104  aa_idx = header.index("AA")
105  aa_score=float(lines[1].split()[aa_idx])
106  return aa_score
107 
108 def _ParseCADLocal(lines):
109  local_scores_idx = None
110  for line_idx in range(len(lines)):
111  if "local_scores" in lines[line_idx]:
112  local_scores_idx = line_idx
113  break
114  if local_scores_idx == None:
115  raise RuntimeError("Failed to parse local cadscores")
116  local_aa_dict={}
117  for line_idx in range(local_scores_idx+2, len(lines)):
118  items=lines[line_idx].split()
119  local_aa = float(items[2])
120  if local_aa < 0.0:
121  continue # invalid CAD score
122  key = (items[0], mol.ResNum(int(items[1])))
123  local_aa_dict[key] = local_aa
124 
125  return local_aa_dict
126 
127 def _ParseVoronotaGlobal(lines):
128  return float(lines[0].split()[4])
129 
130 def _ParseVoronotaLocal(lines):
131  local_aa_dict={}
132  chain_name_regex = r'c<.+?>'
133  resnum_regex = r'r<\d+>'
134  insertion_code_regex = r'i<\D>'
135  for line in lines:
136  local_aa = float(line.split()[-1])
137  if local_aa < 0.0:
138  continue # invalid CAD score
139  chain_data = re.findall(chain_name_regex, line)
140  resnum_data = re.findall(resnum_regex, line)
141  insertion_code_data = re.findall(insertion_code_regex, line)
142  resnum = None
143  if len(insertion_code_data) == 0:
144  resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')))
145  else:
146  resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')),
147  insertion_code_data[0][1:].strip('><'))
148  key = (chain_data[0][1:].strip('><'), resnum)
149  local_aa_dict[key] = local_aa
150  return local_aa_dict
151 
152 def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime):
153 
154  model_filename=os.path.join(tmp_dir, 'model.pdb')
155  reference_filename=os.path.join(tmp_dir, 'reference.pdb')
156  globalAA = None
157  localAA = None
158 
159  if platform.system() == "Windows":
160  raise RuntimeError('CAD score not available on Windows')
161 
162  if mode == "classic":
163  cad_calc_path = None
164  cad_read_g_path = None
165  cad_read_l_path = None
166  if cad_bin_path:
167  cad_calc_path = settings.Locate('CADscore_calc.bash',
168  search_paths=[cad_bin_path],
169  search_system_paths=False)
170  cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash',
171  search_paths=[cad_bin_path],
172  search_system_paths=False)
173  cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash',
174  search_paths=[cad_bin_path],
175  search_system_paths=False)
176  # also try to locate the actual executable that is called from the
177  # bash scripts
178  executable_path = settings.Locate('voroprot2',
179  search_paths=[cad_bin_path],
180  search_system_paths=False)
181  else:
182  cad_calc_path = settings.Locate('CADscore_calc.bash')
183  cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash')
184  cad_read_l_path = settings.Locate('CADscore_read_local_scores.bash')
185  # also try to locate the actual executable that is called from the
186  # bash scripts
187  executable_path = settings.Locate('voroprot2')
188  command1="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path,
189  model_filename,
190  reference_filename,
191  os.path.join(tmp_dir,
192  "cadtemp"))
193  command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,
194  "cadtemp"))
195  command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path,
196  model_filename,
197  reference_filename,
198  os.path.join(tmp_dir,
199  "cadtemp"))
200 
201  ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE)
202  ps1.communicate()
203  ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE)
204  stdout,_ = ps2.communicate()
205  lines=stdout.decode().splitlines()
206  try:
207  globalAA=_ParseCADGlobal(lines)
208  except:
209  raise RuntimeError("CAD calculation failed")
210  ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE)
211  stdout,_ = ps3.communicate()
212  lines=stdout.decode().splitlines()
213  try:
214  localAA=_ParseCADLocal(lines)
215  except:
216  raise RuntimeError("CAD calculation failed")
217 
218  elif mode == "voronota":
219  local_score_filename = os.path.join(tmp_dir, "local_scores.txt")
220  voronota_cadscore_path = None
221  if cad_bin_path:
222  voronota_cadscore_path = settings.Locate("voronota-cadscore",
223  search_paths=[cad_bin_path],
224  search_system_paths=False)
225  # also try to locate the actual executable that is called from the
226  # bash script
227  executable_path = settings.Locate("voronota",
228  search_paths=[cad_bin_path],
229  search_system_paths=False)
230  else:
231  voronota_cadscore_path = settings.Locate("voronota-cadscore")
232  # also try to locate the actual executable that is called from the
233  # bash script
234  executable_path = settings.Locate("voronota")
235  cmd = [voronota_cadscore_path, '-m', model_filename, '-t',
236  reference_filename, '--contacts-query-by-code AA',
237  '--output-residue-scores', local_score_filename]
238  if old_regime:
239  cmd.append("--old-regime")
240  cmd = ' '.join(cmd)
241  ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
242  stdout, _ = ps.communicate()
243  try:
244  globalAA = _ParseVoronotaGlobal(stdout.decode().splitlines())
245  except:
246  raise RuntimeError("CAD calculation failed")
247  try:
248  with open(local_score_filename) as f:
249  localAA = _ParseVoronotaLocal(f.readlines())
250  except:
251  raise RuntimeError("CAD calculation failed")
252 
253  else:
254  raise RuntimeError("Invalid CAD mode! Allowed are: "
255  "[\"classic\", \"voronota\"]")
256 
257  return CADResult(globalAA,localAA)
258 
259 def _HasInsertionCodes(model, reference):
260  for r in model.residues:
261  if r.GetNumber().GetInsCode() != "\0":
262  return True
263  for r in reference.residues:
264  if r.GetNumber().GetInsCode() != "\0":
265  return True
266  return False
267 
268 def _MapLabels(model, cad_results, label, chain_mapping):
269 
270  if chain_mapping is None:
271  for k,v in cad_results.localAA.items():
272  r = model.FindResidue(k[0], k[1])
273  if r.IsValid():
274  r.SetFloatProp(label, v)
275  else:
276  # chain_mapping has mdl chains as key and target chains as values
277  # the raw CAD results refer to the target chains => reverse mapping
278  rev_mapping = {v:k for k,v in chain_mapping.items()}
279  for k,v in cad_results.localAA.items():
280  cname = k[0]
281  rnum = k[1]
282  if cname in rev_mapping:
283  r = model.FindResidue(rev_mapping[cname], rnum)
284  if r.IsValid():
285  r.SetFloatProp(label, v)
286 
287 def CADScore(model, reference, mode = "voronota", label = "localcad",
288  old_regime = False, cad_bin_path = None, chain_mapping=None):
289  """
290  Calculates global and local atom-atom (AA) CAD Scores.
291 
292  You can either access the original implementation available from
293  https://bitbucket.org/kliment/cadscore/downloads/
294  or the new implementation which is part of the Voronota package
295  available from https://bitbucket.org/kliment/voronota/downloads/.
296 
297  The scores of the two implementations differ but strongly correlate
298  as the contacts between atoms are estimated differently. When using
299  the "voronota" *mode* you can minimize those discrepancies by
300  setting the *old_regime* flag to True.
301 
302  Furthermore, the "voronota" *mode* generates per-residue scores that
303  are inverted when compared to the classical implementation
304  (0.0: bad, 1.0 good).
305 
306  :param model: The model structure.
307  :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
308  :param reference: The reference structure
309  :type reference: :class:`~ost.mol.EntityView` or
310  :class:`~ost.mol.EntityHandle`
311  :param mode: What CAD score implementation to use, must be one in
312  ["classic", "voronota"]
313  :param label: Local CAD scores will be mapped on residues of model as
314  float property with this name
315  :type label: :class:`str`
316  :param old_regime: Only has an effect if *mode* is "voronota". If set to true,
317  the discrepancies between the two modes is minimized but
318  the behaviour of inverted scores persists.
319  :type old_regime: :class:`bool`
320  :param cad_bin_path: Path to search for the required executables
321  (["CADscore_calc.bash",
322  "CADscore_read_global_scores.bash",
323  "CADscore_read_local_scores.bash"] for "classic" *mode*
324  or ["voronota-cadscore"] for "voronota" *mode*). If not
325  set, the env path is searched.
326  :type cad_bin_path: :class:`str`
327  :param chain_mapping: Provide custom chain mapping in case of oligomers
328  (only supported for "voronota" *mode*). Provided as
329  :class:`dict` with model chain name as key and target
330  chain name as value. If set, scoring happens on a
331  substructure of model that is stripped to chains with
332  valid mapping.
333  :type chain_mapping: :class:`dict`
334  :returns: The result of the CAD score calculation
335  :rtype: :class:`CADResult`
336 
337  :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
338  executables could not be located.
339  :raises: :class:`RuntimeError` if the calculation failed
340  """
341  if mode == "classic" and _HasInsertionCodes(model, reference):
342  raise RuntimeError("The classic CAD score implementation does not support "
343  "insertion codes in residues")
344 
345  if chain_mapping is not None:
346  if model == "classic":
347  raise RuntimeError("The classic CAD score implementation does not "
348  "support custom chain mappings")
349 
350  # do consistency checks of custom chain mapping
351  mdl_cnames = [ch.GetName() for ch in model.chains]
352  ref_cnames = [ch.GetName() for ch in reference.chains]
353 
354  # check that each model chain name in the mapping is actually there
355  for cname in chain_mapping.keys():
356  if cname not in mdl_cnames:
357  raise RuntimeError(f"Model chain name \"{cname}\" provided in "
358  f"custom chain mapping is not present in provided "
359  f"model structure.")
360 
361  # check that each target chain name in the mapping is actually there
362  for cname in chain_mapping.values():
363  if cname not in ref_cnames:
364  raise RuntimeError(f"Reference chain name \"{cname}\" provided in "
365  f"custom chain mapping is not present in provided "
366  f"reference structure.")
367 
368  tmp_dir_name=_SetupFiles(model, reference, chain_mapping)
369  result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime)
370  _CleanupFiles(tmp_dir_name)
371  _MapLabels(model, result, label, chain_mapping)
372  return result
definition of EntityView
Definition: entity_view.hh:86