OpenStructure
dockq.py
Go to the documentation of this file.
1 from ost import geom
2 from ost import mol
3 from ost import seq
4 
5 def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6  ch1_aln = None, ch2_aln = None):
7  """ Preprocesses *mdl* and *ref*
8 
9  Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10  the respective reference chains.
11  dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12  and vice versa, 0 otherwise. Same is done for mdl_ch2 and
13  ref_ch2.
14  dockq_idx: If a pair of residue is mapped, the same index will be set
15  to both residues. The index is unique otherwise.
16 
17  By default, mapping happens with residue numbers but you can enforce a
18  mapping with an alignment. In the example of ch1_aln, the first sequence
19  corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20  the ATOMSEQ of mdl_ch1 in mdl.
21  """
22 
23  # set default values for dockq_mapped and dockq_idx properties
24  # => makes sure that we have a clean slate if stuff has been set in
25  # previous runs
26  for cname in [ref_ch1, ref_ch2]:
27  ch = ref.FindChain(cname)
28  for r in ch.residues:
29  r.SetIntProp("dockq_mapped", 0)
30  r.SetIntProp("dockq_idx", -1)
31  for cname in [mdl_ch1, mdl_ch2]:
32  ch = mdl.FindChain(cname)
33  for r in ch.residues:
34  r.SetIntProp("dockq_mapped", 0)
35  r.SetIntProp("dockq_idx", -1)
36 
37  dockq_idx = 0
38  if ch1_aln is not None and ch2_aln is not None:
39  # there are potentially already views attached to the alns but
40  # we go for *mdl* and *ref* here
41  if ch1_aln.GetCount() != 2 or ch2_aln.GetCount() != 2:
42  raise RuntimeError("Expect exactly two sequences in alns provided "
43  "to DockQ!")
44  tmp = ch1_aln.GetSequence(0)
45  ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46  ref_s1.SetOffset(tmp.GetOffset())
47  ref_s1.AttachView(ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)}"))
48  tmp = ch1_aln.GetSequence(1)
49  mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50  mdl_s1.SetOffset(tmp.GetOffset())
51  mdl_s1.AttachView(mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)}"))
52  new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53  for col in new_ch1_aln:
54  if col[0] != '-' and col[1] != '-':
55  ref_r = col.GetResidue(0)
56  mdl_r = col.GetResidue(1)
57  if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
58  raise RuntimeError("DockQ: mismatch between provided "
59  "alignments and ATOMSEQ in structures")
60  if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
61  raise RuntimeError("DockQ: mismatch between provided "
62  "alignments and ATOMSEQ in structures")
63  ref_r.SetIntProp("dockq_idx", dockq_idx)
64  mdl_r.SetIntProp("dockq_idx", dockq_idx)
65  ref_r.SetIntProp("dockq_mapped", 1)
66  mdl_r.SetIntProp("dockq_mapped", 1)
67  dockq_idx += 1
68 
69  tmp = ch2_aln.GetSequence(0)
70  ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71  ref_s2.SetOffset(tmp.GetOffset())
72  ref_s2.AttachView(ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)}"))
73  tmp = ch2_aln.GetSequence(1)
74  mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75  mdl_s2.SetOffset(tmp.GetOffset())
76  mdl_s2.AttachView(mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch2)}"))
77  new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78  for col in new_ch2_aln:
79  if col[0] != '-' and col[1] != '-':
80  ref_r = col.GetResidue(0)
81  mdl_r = col.GetResidue(1)
82  if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
83  raise RuntimeError("DockQ: mismatch between provided "
84  "alignments and ATOMSEQ in structures")
85  if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
86  raise RuntimeError("DockQ: mismatch between provided "
87  "alignments and ATOMSEQ in structures")
88  ref_r.SetIntProp("dockq_idx", dockq_idx)
89  mdl_r.SetIntProp("dockq_idx", dockq_idx)
90  ref_r.SetIntProp("dockq_mapped", 1)
91  mdl_r.SetIntProp("dockq_mapped", 1)
92  dockq_idx += 1
93  else:
94  # go by residue numbers
95  for mdl_r in mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)}").residues:
96  ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
97  if ref_r.IsValid():
98  ref_r.SetIntProp("dockq_idx", dockq_idx)
99  mdl_r.SetIntProp("dockq_idx", dockq_idx)
100  ref_r.SetIntProp("dockq_mapped", 1)
101  mdl_r.SetIntProp("dockq_mapped", 1)
102  dockq_idx += 1
103  for mdl_r in mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch2)}").residues:
104  ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
105  if ref_r.IsValid():
106  ref_r.SetIntProp("dockq_idx", dockq_idx)
107  mdl_r.SetIntProp("dockq_idx", dockq_idx)
108  ref_r.SetIntProp("dockq_mapped", 1)
109  mdl_r.SetIntProp("dockq_mapped", 1)
110  dockq_idx += 1
111 
112  # set unique dockq_idx property for all residues that are still -1
113  for cname in [ref_ch1, ref_ch2]:
114  ch = ref.FindChain(cname)
115  for r in ch.residues:
116  if r.GetIntProp("dockq_idx") == -1:
117  r.SetIntProp("dockq_idx", dockq_idx)
118  dockq_idx += 1
119  for cname in [mdl_ch1, mdl_ch2]:
120  ch = mdl.FindChain(cname)
121  for r in ch.residues:
122  if r.GetIntProp("dockq_idx") == -1:
123  r.SetIntProp("dockq_idx", dockq_idx)
124  dockq_idx += 1
125 
126 def _GetContacts(ent, ch1, ch2, dist_thresh):
127  int1 = ent.Select(f"cname={mol.QueryQuoteName(ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch2)}]")
128  int2 = ent.Select(f"cname={mol.QueryQuoteName(ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch1)}]")
129  contacts = set()
130  int1_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int1.residues]
131  int2_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int2.residues]
132  for r1, p1 in zip(int1.residues, int1_p):
133  for r2, p2 in zip(int2.residues, int2_p):
134  if p1.IsWithin(p2, dist_thresh):
135  contacts.add((r1.GetIntProp("dockq_idx"),
136  r2.GetIntProp("dockq_idx")))
137  return contacts
138 
139 def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
140  dist_thresh=5.0):
141  ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142  mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
143 
144  nnat = len(ref_contacts)
145  nmdl = len(mdl_contacts)
146 
147  fnat = len(ref_contacts.intersection(mdl_contacts))
148  if nnat > 0:
149  fnat /= nnat
150 
151  fnonnat = len(mdl_contacts.difference(ref_contacts))
152  if len(mdl_contacts) > 0:
153  fnonnat /= len(mdl_contacts)
154 
155  return (nnat, nmdl, fnat, fnonnat)
156 
157 def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0,
158  cb_mode=False):
159 
160  # backbone atoms used for superposition
161  sup_atoms = ["CA","C","N","O",
162  "P", "OP1", "OP2", "O2'", "O3'", "O4'", "O5'", "C1'", "C2'",
163  "C3'", "C4'", "C5'"]
164 
165  # make mapped residues accessible by the dockq_idx property
166  mapped_mdl = mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)},{mol.QueryQuoteName(mdl_ch2)} and grdockq_mapped=1")
167  mapped_ref = ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)},{mol.QueryQuoteName(ref_ch2)} and grdockq_mapped=1")
168  ch = mapped_mdl.FindChain(mdl_ch1)
169  mdl_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
170  ch = mapped_mdl.FindChain(mdl_ch2)
171  mdl_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
172  ch = mapped_ref.FindChain(ref_ch1)
173  ref_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
174  ch = mapped_ref.FindChain(ref_ch2)
175  ref_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
176 
177  # iRMSD
178 
179  i_ref = ref
180  if cb_mode:
181  i_ref = ref.Select("aname=CB or (rname=GLY and aname=CA)")
182  int1 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
183  int2 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
184  ref_pos = geom.Vec3List()
185  mdl_pos = geom.Vec3List()
186  for r in int1.residues:
187  idx = r.GetIntProp("dockq_idx")
188  if idx in ref_ch1_residues and idx in mdl_ch1_residues:
189  ref_r = ref_ch1_residues[idx]
190  mdl_r = mdl_ch1_residues[idx]
191  for aname in sup_atoms:
192  ref_a = ref_r.FindAtom(aname)
193  mdl_a = mdl_r.FindAtom(aname)
194  if ref_a.IsValid() and mdl_a.IsValid():
195  ref_pos.append(ref_a.pos)
196  mdl_pos.append(mdl_a.pos)
197  for r in int2.residues:
198  idx = r.GetIntProp("dockq_idx")
199  if idx in ref_ch2_residues and idx in mdl_ch2_residues:
200  ref_r = ref_ch2_residues[idx]
201  mdl_r = mdl_ch2_residues[idx]
202  for aname in sup_atoms:
203  ref_a = ref_r.FindAtom(aname)
204  mdl_a = mdl_r.FindAtom(aname)
205  if ref_a.IsValid() and mdl_a.IsValid():
206  ref_pos.append(ref_a.pos)
207  mdl_pos.append(mdl_a.pos)
208 
209  if len(mdl_pos) >= 3:
210  sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
211  irmsd = sup_result.rmsd
212  else:
213  irmsd = 0.0
214 
215  # lRMSD
216 
218  n_ch1 = len(ref.FindChain(ref_ch1).residues)
219  n_ch2 = len(ref.FindChain(ref_ch2).residues)
220  if n_ch1 >= n_ch2:
221  ref_receptor_residues = ref_ch1_residues.values()
222  ref_ligand_residues = ref_ch2_residues.values()
223  mdl_receptor_residues = \
224  [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
225  mdl_ligand_residues = \
226  [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
227  else:
228  ref_receptor_residues = ref_ch2_residues.values()
229  ref_ligand_residues = ref_ch1_residues.values()
230  mdl_receptor_residues = \
231  [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
232  mdl_ligand_residues = \
233  [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
234  ref_receptor_positions = geom.Vec3List()
235  mdl_receptor_positions = geom.Vec3List()
236  ref_ligand_positions = geom.Vec3List()
237  mdl_ligand_positions = geom.Vec3List()
238  for ref_r, mdl_r in zip(ref_receptor_residues, mdl_receptor_residues):
239  for aname in sup_atoms:
240  ref_a = ref_r.FindAtom(aname)
241  mdl_a = mdl_r.FindAtom(aname)
242  if ref_a.IsValid() and mdl_a.IsValid():
243  ref_receptor_positions.append(ref_a.pos)
244  mdl_receptor_positions.append(mdl_a.pos)
245  for ref_r, mdl_r in zip(ref_ligand_residues, mdl_ligand_residues):
246  for aname in sup_atoms:
247  ref_a = ref_r.FindAtom(aname)
248  mdl_a = mdl_r.FindAtom(aname)
249  if ref_a.IsValid() and mdl_a.IsValid():
250  ref_ligand_positions.append(ref_a.pos)
251  mdl_ligand_positions.append(mdl_a.pos)
252 
253  if len(mdl_receptor_positions) >= 3:
254  sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
255  ref_receptor_positions)
256  mdl_ligand_positions.ApplyTransform(sup_result.transformation)
257  lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
258  else:
259  lrmsd = 0.0
260 
261  return (irmsd, lrmsd)
262 
263 def _ScaleRMSD(rmsd, d):
264  return 1.0/(1+(rmsd/d)**2)
265 
266 def _DockQ(fnat, lrmsd, irmsd, d1, d2):
267  """ The final number chrunching as described in the DockQ manuscript
268  """
269  return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
270 
271 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
272  ch1_aln=None, ch2_aln=None, contact_dist_thresh = 5.0,
273  interface_dist_thresh = 10.0, interface_cb = False):
274  """ Computes DockQ for specified interface
275 
276  DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
277  Quality Measure for Protein-Protein Docking Models", PLOS one
278 
279  Residues are mapped based on residue numbers by default. If you provide
280  *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
281 
282  :param mdl: Model structure
283  :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
284  :param ref: Reference structure, i.e. native structure
285  :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
286  :param mdl_ch1: Specifies chain in model constituting first part of
287  interface
288  :type mdl_ch1: :class:`str`
289  :param mdl_ch2: Specifies chain in model constituting second part of
290  interface
291  :type mdl_ch2: :class:`str`
292  :param ref_ch1: ref equivalent of mdl_ch1
293  :type ref_ch1: :class:`str`
294  :param ref_ch2: ref equivalent of mdl_ch2
295  :type ref_ch2: :class:`str`
296  :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
297  The first sequence must match the sequence in *ref_ch1* and
298  the second to *mdl_ch1*.
299  :type ch1_aln: :class:`ost.seq.AlignmentHandle`
300  :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
301  The first sequence must match the sequence in *ref_ch2* and
302  the second to *mdl_ch2*.
303  :type ch2_aln: :class:`ost.seq.AlignmentHandle`
304  :param contact_dist_thresh: Residues with any atom within this threshold
305  are considered to be in contact. Affects contact
306  based scores fnat and fnonnat. CAPRI suggests
307  to lower the default of 5.0 to 4.0 for
308  protein-peptide interactions.
309  :type contact_dist_thresh: :class:`float`
310  :param interface_dist_thresh: Residues with any atom within this threshold
311  to another chain are considered interface
312  residues. Affects irmsd. CAPRI suggests to
313  lower the default of 10.0 to 8.0 in
314  combination with interface_cb=True for
315  protein-peptide interactions.
316  :type interface_dist_thresh: :class:`float`
317  :param interface_cb: Only use CB atoms (CA for GLY) to identify interface
318  residues for irmsd. CAPRI suggests to enable this
319  flag in combination with lowering
320  *interface_dist_thresh* to 8.0 for protein-peptide
321  interactions.
322  :type interface_cb: :class:`bool`
323  :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
324  DockQ which corresponds to the equivalent values in the original
325  DockQ implementation.
326  """
327  _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
328  ch1_aln = ch1_aln, ch2_aln = ch2_aln)
329  nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
330  ref_ch1, ref_ch2,
331  dist_thresh = contact_dist_thresh)
332  irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
333  dist_thresh = interface_dist_thresh,
334  cb_mode = interface_cb)
335  return {"nnat": nnat,
336  "nmdl": nmdl,
337  "fnat": fnat,
338  "fnonnat": fnonnat,
339  "irmsd": round(irmsd, 3),
340  "lrmsd": round(lrmsd, 3),
341  "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}
def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, ch1_aln=None, ch2_aln=None, contact_dist_thresh=5.0, interface_dist_thresh=10.0, interface_cb=False)
Definition: dockq.py:273