OpenStructure
Loading...
Searching...
No Matches
dockq.py
Go to the documentation of this file.
1from ost import geom
2from ost import mol
3from ost import seq
4
5def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6 ch1_aln = None, ch2_aln = None):
7 """ Preprocesses *mdl* and *ref*
8
9 Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10 the respective reference chains.
11 dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12 and vice versa, 0 otherwise. Same is done for mdl_ch2 and
13 ref_ch2.
14 dockq_idx: If a pair of residue is mapped, the same index will be set
15 to both residues. The index is unique otherwise.
16
17 By default, mapping happens with residue numbers but you can enforce a
18 mapping with an alignment. In the example of ch1_aln, the first sequence
19 corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20 the ATOMSEQ of mdl_ch1 in mdl.
21 """
22
23 # set default values for dockq_mapped and dockq_idx properties
24 # => makes sure that we have a clean slate if stuff has been set in
25 # previous runs
26 for cname in [ref_ch1, ref_ch2]:
27 ch = ref.FindChain(cname)
28 for r in ch.residues:
29 r.SetIntProp("dockq_mapped", 0)
30 r.SetIntProp("dockq_idx", -1)
31 for cname in [mdl_ch1, mdl_ch2]:
32 ch = mdl.FindChain(cname)
33 for r in ch.residues:
34 r.SetIntProp("dockq_mapped", 0)
35 r.SetIntProp("dockq_idx", -1)
36
37 dockq_idx = 0
38 if ch1_aln is not None and ch2_aln is not None:
39 # there are potentially already views attached to the alns but
40 # we go for *mdl* and *ref* here
41 if ch1_aln.GetCount() != 2 or ch2_aln.GetCount() != 2:
42 raise RuntimeError("Expect exactly two sequences in alns provided "
43 "to DockQ!")
44 tmp = ch1_aln.GetSequence(0)
45 ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46 ref_s1.SetOffset(tmp.GetOffset())
47 ref_s1.AttachView(ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)}"))
48 tmp = ch1_aln.GetSequence(1)
49 mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50 mdl_s1.SetOffset(tmp.GetOffset())
51 mdl_s1.AttachView(mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)}"))
52 new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53 for col in new_ch1_aln:
54 if col[0] != '-' and col[1] != '-':
55 ref_r = col.GetResidue(0)
56 mdl_r = col.GetResidue(1)
57 if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
58 raise RuntimeError("DockQ: mismatch between provided "
59 "alignments and ATOMSEQ in structures")
60 if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
61 raise RuntimeError("DockQ: mismatch between provided "
62 "alignments and ATOMSEQ in structures")
63 ref_r.SetIntProp("dockq_idx", dockq_idx)
64 mdl_r.SetIntProp("dockq_idx", dockq_idx)
65 ref_r.SetIntProp("dockq_mapped", 1)
66 mdl_r.SetIntProp("dockq_mapped", 1)
67 dockq_idx += 1
68
69 tmp = ch2_aln.GetSequence(0)
70 ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71 ref_s2.SetOffset(tmp.GetOffset())
72 ref_s2.AttachView(ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)}"))
73 tmp = ch2_aln.GetSequence(1)
74 mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75 mdl_s2.SetOffset(tmp.GetOffset())
76 mdl_s2.AttachView(mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch2)}"))
77 new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78 for col in new_ch2_aln:
79 if col[0] != '-' and col[1] != '-':
80 ref_r = col.GetResidue(0)
81 mdl_r = col.GetResidue(1)
82 if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
83 raise RuntimeError("DockQ: mismatch between provided "
84 "alignments and ATOMSEQ in structures")
85 if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
86 raise RuntimeError("DockQ: mismatch between provided "
87 "alignments and ATOMSEQ in structures")
88 ref_r.SetIntProp("dockq_idx", dockq_idx)
89 mdl_r.SetIntProp("dockq_idx", dockq_idx)
90 ref_r.SetIntProp("dockq_mapped", 1)
91 mdl_r.SetIntProp("dockq_mapped", 1)
92 dockq_idx += 1
93 else:
94 # go by residue numbers
95 for mdl_r in mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)}").residues:
96 ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
97 if ref_r.IsValid():
98 ref_r.SetIntProp("dockq_idx", dockq_idx)
99 mdl_r.SetIntProp("dockq_idx", dockq_idx)
100 ref_r.SetIntProp("dockq_mapped", 1)
101 mdl_r.SetIntProp("dockq_mapped", 1)
102 dockq_idx += 1
103 for mdl_r in mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch2)}").residues:
104 ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
105 if ref_r.IsValid():
106 ref_r.SetIntProp("dockq_idx", dockq_idx)
107 mdl_r.SetIntProp("dockq_idx", dockq_idx)
108 ref_r.SetIntProp("dockq_mapped", 1)
109 mdl_r.SetIntProp("dockq_mapped", 1)
110 dockq_idx += 1
111
112 # set unique dockq_idx property for all residues that are still -1
113 for cname in [ref_ch1, ref_ch2]:
114 ch = ref.FindChain(cname)
115 for r in ch.residues:
116 if r.GetIntProp("dockq_idx") == -1:
117 r.SetIntProp("dockq_idx", dockq_idx)
118 dockq_idx += 1
119 for cname in [mdl_ch1, mdl_ch2]:
120 ch = mdl.FindChain(cname)
121 for r in ch.residues:
122 if r.GetIntProp("dockq_idx") == -1:
123 r.SetIntProp("dockq_idx", dockq_idx)
124 dockq_idx += 1
125
126def _GetContacts(ent, ch1, ch2, dist_thresh):
127 int1 = ent.Select(f"cname={mol.QueryQuoteName(ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch2)}]")
128 int2 = ent.Select(f"cname={mol.QueryQuoteName(ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch1)}]")
129 contacts = set()
130 int1_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int1.residues]
131 int2_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int2.residues]
132 for r1, p1 in zip(int1.residues, int1_p):
133 for r2, p2 in zip(int2.residues, int2_p):
134 if p1.IsWithin(p2, dist_thresh):
135 contacts.add((r1.GetIntProp("dockq_idx"),
136 r2.GetIntProp("dockq_idx")))
137 return contacts
138
139def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
140 dist_thresh=5.0):
141 ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142 mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
143
144 nnat = len(ref_contacts)
145 nmdl = len(mdl_contacts)
146
147 fnat = len(ref_contacts.intersection(mdl_contacts))
148 if nnat > 0:
149 fnat /= nnat
150
151 fnonnat = len(mdl_contacts.difference(ref_contacts))
152 if len(mdl_contacts) > 0:
153 fnonnat /= len(mdl_contacts)
154
155 return (nnat, nmdl, fnat, fnonnat)
156
157def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0,
158 cb_mode=False):
159
160 # backbone atoms used for superposition
161 sup_atoms = ["CA","C","N","O",
162 "P", "OP1", "OP2", "O2'", "O3'", "O4'", "O5'", "C1'", "C2'",
163 "C3'", "C4'", "C5'"]
164
165 # make mapped residues accessible by the dockq_idx property
166 mapped_mdl = mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)},{mol.QueryQuoteName(mdl_ch2)} and grdockq_mapped=1")
167 mapped_ref = ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)},{mol.QueryQuoteName(ref_ch2)} and grdockq_mapped=1")
168 ch = mapped_mdl.FindChain(mdl_ch1)
169 mdl_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
170 ch = mapped_mdl.FindChain(mdl_ch2)
171 mdl_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
172 ch = mapped_ref.FindChain(ref_ch1)
173 ref_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
174 ch = mapped_ref.FindChain(ref_ch2)
175 ref_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
176
177 # iRMSD
178
179 i_ref = ref
180 if cb_mode:
181 i_ref = ref.Select("aname=CB or (rname=GLY and aname=CA)")
182 int1 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
183 int2 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
184 ref_pos = geom.Vec3List()
185 mdl_pos = geom.Vec3List()
186 for r in int1.residues:
187 idx = r.GetIntProp("dockq_idx")
188 if idx in ref_ch1_residues and idx in mdl_ch1_residues:
189 ref_r = ref_ch1_residues[idx]
190 mdl_r = mdl_ch1_residues[idx]
191 for aname in sup_atoms:
192 ref_a = ref_r.FindAtom(aname)
193 mdl_a = mdl_r.FindAtom(aname)
194 if ref_a.IsValid() and mdl_a.IsValid():
195 ref_pos.append(ref_a.pos)
196 mdl_pos.append(mdl_a.pos)
197 for r in int2.residues:
198 idx = r.GetIntProp("dockq_idx")
199 if idx in ref_ch2_residues and idx in mdl_ch2_residues:
200 ref_r = ref_ch2_residues[idx]
201 mdl_r = mdl_ch2_residues[idx]
202 for aname in sup_atoms:
203 ref_a = ref_r.FindAtom(aname)
204 mdl_a = mdl_r.FindAtom(aname)
205 if ref_a.IsValid() and mdl_a.IsValid():
206 ref_pos.append(ref_a.pos)
207 mdl_pos.append(mdl_a.pos)
208
209 if len(mdl_pos) >= 3:
210 sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
211 irmsd = sup_result.rmsd
212 else:
213 irmsd = 0.0
214
215 # lRMSD
216
218 n_ch1 = len(ref.FindChain(ref_ch1).residues)
219 n_ch2 = len(ref.FindChain(ref_ch2).residues)
220 if n_ch1 >= n_ch2:
221 ref_receptor_residues = ref_ch1_residues.values()
222 ref_ligand_residues = ref_ch2_residues.values()
223 mdl_receptor_residues = \
224 [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
225 mdl_ligand_residues = \
226 [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
227 else:
228 ref_receptor_residues = ref_ch2_residues.values()
229 ref_ligand_residues = ref_ch1_residues.values()
230 mdl_receptor_residues = \
231 [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
232 mdl_ligand_residues = \
233 [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
234 ref_receptor_positions = geom.Vec3List()
235 mdl_receptor_positions = geom.Vec3List()
236 ref_ligand_positions = geom.Vec3List()
237 mdl_ligand_positions = geom.Vec3List()
238 for ref_r, mdl_r in zip(ref_receptor_residues, mdl_receptor_residues):
239 for aname in sup_atoms:
240 ref_a = ref_r.FindAtom(aname)
241 mdl_a = mdl_r.FindAtom(aname)
242 if ref_a.IsValid() and mdl_a.IsValid():
243 ref_receptor_positions.append(ref_a.pos)
244 mdl_receptor_positions.append(mdl_a.pos)
245 for ref_r, mdl_r in zip(ref_ligand_residues, mdl_ligand_residues):
246 for aname in sup_atoms:
247 ref_a = ref_r.FindAtom(aname)
248 mdl_a = mdl_r.FindAtom(aname)
249 if ref_a.IsValid() and mdl_a.IsValid():
250 ref_ligand_positions.append(ref_a.pos)
251 mdl_ligand_positions.append(mdl_a.pos)
252
253 if len(mdl_receptor_positions) >= 3:
254 sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
255 ref_receptor_positions)
256 mdl_ligand_positions.ApplyTransform(sup_result.transformation)
257 lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
258 else:
259 lrmsd = 0.0
260
261 return (irmsd, lrmsd)
262
263def _ScaleRMSD(rmsd, d):
264 return 1.0/(1+(rmsd/d)**2)
265
266def _DockQ(fnat, lrmsd, irmsd, d1, d2):
267 """ The final number chrunching as described in the DockQ manuscript
268 """
269 return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
270
271def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
272 ch1_aln=None, ch2_aln=None, contact_dist_thresh = 5.0,
273 interface_dist_thresh = 10.0, interface_cb = False):
274 """ Computes DockQ for specified interface
275
276 DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
277 Quality Measure for Protein-Protein Docking Models", PLOS one
278
279 Residues are mapped based on residue numbers by default. If you provide
280 *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
281
282 :param mdl: Model structure
283 :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
284 :param ref: Reference structure, i.e. native structure
285 :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
286 :param mdl_ch1: Specifies chain in model constituting first part of
287 interface
288 :type mdl_ch1: :class:`str`
289 :param mdl_ch2: Specifies chain in model constituting second part of
290 interface
291 :type mdl_ch2: :class:`str`
292 :param ref_ch1: ref equivalent of mdl_ch1
293 :type ref_ch1: :class:`str`
294 :param ref_ch2: ref equivalent of mdl_ch2
295 :type ref_ch2: :class:`str`
296 :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
297 The first sequence must match the sequence in *ref_ch1* and
298 the second to *mdl_ch1*.
299 :type ch1_aln: :class:`ost.seq.AlignmentHandle`
300 :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
301 The first sequence must match the sequence in *ref_ch2* and
302 the second to *mdl_ch2*.
303 :type ch2_aln: :class:`ost.seq.AlignmentHandle`
304 :param contact_dist_thresh: Residues with any atom within this threshold
305 are considered to be in contact. Affects contact
306 based scores fnat and fnonnat. CAPRI suggests
307 to lower the default of 5.0 to 4.0 for
308 protein-peptide interactions.
309 :type contact_dist_thresh: :class:`float`
310 :param interface_dist_thresh: Residues with any atom within this threshold
311 to another chain are considered interface
312 residues. Affects irmsd. CAPRI suggests to
313 lower the default of 10.0 to 8.0 in
314 combination with interface_cb=True for
315 protein-peptide interactions.
316 :type interface_dist_thresh: :class:`float`
317 :param interface_cb: Only use CB atoms (CA for GLY) to identify interface
318 residues for irmsd. CAPRI suggests to enable this
319 flag in combination with lowering
320 *interface_dist_thresh* to 8.0 for protein-peptide
321 interactions.
322 :type interface_cb: :class:`bool`
323 :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
324 DockQ which corresponds to the equivalent values in the original
325 DockQ implementation.
326 """
327 _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
328 ch1_aln = ch1_aln, ch2_aln = ch2_aln)
329 nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
330 ref_ch1, ref_ch2,
331 dist_thresh = contact_dist_thresh)
332 irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
333 dist_thresh = interface_dist_thresh,
334 cb_mode = interface_cb)
335 return {"nnat": nnat,
336 "nmdl": nmdl,
337 "fnat": fnat,
338 "fnonnat": fnonnat,
339 "irmsd": round(irmsd, 3),
340 "lrmsd": round(lrmsd, 3),
341 "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}
_RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0, cb_mode=False)
Definition dockq.py:158
_DockQ(fnat, lrmsd, irmsd, d1, d2)
Definition dockq.py:266
_ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=5.0)
Definition dockq.py:140
DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, ch1_aln=None, ch2_aln=None, contact_dist_thresh=5.0, interface_dist_thresh=10.0, interface_cb=False)
Definition dockq.py:273
_ScaleRMSD(rmsd, d)
Definition dockq.py:263
_GetContacts(ent, ch1, ch2, dist_thresh)
Definition dockq.py:126
_PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, ch1_aln=None, ch2_aln=None)
Definition dockq.py:6