5 def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6 ch1_aln = None, ch2_aln = None):
7 """ Preprocesses *mdl* and *ref*
9 Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10 the respective reference chains.
11 dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12 and vice versa, 0 otherwise. Same is done for mdl_ch2 and
14 dockq_idx: If a pair of residue is mapped, the same index will be set
15 to both residues. The index is unique otherwise.
17 By default, mapping happens with residue numbers but you can enforce a
18 mapping with an alignment. In the example of ch1_aln, the first sequence
19 corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20 the ATOMSEQ of mdl_ch1 in mdl.
26 for cname
in [ref_ch1, ref_ch2]:
27 ch = ref.FindChain(cname)
29 r.SetIntProp(
"dockq_mapped", 0)
30 r.SetIntProp(
"dockq_idx", -1)
31 for cname
in [mdl_ch1, mdl_ch2]:
32 ch = mdl.FindChain(cname)
34 r.SetIntProp(
"dockq_mapped", 0)
35 r.SetIntProp(
"dockq_idx", -1)
38 if ch1_aln
is not None and ch2_aln
is not None:
41 if ch1_aln.GetCount() != 2
or ch2_aln.GetCount() != 2:
42 raise RuntimeError(
"Expect exactly two sequences in alns provided "
44 tmp = ch1_aln.GetSequence(0)
45 ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46 ref_s1.SetOffset(tmp.GetOffset())
47 ref_s1.AttachView(ref.Select(f
"cname={mol.QueryQuoteName(ref_ch1)}"))
48 tmp = ch1_aln.GetSequence(1)
49 mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50 mdl_s1.SetOffset(tmp.GetOffset())
51 mdl_s1.AttachView(mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch1)}"))
52 new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53 for col
in new_ch1_aln:
54 if col[0] !=
'-' and col[1] !=
'-':
55 ref_r = col.GetResidue(0)
56 mdl_r = col.GetResidue(1)
57 if not (ref_r.IsValid()
and ref_r.one_letter_code == col[0]):
58 raise RuntimeError(
"DockQ: mismatch between provided "
59 "alignments and ATOMSEQ in structures")
60 if not (mdl_r.IsValid()
and mdl_r.one_letter_code == col[1]):
61 raise RuntimeError(
"DockQ: mismatch between provided "
62 "alignments and ATOMSEQ in structures")
63 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
64 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
65 ref_r.SetIntProp(
"dockq_mapped", 1)
66 mdl_r.SetIntProp(
"dockq_mapped", 1)
69 tmp = ch2_aln.GetSequence(0)
70 ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71 ref_s2.SetOffset(tmp.GetOffset())
72 ref_s2.AttachView(ref.Select(f
"cname={mol.QueryQuoteName(ref_ch2)}"))
73 tmp = ch2_aln.GetSequence(1)
74 mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75 mdl_s2.SetOffset(tmp.GetOffset())
76 mdl_s2.AttachView(mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch2)}"))
77 new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78 for col
in new_ch2_aln:
79 if col[0] !=
'-' and col[1] !=
'-':
80 ref_r = col.GetResidue(0)
81 mdl_r = col.GetResidue(1)
82 if not (ref_r.IsValid()
and ref_r.one_letter_code == col[0]):
83 raise RuntimeError(
"DockQ: mismatch between provided "
84 "alignments and ATOMSEQ in structures")
85 if not (mdl_r.IsValid()
and mdl_r.one_letter_code == col[1]):
86 raise RuntimeError(
"DockQ: mismatch between provided "
87 "alignments and ATOMSEQ in structures")
88 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
89 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
90 ref_r.SetIntProp(
"dockq_mapped", 1)
91 mdl_r.SetIntProp(
"dockq_mapped", 1)
95 for mdl_r
in mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch1)}").residues:
96 ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
98 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
99 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
100 ref_r.SetIntProp(
"dockq_mapped", 1)
101 mdl_r.SetIntProp(
"dockq_mapped", 1)
103 for mdl_r
in mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch2)}").residues:
104 ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
106 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
107 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
108 ref_r.SetIntProp(
"dockq_mapped", 1)
109 mdl_r.SetIntProp(
"dockq_mapped", 1)
113 for cname
in [ref_ch1, ref_ch2]:
114 ch = ref.FindChain(cname)
115 for r
in ch.residues:
116 if r.GetIntProp(
"dockq_idx") == -1:
117 r.SetIntProp(
"dockq_idx", dockq_idx)
119 for cname
in [mdl_ch1, mdl_ch2]:
120 ch = mdl.FindChain(cname)
121 for r
in ch.residues:
122 if r.GetIntProp(
"dockq_idx") == -1:
123 r.SetIntProp(
"dockq_idx", dockq_idx)
126 def _GetContacts(ent, ch1, ch2, dist_thresh):
127 int1 = ent.Select(f
"cname={mol.QueryQuoteName(ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch2)}]")
128 int2 = ent.Select(f
"cname={mol.QueryQuoteName(ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch1)}]")
130 int1_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in int1.residues]
131 int2_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in int2.residues]
132 for r1, p1
in zip(int1.residues, int1_p):
133 for r2, p2
in zip(int2.residues, int2_p):
134 if p1.IsWithin(p2, dist_thresh):
135 contacts.add((r1.GetIntProp(
"dockq_idx"),
136 r2.GetIntProp(
"dockq_idx")))
139 def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
141 ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142 mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
144 nnat = len(ref_contacts)
145 nmdl = len(mdl_contacts)
147 fnat = len(ref_contacts.intersection(mdl_contacts))
151 fnonnat = len(mdl_contacts.difference(ref_contacts))
152 if len(mdl_contacts) > 0:
153 fnonnat /= len(mdl_contacts)
155 return (nnat, nmdl, fnat, fnonnat)
157 def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0,
161 sup_atoms = [
'CA',
'C',
'N',
'O']
164 mapped_mdl = mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch1)},{mol.QueryQuoteName(mdl_ch2)} and grdockq_mapped=1")
165 mapped_ref = ref.Select(f
"cname={mol.QueryQuoteName(ref_ch1)},{mol.QueryQuoteName(ref_ch2)} and grdockq_mapped=1")
166 ch = mapped_mdl.FindChain(mdl_ch1)
167 mdl_ch1_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
168 ch = mapped_mdl.FindChain(mdl_ch2)
169 mdl_ch2_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
170 ch = mapped_ref.FindChain(ref_ch1)
171 ref_ch1_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
172 ch = mapped_ref.FindChain(ref_ch2)
173 ref_ch2_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
179 i_ref = ref.Select(
"aname=CB or (rname=GLY and aname=CA)")
180 int1 = i_ref.Select(f
"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
181 int2 = i_ref.Select(f
"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
184 for r
in int1.residues:
185 idx = r.GetIntProp(
"dockq_idx")
186 if idx
in ref_ch1_residues
and idx
in mdl_ch1_residues:
187 ref_r = ref_ch1_residues[idx]
188 mdl_r = mdl_ch1_residues[idx]
189 for aname
in sup_atoms:
190 ref_a = ref_r.FindAtom(aname)
191 mdl_a = mdl_r.FindAtom(aname)
192 if ref_a.IsValid()
and mdl_a.IsValid():
193 ref_pos.append(ref_a.pos)
194 mdl_pos.append(mdl_a.pos)
195 for r
in int2.residues:
196 idx = r.GetIntProp(
"dockq_idx")
197 if idx
in ref_ch2_residues
and idx
in mdl_ch2_residues:
198 ref_r = ref_ch2_residues[idx]
199 mdl_r = mdl_ch2_residues[idx]
200 for aname
in sup_atoms:
201 ref_a = ref_r.FindAtom(aname)
202 mdl_a = mdl_r.FindAtom(aname)
203 if ref_a.IsValid()
and mdl_a.IsValid():
204 ref_pos.append(ref_a.pos)
205 mdl_pos.append(mdl_a.pos)
207 if len(mdl_pos) >= 3:
208 sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
209 irmsd = sup_result.rmsd
216 n_ch1 = len(ref.FindChain(ref_ch1).residues)
217 n_ch2 = len(ref.FindChain(ref_ch2).residues)
219 ref_receptor_residues = ref_ch1_residues.values()
220 ref_ligand_residues = ref_ch2_residues.values()
221 mdl_receptor_residues = \
222 [mdl_ch1_residues[idx]
for idx
in ref_ch1_residues.keys()]
223 mdl_ligand_residues = \
224 [mdl_ch2_residues[idx]
for idx
in ref_ch2_residues.keys()]
226 ref_receptor_residues = ref_ch2_residues.values()
227 ref_ligand_residues = ref_ch1_residues.values()
228 mdl_receptor_residues = \
229 [mdl_ch2_residues[idx]
for idx
in ref_ch2_residues.keys()]
230 mdl_ligand_residues = \
231 [mdl_ch1_residues[idx]
for idx
in ref_ch1_residues.keys()]
236 for ref_r, mdl_r
in zip(ref_receptor_residues, mdl_receptor_residues):
237 for aname
in sup_atoms:
238 ref_a = ref_r.FindAtom(aname)
239 mdl_a = mdl_r.FindAtom(aname)
240 if ref_a.IsValid()
and mdl_a.IsValid():
241 ref_receptor_positions.append(ref_a.pos)
242 mdl_receptor_positions.append(mdl_a.pos)
243 for ref_r, mdl_r
in zip(ref_ligand_residues, mdl_ligand_residues):
244 for aname
in sup_atoms:
245 ref_a = ref_r.FindAtom(aname)
246 mdl_a = mdl_r.FindAtom(aname)
247 if ref_a.IsValid()
and mdl_a.IsValid():
248 ref_ligand_positions.append(ref_a.pos)
249 mdl_ligand_positions.append(mdl_a.pos)
251 if len(mdl_receptor_positions) >= 3:
252 sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
253 ref_receptor_positions)
254 mdl_ligand_positions.ApplyTransform(sup_result.transformation)
255 lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
259 return (irmsd, lrmsd)
261 def _ScaleRMSD(rmsd, d):
262 return 1.0/(1+(rmsd/d)**2)
264 def _DockQ(fnat, lrmsd, irmsd, d1, d2):
265 """ The final number chrunching as described in the DockQ manuscript
267 return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
269 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
270 ch1_aln=None, ch2_aln=None, contact_dist_thresh = 5.0,
271 interface_dist_thresh = 10.0, interface_cb = False):
272 """ Computes DockQ for specified interface
274 DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
275 Quality Measure for Protein-Protein Docking Models", PLOS one
277 Residues are mapped based on residue numbers by default. If you provide
278 *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
280 :param mdl: Model structure
281 :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
282 :param ref: Reference structure, i.e. native structure
283 :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
284 :param mdl_ch1: Specifies chain in model constituting first part of
286 :type mdl_ch1: :class:`str`
287 :param mdl_ch2: Specifies chain in model constituting second part of
289 :type mdl_ch2: :class:`str`
290 :param ref_ch1: ref equivalent of mdl_ch1
291 :type ref_ch1: :class:`str`
292 :param ref_ch2: ref equivalent of mdl_ch2
293 :type ref_ch2: :class:`str`
294 :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
295 The first sequence must match the sequence in *ref_ch1* and
296 the second to *mdl_ch1*.
297 :type ch1_aln: :class:`ost.seq.AlignmentHandle`
298 :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
299 The first sequence must match the sequence in *ref_ch2* and
300 the second to *mdl_ch2*.
301 :type ch2_aln: :class:`ost.seq.AlignmentHandle`
302 :param contact_dist_thresh: Residues with any atom within this threshold
303 are considered to be in contact. Affects contact
304 based scores fnat and fnonnat. CAPRI suggests
305 to lower the default of 5.0 to 4.0 for
306 protein-peptide interactions.
307 :type contact_dist_thresh: :class:`float`
308 :param interface_dist_thresh: Residues with any atom within this threshold
309 to another chain are considered interface
310 residues. Affects irmsd. CAPRI suggests to
311 lower the default of 10.0 to 8.0 in
312 combination with interface_cb=True for
313 protein-peptide interactions.
314 :type interface_dist_thresh: :class:`float`
315 :param interface_cb: Only use CB atoms (CA for GLY) to identify interface
316 residues for irmsd. CAPRI suggests to enable this
317 flag in combination with lowering
318 *interface_dist_thresh* to 8.0 for protein-peptide
320 :type interface_cb: :class:`bool`
321 :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
322 DockQ which corresponds to the equivalent values in the original
323 DockQ implementation.
325 _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
326 ch1_aln = ch1_aln, ch2_aln = ch2_aln)
327 nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
329 dist_thresh = contact_dist_thresh)
330 irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
331 dist_thresh = interface_dist_thresh,
332 cb_mode = interface_cb)
333 return {
"nnat": nnat,
337 "irmsd": round(irmsd, 3),
338 "lrmsd": round(lrmsd, 3),
339 "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}
def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, ch1_aln=None, ch2_aln=None, contact_dist_thresh=5.0, interface_dist_thresh=10.0, interface_cb=False)