OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
dssp.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2020 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 """
20 Utility functions to load secondary structure information from DSSP files
21 and assign them to entities.
22 
23 Authors: Pascal Benkert, Marco Biasini
24 """
25 
26 import os
27 import tempfile,subprocess
28 
29 from ost import io,mol
30 from ost import settings
31 
32 
33 def _SkipHeader(stream):
34  line=stream.readline()
35  while line:
36  if line.strip().find('#')==0:
37  return True
38  line=stream.readline()
39  return False
40 
41 def _Cleanup(temp_dssp_path, pdb_path):
42  if os.path.exists(temp_dssp_path):
43  os.remove(temp_dssp_path)
44  if os.path.exists(pdb_path):
45  os.remove(pdb_path)
46 
47 def _CalcRelativeSA(residue_type, absolute_sa):
48  solvent_max_list=[118,317,238,243,183,262,286,154,258,228,
49  243,278,260,271,204,234,206,300,303,216] #TODO: source?
50  residue_indices = "ARNDCQEGHILKMFPSTWYV"
51  # cysteine bridges are marked with lower-case letters by DSSP. We don't
52  # really care which cysteines are forming covalent bonds, so let's set the
53  # one-letter-code to "C".
54  if residue_type.islower():
55  residue_type='C'
56  if residue_indices.find(residue_type)==-1:
57  raise RuntimeError('residue %s is a non-standard residue' %(residue_type))
58  else:
59  rel=float(absolute_sa)/(solvent_max_list[residue_indices.find(residue_type)])
60  return rel
61 
62 #
63 #def AssignDSSP(ent, pdb_path="", extract_burial_status=False, tmp_dir=None,
64 # dssp_bin=None):
65 # """
66 # Assign secondary structure states to peptide residues in the structure. This
67 # function uses the DSSP command line program.
68 #
69 # If you already have a DSSP output file and would like to assign the secondary
70 # structure states to an entity, use :func:`LoadDSSP`.
71 #
72 # :param ent: The entity for which the secondary structure should be calculated
73 # :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
74 # :param extract_burial_status: If true, also extract burial status and store
75 # as float-property
76 # ``relative_solvent_accessibility`` at residue
77 # level
78 # :param tmp_dir: If set, overrides the default tmp directory of the
79 # operating system
80 # :param dssp_bin: The path to the DSSP executable
81 # :raises: :class:`~ost.settings.FileNotFound` if the dssp executable is not
82 # in the path.
83 # :raises: :class:`RuntimeError` when dssp is executed with errors
84 # """
85 #
86 # if not ent.IsValid():
87 # raise ValueError('model entity is not valid')
88 # if ent.atom_count==0:
89 # raise ValueError('model entity does not contain any atoms')
90 #
91 # dssp_abs_path=settings.Locate(['dsspcmbi','dssp','mkdssp'], env_name='DSSP_EXECUTABLE',
92 # explicit_file_name=dssp_bin)
93 # if os.path.isdir(dssp_abs_path):
94 # raise RuntimeError('"%s" is a directory. Specify path to DSSP binary' % dssp_abs_path)
95 # if not os.access(dssp_abs_path, os.X_OK):
96 # raise RuntimeError('"%s" is not executable' % dssp_abs_path)
97 #
98 # pdb_path=tempfile.mktemp(suffix=".pdb",prefix="temp_entity", dir=tmp_dir)
99 # io.SavePDB(ent, pdb_path)
100 # temp_dssp_path=tempfile.mktemp(suffix=".out",prefix="dssp", dir=tmp_dir)
101 #
102 # cmd = [dssp_abs_path, pdb_path, temp_dssp_path]
103 # s = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
104 # if s.returncode == 0:
105 # try:
106 # LoadDSSP(temp_dssp_path, ent, extract_burial_status)
107 # _Cleanup(temp_dssp_path, pdb_path)
108 # return ent
109 # except:
110 # pass # continue with dssp 4 fallback
111 #
112 # # either dssp execution failed or output file cannot be loaded
113 # # both can be the result of using dssp 4 (https://github.com/PDB-REDO/dssp)
114 # # => try fallback
115 #
116 # # dssp 4 desperately wants a CRYST1 record (REMOVE IF NOT NEEDED ANYMORE!)
117 # # Be aware, Even more stuff is needed if you don't set the CLIBD_MON env var
118 # # The dssp binding has therefore been deprecated and we mimic to "old" interface
119 # # using OpenStructure internal functionality
120 # with open(pdb_path, 'r') as fh:
121 # file_content = fh.read()
122 # with open(pdb_path, 'w') as fh:
123 # fh.write('CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1 ' +
124 # os.linesep + file_content)
125 #
126 # # explicitely request dssp output format
127 # cmd = [dssp_abs_path, '--output-format', 'dssp', pdb_path, temp_dssp_path]
128 # s_fallback = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
129 #
130 # if s_fallback.returncode == 0:
131 # try:
132 # LoadDSSP(temp_dssp_path, ent, extract_burial_status)
133 # _Cleanup(temp_dssp_path, pdb_path)
134 # return ent
135 # except:
136 # pass # continue with cleanup and raise error
137 #
138 # _Cleanup(temp_dssp_path, pdb_path)
139 # raise RuntimeError('Failed to assign DSSP')
140 
141 
142 def AssignDSSP(ent, pdb_path="", extract_burial_status=False, tmp_dir=None,
143  dssp_bin=None):
144  """
145  Assign secondary structure states to peptide residues in the structure. This
146  function replaces the "old" AssignDSSP which relies on the DSSP command line
147  program and uses OpenStructure internal functionality only. The sole purpose
148  is to retain the "old" interface and you're adviced to directly use
149  :func:`ost.mol.alg.AssignSecStruct` and :func:`ost.mol.alg.Accessibility`.
150 
151  If you already have a DSSP output file and would like to assign the secondary
152  structure states to an entity, use :func:`LoadDSSP`.
153 
154  :param ent: The entity for which the secondary structure should be calculated
155  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
156  :param extract_burial_status: If true, also extract burial status and store
157  as float-property
158  ``relative_solvent_accessibility`` at residue
159  level
160  :param tmp_dir: If set, overrides the default tmp directory of the
161  operating system - deprecated, has no effect
162  :param dssp_bin: The path to the DSSP executable - deprecated, has no effect
163  """
164 
165  if not ent.IsValid():
166  raise ValueError('model entity is not valid')
167  if ent.atom_count==0:
168  raise ValueError('model entity does not contain any atoms')
169 
170  mol.alg.AssignSecStruct(ent)
171  if extract_burial_status:
172  mol.alg.Accessibility(ent, algorithm=mol.alg.DSSP)
173  # map float properties from Accessibility algorithm to the original
174  # properties that have been set in the binding
175  for r in ent.residues:
176  if r.HasProp("asaAbs"):
177  asa = round(r.GetFloatProp("asaAbs")) # original DSSP output is rounded
178  asa_rel = _CalcRelativeSA(r.one_letter_code, asa) # there would be the
179  # asaRel property but
180  # it relates to the
181  # non-rounded asa
182  r.SetFloatProp("solvent_accessibility", asa)
183  r.SetFloatProp("relative_solvent_accessibility", asa_rel)
184  if asa_rel < 0.25:
185  r.SetStringProp("burial_status", 'b')
186  else:
187  r.SetStringProp("burial_status", 'e')
188 
189 
190 def LoadDSSP(file_name, model, extract_burial_status=False,
191  entity_saved=False, calculate_relative_sa=True):
192  """
193  Loads DSSP output and assigns secondary structure states to the peptidic
194  residues.
195 
196  If you would like to run dssp *and* assign the secondary structure,
197  use :func:`AssignDSSP` instead.
198 
199  :param file_name: The filename of the DSSP output file
200  :param model: The entity to which the secondary structure states should be
201  assigned
202  :param extract_burial_status: If true also calculates burial status of
203  residues and assigns it to the burial_status string property.
204  :param calculate_relative_sa: If true also relative solvent accessibility and
205  and assigns it to the relative_solvent_accessibility float property of
206  the residue.
207  :param entity_save: Whether the entity was saved.
208  """
209  if not model.IsValid():
210  raise ValueError('model entity is not valid')
211  if model.atom_count==0:
212  raise ValueError('model entity does not contain any atoms')
213  stream=open(file_name)
214  if not _SkipHeader(stream):
215  stream.close()
216  raise RuntimeError('Ill-formatted DSSP file')
217 
218  for line in stream:
219  num=line[6:10].strip()
220  ins_code=line[10].strip()
221  chain_name=line[11]
222  solvent_accessibility=float(line[34:39].strip())
223  #solvent_accessibility=line[34:39].strip()
224  amino_acid=line[13]
225  #print line
226 
227 
228  if isinstance(model,mol.ChainView):
229  chain=model
230  else:
231  chain=model.FindChain(chain_name)
232 
233  if not chain.IsValid():
234  continue
235  if num=='':
236  continue
237  residue=None
238  try:
239  if ins_code == "":
240  residue=chain.FindResidue(mol.ResNum(int(num)))
241  else:
242  residue=chain.FindResidue(mol.ResNum(int(num),ins_code))
243 
244  # set property "burial status:
245  if extract_burial_status:
246  #set default (dummy) burial status for incomplete residues:
247  residue.SetStringProp("burial_status", 'X')
248 
249  #handle seleno-methionine appearing as amino acid 'X' in DSSP:
250  if residue.name=="MSE" and amino_acid=='X':
251  amino_acid='M'
252 
253  residue.SetFloatProp("solvent_accessibility",
254  solvent_accessibility)
255  if calculate_relative_sa:
256  relative_sa=_CalcRelativeSA(amino_acid,solvent_accessibility)
257  residue.SetFloatProp("relative_solvent_accessibility",
258  relative_sa)
259  if relative_sa < 0.25:
260  residue.SetStringProp("burial_status", 'b')
261  else:
262  residue.SetStringProp("burial_status", 'e')
263  except Exception as e:
264  print("ERROR:",e)
265  continue
266 
267  rtype=line[16:17]
268  rt=mol.SecStructure.COIL
269  if rtype=='H':
270  rt=mol.SecStructure.ALPHA_HELIX
271  elif rtype=='E':
272  rt=mol.SecStructure.EXTENDED
273  elif rtype=='B':
274  rt=mol.SecStructure.BETA_BRIDGE
275  elif rtype=='S':
276  rt=mol.SecStructure.BEND
277  elif rtype=='T':
278  rt=mol.SecStructure.TURN
279  elif rtype=='I':
280  rt=mol.SecStructure.PI_HELIX
281  elif rtype=='G':
282  rt=mol.SecStructure.THREE_TEN_HELIX
283  # for corrupted DSSP files. Catch in calling routine:
284  if not residue.IsValid():
285  #Todo: if residues with occupancy 0 have been removed before
286  #using a selection statement, they are missed here
287  #IMPORTANT: asign DSSP before any selections
288  stream.close()
289  raise RuntimeError('Ill-formatted DSSP file: invalid residue')
290  residue.SetSecStructure(mol.SecStructure(rt))
291  stream.close()
Secondary structure types as defined by DSSP. For convenience, the enum values match the characters u...
definition of ChainView
Definition: chain_view.hh:37