OpenStructure
Loading...
Searching...
No Matches
naccess.py
Go to the documentation of this file.
1'''
2Naccess module
3
4Author: Florian Kiefer, Gerardo Tauriello (cleanup/speedup)
5
6This module is for calculating surface areas
7from OpenStructure using the external program naccess
8
9How To Use This Module:
10 1. Import it (e.g. as "from ost.bindings import naccess")
11 2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
12
13Requirement:
14 - naccess installed
15'''
16
17import tempfile
18import subprocess
19import os
20import re
21from ost import io, mol, settings, LogWarning
22
23def _GetExecutable(naccess_exe):
24 """
25 Method to check if naccess executable is present
26
27 :param naccess: Explicit path to naccess executable
28 :returns: Path to the executable
29 :exception: FileNotFound if executable is not found
30 """
31 return settings.Locate('naccess', explicit_file_name=naccess_exe)
32
33def _CheckNaccessRoot(naccess_root):
34 """
35 :return: True, if given directory contains "accall" binary and files
36 "vdw.radii" and "standard.data".
37 :param naccess_root: Path to naccess folder to check.
38 """
39 accall_exe = os.path.join(naccess_root, "accall")
40 check = (os.path.exists(accall_exe) and os.access(accall_exe, os.X_OK) \
41 and os.path.exists(os.path.join(naccess_root, "vdw.radii")) \
42 and os.path.exists(os.path.join(naccess_root, "standard.data")))
43 if not check:
44 LogWarning("NACCESS: Could not find required files to launch accall " \
45 "directly in %s." % naccess_root)
46 return check
47
48def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
49 """
50 Setup files for naccess calculation in temporary directory
51
52 :param entity: OST entity to calculate surface
53 :param selection: Calculate surface for subset of entity
54 :param scratch_dir: Scratch directory. A subfolder for temporary files
55 is created in there. If not specified, a default
56 directory is used (see :func:`tempfile.mkdtemp`).
57 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
58 in the default NACCESS version to 50 000)
59 :returns: Tuple containing temporary directory, input filename for naccess and
60 directory of the input file
61 :exception: RuntimeError if selection is not valid or too many atoms
62 """
63
64 # create temporary directory
65 tmp_dir_name = ""
66 if scratch_dir != None:
67 tmp_dir_name = tempfile.mkdtemp(dir=scratch_dir)
68 else:
69 tmp_dir_name = tempfile.mkdtemp()
70
71 # select as specified by user
72 if selection != "":
73 entity_view = entity.Select(selection)
74 else:
75 entity_view = entity
76 if len(entity_view.atoms) > max_number_of_atoms:
77 raise RuntimeError("Too much atoms for NACCESS (> %s)" % max_number_of_atoms)
78 if not entity_view.IsValid():
79 raise RuntimeError("Could not create view for selection (%s)"%(selection))
80
81 # write entity to tmp file
82 tmp_file_name = "entity.pdb"
83 tmp_file_base = os.path.join(tmp_dir_name, "entity")
84 io.SavePDB(entity_view, os.path.join(tmp_dir_name, tmp_file_name))
85 return (tmp_dir_name, tmp_file_name, tmp_file_base)
86
87
88def _ParseAsaFile(entity, file, asa_atom):
89 """
90 Reads Area file (.asa) and attach asa per atom to an entitiy
91
92 :param entity: EntityHandle or EntityView for attaching sasa on atom level
93 :param file: Filename of area file
94 :param asa_atom: Name of the float property for SASA
95 """
96
97 asa_fh = open(file)
98 asa_lines = asa_fh.readlines()
99 asa_fh.close()
100
101 for l in asa_lines:
102 if l.startswith("ATOM"):
103 # get res_number, chain_id and atom name
104 atom_name = l[12:16]
105 chain_id = l[21]
106 res_number = l[22:27]
107 asa = l[54:63]
108 atom_name = atom_name.strip()
109 chain_id = chain_id
110 res_number = res_number.strip()
111 asa = asa.strip()
112 m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
113 di = m.groupdict()
114
115 if di["ins"] == None:
116 resNum = mol.ResNum(int(di["num"]))
117 else:
118 resNum = mol.ResNum(int(di["num"]), di["ins"])
119
120 a = entity.FindAtom(chain_id, resNum, atom_name)
121 if(a.IsValid()):
122 a.SetFloatProp(asa_atom, float(asa))
123 else:
124 LogWarning("NACCESS: invalid asa entry %s %s %s" \
125 % (chain_id, resNum, atom_name))
126
127def _ParseRsaFile(entity, file, asa_abs, asa_rel):
128 """
129 Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
130
131 :param entity: EntityHandle or EntityView for attaching sasa on atom level
132 :param file: Filename of .rsa file
133 :param asa_atom: Name of the float property for absolute SASA
134 :param asa_atom: Name of the float property for relative SASA
135 :exception: RuntimeError if residue names are not the same
136 """
137 area_fh = open(file)
138 area_lines = area_fh.readlines()
139 area_fh.close()
140 # shift first line
141 area_lines = area_lines[4:]
142 # parse lines
143 for l in area_lines:
144 if l.startswith("RES"):
145 # extract data
146 p = re.compile(r'\s+')
147 res_name = l[3:8]
148 res_name = res_name.strip()
149 chain_id = l[8:9]
150 res_number = l[9:14]
151 res_number = res_number.strip()
152 abs_all, rel_all = l[15:28].strip().split()
153 m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
154 di = m.groupdict()
155 if di["ins"] == None:
156 resNum = mol.ResNum(int(di["num"]))
157 else:
158 resNum = mol.ResNum(int(di["num"]), di["ins"])
159 # set res. props
160 res = entity.FindResidue(chain_id, resNum)
161 if res_name == res.name:
162 res.SetFloatProp(asa_rel, float(rel_all) )
163 res.SetFloatProp(asa_abs, float(abs_all) )
164 else:
165 raise RuntimeError("Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name))
166
167
168def __CleanupFiles(dir_name):
169 """
170 Method which recursively deletes a directory
171
172 :warning: This method removes also non-empty directories without asking, so
173 be careful!
174 """
175 import shutil
176 shutil.rmtree(dir_name)
177
178
179def _RunACCALL(command, temp_dir, query):
180 """
181 Fast method to run the Naccess surface calculation.
182
183 This method starts the accall binary directly and pipes in the input provided
184 in *query*. This is faster than calling the "naccess" script since the script
185 has a constant overhead of roughly 1.3s in each call.
186
187 :param command: Command to execute
188 :param temp_dir: Command is executed with this working directory
189 :param query: User input to pipe into *command*
190 :returns: stdout of command
191 :exception: CalledProcessError for non-zero return value
192 """
193
194 proc = subprocess.Popen(command, cwd=temp_dir, stdout=subprocess.PIPE,
195 stderr=subprocess.PIPE, stdin=subprocess.PIPE)
196 stdout_value, stderr_value = proc.communicate(query.encode())
197
198 # check for successful completion of naccess
199 if proc.returncode != 0:
200 LogWarning("WARNING: naccess error\n%s\n%s" % (stdout_value.decode(),
201 stderr_value.decode()))
202 raise subprocess.CalledProcessError(proc.returncode, command)
203
204 return stdout_value.decode()
205
206
207def _RunNACCESS(command, temp_dir):
208 """
209 Method to run the Naccess surface calculation.
210
211 This method starts the external Naccess executable and returns the stdout.
212
213 :param command: Command to execute
214 :param temp_dir: Command is executed with this working directory
215 :returns: stdout of command
216 :exception: CalledProcessError for non-zero return value
217 """
218 proc = subprocess.Popen(command, cwd=temp_dir, shell=True,
219 stdout=subprocess.PIPE)
220 stdout_value, stderr_value = proc.communicate()
221
222 # check for successful completion of naccess
223 if proc.returncode != 0:
224 LogWarning("WARNING: naccess error\n%s" % stdout_value.decode())
225 raise subprocess.CalledProcessError(proc.returncode, command)
226
227 return stdout_value.decode()
228
229
230def CalculateSurfaceArea(entity, radius=1.4,
231 include_hydrogens=False, include_hetatm = False,
232 include_water=False, selection="", naccess_exe=None,
233 naccess_root=None, keep_files=False, asa_abs= "asaAbs",
234 asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None,
235 max_number_of_atoms=50000):
236 """
237 Calculates analytical the solvent accessible surface area by using the
238 external naccess program
239
240 This method calculates the molecular surface areas by invoking the external
241 program naccess. First, it is checked if the naccess executable is present, then,
242 the necessary files are prepared in a temporary directory and naccess is
243 executed. The last step is to remove the temporary directory.
244
245
246 :param entity: OST entity to calculate surface
247 :param radius: Surface probe radius
248 :param include_hydrogens: Calculate surface including hydrogens
249 :param include_hetatm: Calculate surface including hetatms
250 :param include_water: Calculate surface including water
251 :param selection: Calculate surface for subset of entity
252 :param naccess_exe: naccess executable (full path to executable)
253 :param naccess_root: Path to folder containing "accall" binary and
254 files "vdw.radii" and "standard.data". This is the
255 fastest way to call naccess!
256 :param keep_files: If True, do not delete temporary files
257 :param asa_abs: Attaches per residue absolute SASA to specified
258 FloatProp on residue level
259 :param asa_rel: Attaches per residue relative SASA to specified
260 FloatProp on residue level
261 :param asa_atom: Attaches per atom SASA to specified FloatProp at
262 atom level
263 :param scratch_dir: Scratch directory. A subfolder for temporary files
264 is created in there. If not specified, a default
265 directory is used (see :func:`tempfile.mkdtemp`).
266 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
267 in the default NACCESS version to 50 000)
268
269 :returns: absolute SASA calculated using asa_atom
270 """
271
272 # check if naccess executable is specified
273 if naccess_root and _CheckNaccessRoot(naccess_root):
274 # use faster, direct call to accall binary
275 fast_mode = True
276 else:
277 # get naccess executable
278 naccess_executable = _GetExecutable(naccess_exe)
279 # see if we can extract naccess_root from there (fallback to old mode)
280 naccess_root = os.path.dirname(naccess_executable)
281 fast_mode = _CheckNaccessRoot(naccess_root)
282
283 # setup files for naccess
284 (naccess_data_dir, naccess_data_file, naccess_data_base) \
285 = _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
286
287 try:
288 # call naccess
289 if fast_mode:
290 # cook up stdin query (same logic as naccess script)
291 query = "PDBFILE %s\n" \
292 "VDWFILE %s\n" \
293 "STDFILE %s\n" \
294 "PROBE %f\n" \
295 "ZSLICE 0.05\n" \
296 % (naccess_data_file, os.path.join(naccess_root, "vdw.radii"),
297 os.path.join(naccess_root, "standard.data"), radius)
298 if include_hydrogens:
299 query += "HYDROGENS\n"
300 if include_water:
301 query += "WATERS\n"
302 if include_hetatm:
303 query += "HETATOMS\n"
304 # call it
305 command = os.path.join(naccess_root, "accall")
306 _RunACCALL(command, naccess_data_dir, query)
307 else:
308 LogWarning("NACCESS: Falling back to slower call to %s." \
309 % naccess_executable)
310 # set command line
311 command = "%s %s -p %f " % \
312 (naccess_executable, naccess_data_file, radius)
313 if include_hydrogens:
314 command = "%s -y" % command
315 if include_water:
316 command = "%s -w" % command
317 if include_hetatm:
318 command = "%s -h" % command
319 # execute naccess
320 _RunNACCESS(command, naccess_data_dir)
321
322 # parse outout
323 new_asa = os.path.join(naccess_data_dir, "%s.asa" % naccess_data_base)
324 _ParseAsaFile(entity, new_asa, asa_atom)
325
326 new_rsa = os.path.join(naccess_data_dir, "%s.rsa" % naccess_data_base)
327 _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
328
329 finally:
330 # clean up
331 if not keep_files:
332 __CleanupFiles(naccess_data_dir)
333
334 # sum up Asa for all atoms
335 sasa = 0.0
336 for a in entity.atoms:
337 sasa += a.GetFloatProp(asa_atom, 0.0)
338
339 return sasa
_ParseRsaFile(entity, file, asa_abs, asa_rel)
Definition naccess.py:127
_GetExecutable(naccess_exe)
Definition naccess.py:23
_RunNACCESS(command, temp_dir)
Definition naccess.py:207
_RunACCALL(command, temp_dir, query)
Definition naccess.py:179
CalculateSurfaceArea(entity, radius=1.4, include_hydrogens=False, include_hetatm=False, include_water=False, selection="", naccess_exe=None, naccess_root=None, keep_files=False, asa_abs="asaAbs", asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None, max_number_of_atoms=50000)
Definition naccess.py:235
_CheckNaccessRoot(naccess_root)
Definition naccess.py:33
_ParseAsaFile(entity, file, asa_atom)
Definition naccess.py:88
_SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
Definition naccess.py:48