4 Author: Florian Kiefer, Gerardo Tauriello (cleanup/speedup)
6 This module is for calculating surface areas
7 from OpenStructure using the external program naccess
9 How To Use This Module:
10 1. Import it (e.g. as "from ost.bindings import naccess")
11 2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
21 from ost
import io, mol, settings, LogWarning
23 def _GetExecutable(naccess_exe):
25 Method to check if naccess executable is present
27 :param naccess: Explicit path to naccess executable
28 :returns: Path to the executable
29 :exception: FileNotFound if executable is not found
31 return settings.Locate(
'naccess', explicit_file_name=naccess_exe)
33 def _CheckNaccessRoot(naccess_root):
35 :return: True, if given directory contains "accall" binary and files
36 "vdw.radii" and "standard.data".
37 :param naccess_root: Path to naccess folder to check.
39 accall_exe = os.path.join(naccess_root,
"accall")
40 check = (os.path.exists(accall_exe)
and os.access(accall_exe, os.X_OK) \
41 and os.path.exists(os.path.join(naccess_root,
"vdw.radii")) \
42 and os.path.exists(os.path.join(naccess_root,
"standard.data")))
44 LogWarning(
"NACCESS: Could not find required files to launch accall " \
45 "directly in %s." % naccess_root)
48 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
50 Setup files for naccess calculation in temporary directory
52 :param entity: OST entity to calculate surface
53 :param selection: Calculate surface for subset of entity
54 :param scratch_dir: Scratch directory. A subfolder for temporary files
55 is created in there. If not specified, a default
56 directory is used (see :func:`tempfile.mkdtemp`).
57 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
58 in the default NACCESS version to 50 000)
59 :returns: Tuple containing temporary directory, input filename for naccess and
60 directory of the input file
61 :exception: RuntimeError if selection is not valid or too many atoms
66 if scratch_dir !=
None:
67 tmp_dir_name = tempfile.mkdtemp(dir=scratch_dir)
69 tmp_dir_name = tempfile.mkdtemp()
73 entity_view = entity.Select(selection)
76 if len(entity_view.atoms) > max_number_of_atoms:
77 raise RuntimeError(
"Too much atoms for NACCESS (> %s)" % max_number_of_atoms)
78 if not entity_view.IsValid():
79 raise RuntimeError(
"Could not create view for selection (%s)"%(selection))
82 tmp_file_name =
"entity.pdb"
83 tmp_file_base = os.path.join(tmp_dir_name,
"entity")
84 io.SavePDB(entity_view, os.path.join(tmp_dir_name, tmp_file_name))
85 return (tmp_dir_name, tmp_file_name, tmp_file_base)
88 def _ParseAsaFile(entity, file, asa_atom):
90 Reads Area file (.asa) and attach asa per atom to an entitiy
92 :param entity: EntityHandle or EntityView for attaching sasa on atom level
93 :param file: Filename of area file
94 :param asa_atom: Name of the float property for SASA
98 asa_lines = asa_fh.readlines()
102 if l.startswith(
"ATOM"):
106 res_number = l[22:27]
108 atom_name = atom_name.strip()
110 res_number = res_number.strip()
112 m = re.match(
r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
115 if di[
"ins"] ==
None:
118 resNum =
mol.ResNum(int(di[
"num"]), di[
"ins"])
120 a = entity.FindAtom(chain_id, resNum, atom_name)
122 a.SetFloatProp(asa_atom, float(asa))
124 LogWarning(
"NACCESS: invalid asa entry %s %s %s" \
125 % (chain_id, resNum, atom_name))
127 def _ParseRsaFile(entity, file, asa_abs, asa_rel):
129 Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
131 :param entity: EntityHandle or EntityView for attaching sasa on atom level
132 :param file: Filename of .rsa file
133 :param asa_atom: Name of the float property for absolute SASA
134 :param asa_atom: Name of the float property for relative SASA
135 :exception: RuntimeError if residue names are not the same
138 area_lines = area_fh.readlines()
141 area_lines = area_lines[4:]
144 if l.startswith(
"RES"):
146 p = re.compile(
r'\s+')
148 res_name = res_name.strip()
151 res_number = res_number.strip()
152 abs_all, rel_all = l[15:28].strip().split()
153 m = re.match(
r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
155 if di[
"ins"] ==
None:
158 resNum =
mol.ResNum(int(di[
"num"]), di[
"ins"])
160 res = entity.FindResidue(chain_id, resNum)
161 if res_name == res.name:
162 res.SetFloatProp(asa_rel, float(rel_all) )
163 res.SetFloatProp(asa_abs, float(abs_all) )
165 raise RuntimeError(
"Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name))
168 def __CleanupFiles(dir_name):
170 Method which recursively deletes a directory
172 :warning: This method removes also non-empty directories without asking, so
176 shutil.rmtree(dir_name)
179 def _RunACCALL(command, temp_dir, query):
181 Fast method to run the Naccess surface calculation.
183 This method starts the accall binary directly and pipes in the input provided
184 in *query*. This is faster than calling the "naccess" script since the script
185 has a constant overhead of roughly 1.3s in each call.
187 :param command: Command to execute
188 :param temp_dir: Command is executed with this working directory
189 :param query: User input to pipe into *command*
190 :returns: stdout of command
191 :exception: CalledProcessError for non-zero return value
194 proc = subprocess.Popen(command, cwd=temp_dir, stdout=subprocess.PIPE,
195 stderr=subprocess.PIPE, stdin=subprocess.PIPE)
196 stdout_value, stderr_value = proc.communicate(query.encode())
199 if proc.returncode != 0:
200 LogWarning(
"WARNING: naccess error\n%s\n%s" % (stdout_value.decode(),
201 stderr_value.decode()))
202 raise subprocess.CalledProcessError(proc.returncode, command)
204 return stdout_value.decode()
207 def _RunNACCESS(command, temp_dir):
209 Method to run the Naccess surface calculation.
211 This method starts the external Naccess executable and returns the stdout.
213 :param command: Command to execute
214 :param temp_dir: Command is executed with this working directory
215 :returns: stdout of command
216 :exception: CalledProcessError for non-zero return value
218 proc = subprocess.Popen(command, cwd=temp_dir, shell=
True,
219 stdout=subprocess.PIPE)
220 stdout_value, stderr_value = proc.communicate()
223 if proc.returncode != 0:
224 LogWarning(
"WARNING: naccess error\n%s" % stdout_value.decode())
225 raise subprocess.CalledProcessError(proc.returncode, command)
227 return stdout_value.decode()
231 include_hydrogens=
False, include_hetatm =
False,
232 include_water=
False, selection=
"", naccess_exe=
None,
233 naccess_root=
None, keep_files=
False, asa_abs=
"asaAbs",
234 asa_rel=
"asaRel", asa_atom=
"asaAtom", scratch_dir=
None,
235 max_number_of_atoms=50000):
237 Calculates analytical the solvent accessible surface area by using the
238 external naccess program
240 This method calculates the molecular surface areas by invoking the external
241 program naccess. First, it is checked if the naccess executable is present, then,
242 the necessary files are prepared in a temporary directory and naccess is
243 executed. The last step is to remove the temporary directory.
246 :param entity: OST entity to calculate surface
247 :param radius: Surface probe radius
248 :param include_hydrogens: Calculate surface including hydrogens
249 :param include_hetatm: Calculate surface including hetatms
250 :param include_water: Calculate surface including water
251 :param selection: Calculate surface for subset of entity
252 :param naccess_exe: naccess executable (full path to executable)
253 :param naccess_root: Path to folder containing "accall" binary and
254 files "vdw.radii" and "standard.data". This is the
255 fastest way to call naccess!
256 :param keep_files: If True, do not delete temporary files
257 :param asa_abs: Attaches per residue absolute SASA to specified
258 FloatProp on residue level
259 :param asa_rel: Attaches per residue relative SASA to specified
260 FloatProp on residue level
261 :param asa_atom: Attaches per atom SASA to specified FloatProp at
263 :param scratch_dir: Scratch directory. A subfolder for temporary files
264 is created in there. If not specified, a default
265 directory is used (see :func:`tempfile.mkdtemp`).
266 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
267 in the default NACCESS version to 50 000)
269 :returns: absolute SASA calculated using asa_atom
273 if naccess_root
and _CheckNaccessRoot(naccess_root):
278 naccess_executable = _GetExecutable(naccess_exe)
280 naccess_root = os.path.dirname(naccess_executable)
281 fast_mode = _CheckNaccessRoot(naccess_root)
284 (naccess_data_dir, naccess_data_file, naccess_data_base) \
285 = _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
291 query =
"PDBFILE %s\n" \
296 % (naccess_data_file, os.path.join(naccess_root,
"vdw.radii"),
297 os.path.join(naccess_root,
"standard.data"), radius)
298 if include_hydrogens:
299 query +=
"HYDROGENS\n"
303 query +=
"HETATOMS\n"
305 command = os.path.join(naccess_root,
"accall")
306 _RunACCALL(command, naccess_data_dir, query)
308 LogWarning(
"NACCESS: Falling back to slower call to %s." \
309 % naccess_executable)
311 command =
"%s %s -p %f " % \
312 (naccess_executable, naccess_data_file, radius)
313 if include_hydrogens:
314 command =
"%s -y" % command
316 command =
"%s -w" % command
318 command =
"%s -h" % command
320 _RunNACCESS(command, naccess_data_dir)
323 new_asa = os.path.join(naccess_data_dir,
"%s.asa" % naccess_data_base)
324 _ParseAsaFile(entity, new_asa, asa_atom)
326 new_rsa = os.path.join(naccess_data_dir,
"%s.rsa" % naccess_data_base)
327 _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
332 __CleanupFiles(naccess_data_dir)
336 for a
in entity.atoms:
337 sasa += a.GetFloatProp(asa_atom, 0.0)