4 Author: Florian Kiefer, Gerardo Tauriello (cleanup/speedup)
6 This module is for calculating surface areas
7 from OpenStructure using the external program naccess
9 How To Use This Module:
10 1. Import it (e.g. as "from ost.bindings import naccess")
11 2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
21 from ost
import io, mol, settings, LogWarning
23 def _GetExecutable(naccess_exe):
25 Method to check if naccess executable is present
27 :param naccess: Explicit path to naccess executable
28 :returns: Path to the executable
29 :exception: FileNotFound if executable is not found
31 return settings.Locate(
'naccess', explicit_file_name=naccess_exe)
33 def _CheckNaccessRoot(naccess_root):
35 :return: True, if given directory contains "accall" binary and files
36 "vdw.radii" and "standard.data".
37 :param naccess_root: Path to naccess folder to check.
39 accall_exe = os.path.join(naccess_root,
"accall")
40 check = (os.path.exists(accall_exe)
and os.access(accall_exe, os.X_OK) \
41 and os.path.exists(os.path.join(naccess_root,
"vdw.radii")) \
42 and os.path.exists(os.path.join(naccess_root,
"standard.data")))
44 LogWarning(
"NACCESS: Could not find required files to launch accall " \
45 "directly in %s." % naccess_root)
48 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
50 Setup files for naccess calculation in temporary directory
52 :param entity: OST entity to calculate surface
53 :param selection: Calculate surface for subset of entity
54 :param scratch_dir: Scratch directory. A subfolder for temporary files
55 is created in there. If not specified, a default
56 directory is used (see :func:`tempfile.mkdtemp`).
57 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
58 in the default NACCESS version to 50 000)
59 :returns: Tuple containing temporary directory, input filename for naccess and
60 directory of the input file
61 :exception: RuntimeError if selection is not valid or too many atoms
66 if scratch_dir !=
None:
67 tmp_dir_name = tempfile.mkdtemp(dir=scratch_dir)
69 tmp_dir_name = tempfile.mkdtemp()
73 entity_view = entity.Select(selection)
76 if len(entity_view.atoms) > max_number_of_atoms:
77 raise RuntimeError,
"Too much atoms for NACCESS (> %s)" % max_number_of_atoms
78 if not entity_view.IsValid():
79 raise RuntimeError,
"Could not create view for selection (%s)"%(selection)
82 tmp_file_name =
"entity.pdb"
83 tmp_file_base = os.path.join(tmp_dir_name,
"entity")
84 io.SavePDB(entity_view, os.path.join(tmp_dir_name, tmp_file_name))
85 return (tmp_dir_name, tmp_file_name, tmp_file_base)
88 def _ParseAsaFile(entity, file, asa_atom):
90 Reads Area file (.asa) and attach asa per atom to an entitiy
92 :param entity: EntityHandle or EntityView for attaching sasa on atom level
93 :param file: Filename of area file
94 :param asa_atom: Name of the float property for SASA
98 asa_lines = asa_fh.readlines()
102 if l.startswith(
"ATOM"):
106 res_number = l[22:27]
108 atom_name = atom_name.strip()
110 res_number = res_number.strip()
112 m = re.match(
r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
115 if di[
"ins"] ==
None:
118 resNum =
mol.ResNum(int(di[
"num"]), di[
"ins"])
120 a = entity.FindAtom(chain_id, resNum, atom_name)
122 a.SetFloatProp(asa_atom, float(asa))
124 LogWarning(
"NACCESS: invalid asa entry %s %s %s" \
125 % (chain_id, resNum, atom_name))
127 def _ParseRsaFile(entity, file, asa_abs, asa_rel):
129 Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
131 :param entity: EntityHandle or EntityView for attaching sasa on atom level
132 :param file: Filename of .rsa file
133 :param asa_atom: Name of the float property for absolute SASA
134 :param asa_atom: Name of the float property for relative SASA
135 :exception: RuntimeError if residue names are not the same
138 area_lines = area_fh.readlines()
141 area_lines = area_lines[4:]
144 if l.startswith(
"RES"):
146 p = re.compile(
r'\s+')
148 res_name = res_name.strip()
151 res_number = res_number.strip()
152 abs_all, rel_all = l[15:28].strip().split()
153 m = re.match(
r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
155 if di[
"ins"] ==
None:
158 resNum =
mol.ResNum(int(di[
"num"]), di[
"ins"])
160 res = entity.FindResidue(chain_id, resNum)
161 if res_name == res.name:
162 res.SetFloatProp(asa_rel, float(rel_all) )
163 res.SetFloatProp(asa_abs, float(abs_all) )
165 raise RuntimeError,
"Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name)
168 def __CleanupFiles(dir_name):
170 Method which recursively deletes a directory
172 :warning: This method removes also non-empty directories without asking, so
176 shutil.rmtree(dir_name)
179 def _RunACCALL(command, temp_dir, query):
181 Fast method to run the Naccess surface calculation.
183 This method starts the accall binary directly and pipes in the input provided
184 in *query*. This is faster than calling the "naccess" script since the script
185 has a constant overhead of roughly 1.3s in each call.
187 :param command: Command to execute
188 :param temp_dir: Command is executed with this working directory
189 :param query: User input to pipe into *command*
190 :returns: stdout of command
191 :exception: CalledProcessError for non-zero return value
193 proc = subprocess.Popen(command, stdout=subprocess.PIPE,
194 stderr=subprocess.PIPE, stdin=subprocess.PIPE,
196 stdout_value, stderr_value = proc.communicate(query)
199 if proc.returncode != 0:
200 LogWarning(
"WARNING: naccess error\n%s\n%s" % (stdout_value, stderr_value))
201 raise subprocess.CalledProcessError(proc.returncode, command)
206 def _RunNACCESS(command, temp_dir):
208 Method to run the Naccess surface calculation.
210 This method starts the external Naccess executable and returns the stdout.
212 :param command: Command to execute
213 :param temp_dir: Command is executed with this working directory
214 :returns: stdout of command
215 :exception: CalledProcessError for non-zero return value
217 proc = subprocess.Popen(command, shell=
True, stdout=subprocess.PIPE,
219 stdout_value, stderr_value = proc.communicate()
222 if proc.returncode != 0:
223 LogWarning(
"WARNING: naccess error\n%s" % stdout_value)
224 raise subprocess.CalledProcessError(proc.returncode, command)
230 include_hydrogens=
False, include_hetatm =
False,
231 include_water=
False, selection=
"", naccess_exe=
None,
232 naccess_root=
None, keep_files=
False, asa_abs=
"asaAbs",
233 asa_rel=
"asaRel", asa_atom=
"asaAtom", scratch_dir=
None,
234 max_number_of_atoms=50000):
236 Calculates analytical the solvent accessible surface area by using the
237 external naccess program
239 This method calculates the molecular surface areas by invoking the external
240 program naccess. First, it is checked if the naccess executable is present, then,
241 the necessary files are prepared in a temporary directory and naccess is
242 executed. The last step is to remove the temporary directory.
245 :param entity: OST entity to calculate surface
246 :param radius: Surface probe radius
247 :param include_hydrogens: Calculate surface including hydrogens
248 :param include_hetatm: Calculate surface including hetatms
249 :param include_water: Calculate surface including water
250 :param selection: Calculate surface for subset of entity
251 :param naccess_exe: naccess executable (full path to executable)
252 :param naccess_root: Path to folder containing "accall" binary and
253 files "vdw.radii" and "standard.data". This is the
254 fastest way to call naccess!
255 :param keep_files: If True, do not delete temporary files
256 :param asa_abs: Attaches per residue absolute SASA to specified
257 FloatProp on residue level
258 :param asa_rel: Attaches per residue relative SASA to specified
259 FloatProp on residue level
260 :param asa_atom: Attaches per atom SASA to specified FloatProp at
262 :param scratch_dir: Scratch directory. A subfolder for temporary files
263 is created in there. If not specified, a default
264 directory is used (see :func:`tempfile.mkdtemp`).
265 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
266 in the default NACCESS version to 50 000)
268 :returns: absolute SASA calculated using asa_atom
272 if naccess_root
and _CheckNaccessRoot(naccess_root):
277 naccess_executable = _GetExecutable(naccess_exe)
279 naccess_root = os.path.dirname(naccess_executable)
280 fast_mode = _CheckNaccessRoot(naccess_root)
283 (naccess_data_dir, naccess_data_file, naccess_data_base) \
284 = _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
290 query =
"PDBFILE %s\n" \
295 % (naccess_data_file, os.path.join(naccess_root,
"vdw.radii"),
296 os.path.join(naccess_root,
"standard.data"), radius)
297 if include_hydrogens:
298 query +=
"HYDROGENS\n"
302 query +=
"HETATOMS\n"
304 command = os.path.join(naccess_root,
"accall")
305 _RunACCALL(command, naccess_data_dir, query)
307 LogWarning(
"NACCESS: Falling back to slower call to %s." \
308 % naccess_executable)
310 command =
"%s %s -p %f " % \
311 (naccess_executable, naccess_data_file, radius)
312 if include_hydrogens:
313 command =
"%s -y" % command
315 command =
"%s -w" % command
317 command =
"%s -h" % command
319 _RunNACCESS(command, naccess_data_dir)
322 new_asa = os.path.join(naccess_data_dir,
"%s.asa" % naccess_data_base)
323 _ParseAsaFile(entity, new_asa, asa_atom)
325 new_rsa = os.path.join(naccess_data_dir,
"%s.rsa" % naccess_data_base)
326 _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
331 __CleanupFiles(naccess_data_dir)
335 for a
in entity.atoms:
336 sasa += a.GetFloatProp(asa_atom, 0.0)