3 from ost
import settings
def createdb(infasta, resultDB, exe_path=None):
    """
    Convert fasta files containing query and/or target sequences into a
    mmseqs2 database.

    The command ``<mmseqs2> createdb <infasta> <resultDB>`` is logged through
    ``ost.LogInfo`` and then executed with ``subprocess.run``. The exit
    status of the process is not inspected and nothing is returned.

    :param infasta: The fasta file from which the mmseqs2 database will be
                    created.
    :type infasta: :class:`string`

    :param resultDB: The output location for mmseqs2 database.
    :type resultDB: :class:`string`

    :param exe_path: The path where mmseqs2 executable is located. It is
                     handed to ``settings.Locate`` as ``explicit_file_name``.
    :type exe_path: :class:`string`
    """
    executable = settings.Locate('mmseqs2', explicit_file_name=exe_path)
    command = [executable, 'createdb', infasta, resultDB]

    # Build the printable command line once; it is only used for logging.
    command_line = ' '.join(command)
    ost.LogInfo(f"running MMseqs2 {command_line}")

    subprocess.run(command)
31 An index file of the targetDB is computed for a fast read-in.
32 It is recommended to compute the index if the targetDB is reused for several searches.
33 A directory for temporary files is generated.
34 It is recommended to create this temporary folder on a local drive.
36 :param trg_db: The target database mmseqs2 file.
37 (You need to initially convert a fasta file into a mmseqs2 database using createdb).
38 :type trg_db: :class:`string`
40 :param exe_path: The path where mmseqs2 executable is located.
    :type exe_path: :class:`string`
    :param directory: The directory for temporary files.
44 :type directory: :class:`string`
48 mmseqs2_exe = settings.Locate(
'mmseqs2', explicit_file_name=exe_path)
49 args=[mmseqs2_exe,
'createindex', trg_db, directory]
51 ost.LogInfo(f
"running MMseqs2 {' '.join(args)}")
52 mmseqs2_pipe=subprocess.run(args)
def alignment(query_db, trg_db, resultDB, directory, resultDB_m8, sen=None,
              exe_path=None, start_sens=None, sens_steps=None, fmt=None):
    """
    Search a query database against a target database and convert the result
    into a tab-separated file.

    The alignment consists of two steps the prefilter and alignment.
    Two mmseqs2 commands are run one after the other: ``search`` (writing
    *resultDB*) and ``convertalis`` (writing *resultDB_m8*). Each command
    line is logged through ``ost.LogInfo`` before it is executed. The exit
    status of the processes is not inspected and nothing is returned.

    :param query_db: The query database mmseqs2 file.
                     (You need to initially convert a fasta file into a
                     mmseqs2 database using createdb).
    :type query_db: :class:`string`

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a
                   mmseqs2 database using createdb).
    :type trg_db: :class:`string`

    :param resultDB: The output location.
    :type resultDB: :class:`string`

    :param exe_path: The path where mmseqs2 executable is located.
    :type exe_path: :class:`string`

    :param directory: The directory for temporary files.
    :type directory: :class:`string`

    :param sen: It controls the speed and sensitivity of the search.
                A very fast search would use a sensitivity of 1.0,
                while a very sensitive search would use a sensitivity of up
                to 7.0. Forwarded as ``-s``; omitted when ``None``.
    :type sen: :class:`float`

    :param start_sens: Best hit fast. The lowest sensitivity is defined with
                       --start-sens. Only forwarded when *sens_steps* is set
                       as well.
    :type start_sens: :class:`int`

    :param sens_steps: Best hit fast.
                       The number of steps to reach the highest sensitivity
                       can be defined with --sens-steps. Only forwarded when
                       *start_sens* is set as well.
    :type sens_steps: :class:`int`

    Convert the result database into a BLAST tab formatted file.
    The file is formatted as a tab-separated list with 12 columns:
    (1,2) identifiers for query and target sequences/profiles,
    (3) sequence identity,
    (4) alignment length,
    (5) number of mismatches,
    (6) number of gap openings,
    (7-8, 9-10) domain start and end-position in query and in target,
    (11) E-value,
    and (12) bit score.

    The option --format-output defines a custom output format.
    The fields that are supported can be found in the following link:
    https://github.com/soedinglab/mmseqs2/wiki#custom-alignment-format-with-convertalis

    :param resultDB_m8: The output location
    :type resultDB_m8: :class:`string`

    :param fmt: Format output type, if the default is not used.
    :type fmt: :class:`string`
    """
    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    # '-a' asks the search to keep backtraces, which convertalis needs to
    # write alignment columns (see the MMseqs2 user guide).
    command = [mmseqs2_exe, 'search', query_db, trg_db, resultDB, directory,
               '-a']

    # Forward the requested sensitivity; without it mmseqs2 uses its default.
    if sen is not None:
        command.extend(['-s', str(sen)])

    # Iterative sensitivity is only requested when both values are provided.
    if start_sens and sens_steps:
        command.extend(['--start-sens', str(start_sens),
                        '--sens-steps', str(sens_steps)])

    ost.LogInfo(f"running MMseqs2 {' '.join(command)}")
    subprocess.run(command)

    args = [mmseqs2_exe, 'convertalis', query_db, trg_db, resultDB,
            resultDB_m8]

    # '--format-output' requires a value, so only add it with a custom format.
    if fmt:
        args.extend(['--format-output', fmt])

    ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
    subprocess.run(args)