53 """Creates :class:`AAIndexData` from data.
55 :param data: Iterable with strings in data format described for aaindex.
56 :returns: :class:`AAIndexData`, if iterable contains several entries,
57 parsing stops at separation sequence ('//'). None is returned
58 if nothing could be parsed.
59 :raises: descriptive error in case of corrupt data
75 current_data_type =
None
78 if line.startswith(
"//"):
80 elif line.strip() ==
"":
82 elif line[0]
in [
"H",
"D",
"R",
"A",
"T",
"J",
"I",
"M"]:
84 current_data_type = line[0]
85 elif line.startswith(
" "):
88 current_data_type =
None
90 if current_data_type ==
"H":
91 key = line[2:].strip()
92 elif current_data_type ==
"D":
94 elif current_data_type ==
"R":
96 elif current_data_type ==
"A":
98 elif current_data_type ==
"T":
100 elif current_data_type ==
"J":
102 elif current_data_type ==
"I":
103 if anno_type == AnnoType.PAIR:
104 raise RuntimeError(
"Observed single AA and pairwise "
105 "features in the same aaindex entry")
106 anno_type = AnnoType.SINGLE
107 if line.startswith(
"I"):
109 aakeys = [item.strip()
for item
in line[1:].split()]
110 exp_aa_keys = [
"A/L",
"R/K",
"N/M",
"D/F",
"C/P",
"Q/S",
111 "E/T",
"G/W",
"H/Y",
"I/V"]
112 if aakeys != exp_aa_keys:
113 raise RuntimeError(f
"Keys in single AA AAIndex entry "
114 "are expected to be "
115 "I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V "
120 elif current_data_type ==
"M":
121 if anno_type == AnnoType.SINGLE:
122 raise RuntimeError(
"Observed single AA and pairwise "
123 "features in the same aaindex entry")
124 anno_type = AnnoType.PAIR
125 if line.startswith(
"M"):
129 split_line = line[1:].split(
',')
130 split_line = sorted([item.strip()
for item
in split_line])
133 if len(split_line) != 2
or \
134 not split_line[0].startswith(
"cols")
or \
135 not split_line[1].startswith(
"rows"):
136 raise RuntimeError(f
"Expect value header in pair "
137 "AAIndex entry to be of form: "
138 "\"M rows = <x>, cols = <x>\" got: "
140 pair_cols = split_line[0].split(
"=")[1].strip()
141 pair_rows = split_line[1].split(
"=")[1].strip()
142 if len(pair_cols) != len(pair_cols):
143 raise RuntimeError(f
"Expect rows and cols to have same "
144 "number of elements when parsing "
145 "pair AAIndex entry got {line}")
154 raise RuntimeError(
"Cannot parse AAIndex entry without key...")
156 if anno_type == AnnoType.SINGLE:
157 olcs =
"ARNDCQEGHILKMFPSTWYV"
158 if len(olcs) != len(values):
159 raise RuntimeError(f
"Expected {len(olcs)} values in single AA "
160 "AAIndex entry, got {len(values)}")
161 for olc, value
in zip(olcs, values):
163 elif anno_type == AnnoType.PAIR:
169 n_values_match =
False
170 n_cols = len(pair_cols)
171 n_rows = len(pair_rows)
172 n_nonsym = n_cols * n_rows
173 if len(values) == n_nonsym:
174 n_values_match =
True
178 anno[a+b] = values[value_idx]
182 n_values_match =
True
184 n_sym = (N*N - N) / 2
186 if len(values) == n_sym:
188 for row_idx, row
in enumerate(pair_rows):
189 for col
in pair_cols[: row_idx+1]:
190 anno[row+col] = values[value_idx]
191 anno[col+row] = values[value_idx]
193 if not n_values_match:
194 raise RuntimeError(f
"Number of parsed values doesn't match "
195 "parsed rows and cols descriptors")
197 raise RuntimeError(
"Cannot parse AAIndex entry without values...")
203 data.authors = authors
205 data.journal = journal
206 data.anno_type = anno_type