OpenStructure
Loading...
Searching...
No Matches
table_selector.py
Go to the documentation of this file.
2
3
5
6 def __init__(self, col_types, col_names, query):
7
8 self.col_types=col_types
9 self.col_names=col_names
10 self.query=query
11
12 self.valid_operators=dict()
13
14 self.valid_operators['and']={'type':'boolean','precedence':5}
15 self.valid_operators['or']={'type':'boolean','precedence':6}
16
17 self.valid_operators['!']={'type':'comparison','precedence':4,'extension':'!='}
18 self.valid_operators['!=']={'type':'comparison','precedence':4,'extension':None}
19 self.valid_operators['=']={'type':'comparison','precedence':4,'extension':'=='}
20 self.valid_operators['==']={'type':'comparison','precedence':4,'extension':None}
21 self.valid_operators['<=']={'type':'comparison','precedence':3,'extension':None}
22 self.valid_operators['>=']={'type':'comparison','precedence':3,'extension':None}
23 self.valid_operators['>']={'type':'comparison','precedence':3,'extension':'>='}
24 self.valid_operators['<']={'type':'comparison','precedence':3,'extension':'<='}
25
26 self.valid_operators['+']={'type':'arithmetic','precedence':2}
27 self.valid_operators['-']={'type':'arithmetic','precedence':2}
28 self.valid_operators['/']={'type':'arithmetic','precedence':1}
29 self.valid_operators['*']={'type':'arithmetic','precedence':1}
30
31 self.valid_operators['(']={'type':'left_bracket','precedence':float('NaN')}
32 self.valid_operators['[']={'type':'left_bracket','precedence':float('NaN')}
33 self.valid_operators['{']={'type':'left_bracket','precedence':float('NaN')}
34 self.valid_operators[')']={'type':'right_bracket','precedence':float('NaN')}
35 self.valid_operators[']']={'type':'right_bracket','precedence':float('NaN')}
36 self.valid_operators['}']={'type':'right_bracket','precedence':float('NaN')}
37
41
42 self.tab_indices=list()
43 self.exp_indices=list()
44
45 #extract column indices
46 for i, exp in enumerate(self.rpn_expression):
47 if exp in self.col_names:
48 self.tab_indices.append(self._GetIndex(exp))
49 self.exp_indices.append(i)
50
def EvaluateRow(self, row):
    """Evaluate the stored query for a single table row.

    The operand slots listed in ``self.exp_indices`` are overwritten in
    place in ``self.rpn_expression`` with the row values found at the
    matching ``self.tab_indices``; NaN values are stored as ``None``.
    A copy of the filled-in expression is then passed to
    ``self._EvaluateRPN``.

    :param row: Indexable row, accessed at the positions in
                ``self.tab_indices``.
    :returns: ``True`` if the evaluation result is truthy, ``False``
              otherwise.
    """
    for table_idx, expr_idx in zip(self.tab_indices, self.exp_indices):
        value = row[table_idx]
        # NaN is the only value that compares unequal to itself
        self.rpn_expression[expr_idx] = None if value != value else value
    outcome = self._EvaluateRPN(list(self.rpn_expression))
    return True if outcome else False
61
def _GetIndex(self, col):
    """Return the position of column *col* in ``self.col_names``.

    :param col: Column name to look up.
    :raises ValueError: If *col* is not contained in ``self.col_names``.
    """
    names = self.col_names
    if col in names:
        return names.index(col)
    raise ValueError('Table Selector has no column named "%s"' % col)
66
def _EvaluateAnd(self, lhs, rhs):
    """Logical 'and': both operands have to compare equal to ``True``."""
    # equality (not identity) on purpose: values equal to True, such as 1,
    # count as true, anything else (including None) does not
    left_is_true = lhs == True
    if not left_is_true:
        return left_is_true
    return rhs == True
69
def _EvaluateOr(self, lhs, rhs):
    """Logical 'or': at least one operand has to compare equal to ``True``."""
    # equality (not identity) on purpose: values equal to True, such as 1,
    # count as true, anything else (including None) does not
    left_is_true = lhs == True
    if left_is_true:
        return left_is_true
    return rhs == True
72
def _EvaluateEqual(self, lhs, rhs):
    """Return the result of ``lhs == rhs``.

    No special treatment of ``None``: two ``None`` operands compare equal.
    """
    are_equal = (lhs == rhs)
    return are_equal
75
def _EvaluateNonEqual(self, lhs, rhs):
    """Return the result of ``lhs != rhs``.

    No special treatment of ``None``: ``None`` differs from every other
    value and equals itself.
    """
    are_different = (lhs != rhs)
    return are_different
78
def _EvaluateLower(self, lhs, rhs):
    """Return ``lhs < rhs``, or ``False`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return False
    return lhs < rhs
83
def _EvaluateGreater(self, lhs, rhs):
    """Return ``lhs > rhs``, or ``False`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return False
    return lhs > rhs
88
def _EvaluateLowerEqual(self, lhs, rhs):
    """Return ``lhs <= rhs``, or ``False`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return False
    return lhs <= rhs
93
def _EvaluateGreaterEqual(self, lhs, rhs):
    """Return ``lhs >= rhs``, or ``False`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return False
    return lhs >= rhs
98
def _EvaluateAdd(self, lhs, rhs):
    """Return ``lhs + rhs``, or ``None`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return None
    return lhs + rhs
103
def _EvaluateSubtract(self, lhs, rhs):
    """Return ``lhs - rhs``, or ``None`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return None
    return lhs - rhs
108
def _EvaluateMultiply(self, lhs, rhs):
    """Return ``lhs * rhs``, or ``None`` if either operand is ``None``.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return None
    return lhs * rhs
113
def _EvaluateDivide(self, lhs, rhs):
    """Return ``lhs / rhs``, or ``None`` if the quotient is undefined.

    The quotient counts as undefined when either operand is ``None`` or
    when the division raises ``ZeroDivisionError``. Returning ``None``
    keeps a single zero divisor from aborting a whole evaluation with an
    exception.

    ``None`` is detected by identity, so an operand whose ``__eq__``
    answers ``True`` for a comparison with ``None`` is not mistaken for a
    missing value.
    """
    if lhs is None or rhs is None:
        return None
    try:
        return lhs / rhs
    except ZeroDivisionError:
        return None
118
119
def _EvaluateOperator(self, op, lhs, rhs):
    """Apply the binary operator *op* to *lhs* and *rhs*.

    The operator token is mapped onto the matching ``_Evaluate*`` method
    of this object, whose return value is passed through unchanged.

    :param op: Operator token: one of ``+ - / *``, ``and``, ``or``,
               ``=``, ``==``, ``!``, ``!=``, ``<``, ``>``, ``<=``, ``>=``.
    :raises ValueError: If *op* is none of the tokens above.
    """

    #this function assumes, that all NaN values have been replaced by None!

    handler_names = {
        '+': '_EvaluateAdd',
        '-': '_EvaluateSubtract',
        '/': '_EvaluateDivide',
        '*': '_EvaluateMultiply',
        'and': '_EvaluateAnd',
        'or': '_EvaluateOr',
        '=': '_EvaluateEqual',
        '==': '_EvaluateEqual',
        '!=': '_EvaluateNonEqual',
        '!': '_EvaluateNonEqual',
        '<': '_EvaluateLower',
        '>': '_EvaluateGreater',
        '<=': '_EvaluateLowerEqual',
        '>=': '_EvaluateGreaterEqual',
    }

    handler_name = handler_names.get(op)
    if handler_name is None:
        raise ValueError('Unknown operator: '+op)
    # resolve the method only now, so just the needed one is looked up
    handler = getattr(self, handler_name)
    return handler(lhs, rhs)
151
def _EvaluateRPN(self, RPNExp):
    """Evaluate an expression given in reverse polish notation.

    Items that are keys of ``self.valid_operators`` are applied, through
    ``self._EvaluateOperator``, to the two most recent operands;
    everything else is treated as an operand. *RPNExp* is only read,
    never modified.

    :param RPNExp: Sequence of operands and operator tokens.
    :returns: The single value left once all operators are applied, or
              ``False`` as soon as an operator yields ``None``.
    :raises ValueError: If the expression is empty, an operator finds
                        fewer than two operands, or operands are left
                        over at the end.
    """
    operators = self.valid_operators
    stack = []
    # plain iteration instead of RPNExp.pop(0): linear instead of quadratic
    # and the caller's list stays intact
    for item in RPNExp:
        if item in operators:
            if len(stack) < 2:
                raise ValueError('Cannot evaluate operator on less than two operands!')
            rhs = stack.pop()
            lhs = stack.pop()
            result = self._EvaluateOperator(item, lhs, rhs)
            # NOTE(review): an undefined (None) intermediate result makes
            # the whole expression False right away, even if a later 'or'
            # branch could still be satisfied - confirm this is intended.
            if result is None:
                return False
            stack.append(result)
        else:
            stack.append(item)
    if not stack:
        raise ValueError('Cannot evaluate an empty expression!')
    if len(stack) > 1:
        raise ValueError('Too many operands for given operators!')
    return stack.pop()
173
def _ShuntingYard(self, split_expression):
    """Convert a tokenized infix expression to reverse polish notation.

    Dijkstra's shunting yard algorithm is applied, using the 'type' and
    'precedence' entries of ``self.valid_operators``. Tokens that are no
    key of ``self.valid_operators`` are treated as operands. The result
    contains no brackets anymore.

    Note that *split_expression* is consumed: tokens are removed from its
    front while they are processed.

    :param split_expression: List of tokens in infix order.
    :returns: List with the same operands and operators in RPN order.
    :raises ValueError: If opening and closing brackets do not pair up.
    """
    operators = self.valid_operators
    rpn = []
    pending = []

    while len(split_expression) > 0:
        token = split_expression.pop(0)

        if token not in operators:
            # plain operand, goes straight to the output
            rpn.append(token)
            continue

        token_type = operators[token]['type']

        if token_type == 'left_bracket':
            pending.append(token)
            continue

        if token_type == 'right_bracket':
            # move operators to the output until the opening bracket shows up
            while True:
                if len(pending) == 0:
                    raise ValueError('Parenthesis mismatch!')
                top = pending.pop()
                if operators[top]['type'] == 'left_bracket':
                    break
                rpn.append(top)
            continue

        # ordinary operator: operators on the stack whose precedence value
        # is not larger than the current one are emitted first; an opening
        # bracket stops the unwinding
        precedence = operators[token]['precedence']
        while len(pending) > 0:
            top_info = operators[pending[-1]]
            if top_info['type'] == 'left_bracket':
                break
            if not precedence >= top_info['precedence']:
                break
            rpn.append(pending.pop())
        pending.append(token)

    # input exhausted, flush the remaining operators; a bracket that is
    # still around never found its partner
    while len(pending) > 0:
        if operators[pending[-1]]['type'] in ['left_bracket', 'right_bracket']:
            raise ValueError('Parenthesis mismatch!')
        rpn.append(pending.pop())

    return rpn
225
def _ParseSubExpression(self, subexpression):
    """Cast the operands of a subexpression to the type of its columns.

    All items of *subexpression* that are column names must share one
    type class ('float' and 'int' columns both count as numeric). Items
    that are neither operators nor column names are then converted:

    * numeric columns: 'NaN', 'nan', 'None', 'none' become ``None``,
      everything else is converted with ``float``
    * bool columns: 'None'/'none' become ``None``, 'true'/'True' become
      ``True``, 'false'/'False' become ``False``
    * string columns: items are kept unchanged

    :param subexpression: List of string tokens.
    :returns: New list with operators and column names untouched and
              the remaining operands converted.
    :raises ValueError: If the columns have inconsistent types or if the
                        subexpression contains no column name at all.
    :raises RuntimeError: If an operand cannot be converted.
    """

    valid_types = {'float': 'numeric', 'int': 'numeric',
                   'string': 'string', 'bool': 'bool'}

    column_names = [item for item in subexpression if item in self.col_names]
    column_types = [valid_types[self.col_types[self._GetIndex(name)]]
                    for name in column_names]

    unique_type = list(set(column_types))
    if len(unique_type) > 1:
        raise ValueError('Try to compare columns '+','.join(column_names)+' which have inconsistent types!')
    if len(unique_type) == 0:
        raise ValueError('Try to evaluate subexpression '+' '.join(subexpression)+' that contains no valid column name of current table!')

    operand_type = unique_type[0]
    final_expression = []

    for item in subexpression:
        if item in self.valid_operators or item in column_names:
            final_expression.append(item)
        elif operand_type == 'numeric':
            if item in ['NaN', 'nan', 'None', 'none']:
                final_expression.append(None)
                continue
            # only a failed conversion is translated into a RuntimeError;
            # things like KeyboardInterrupt must not be swallowed here
            try:
                number = float(item)
            except ValueError:
                raise RuntimeError('Tried to cast '+item+' into numeric type to compare with column(s) '+','.join(column_names)+', but failed!')
            final_expression.append(number)
        elif operand_type == 'bool':
            if item in ['None', 'none']:
                final_expression.append(None)
            elif item in ['true', 'True']:
                final_expression.append(True)
            elif item in ['false', 'False']:
                final_expression.append(False)
            else:
                raise RuntimeError('Tried to cast '+item+' into boolean type to compare with column(s) '+','.join(column_names)+', but failed!')
        elif operand_type == 'string':
            final_expression.append(item)

    return final_expression
279
280
def _ParseExpression(self, split_expression):
    """Expand shortcut operators and cast operands of a tokenized query.

    Three steps are performed on the token list:

    1. Reject constructs like 'a<=b<=c', where two operators of equal
       precedence are separated by a single token.
    2. Expand the ',' and ':' shortcuts, which are only valid next to
       an '=':
       'rnum=1,2,3' becomes '( rnum = 1 or rnum = 2 or rnum = 3 )' and
       'col=x:y' becomes '( x <= col and col <= y )'.
    3. Split the result at boolean operators and brackets and pass each
       piece to ``self._ParseSubExpression`` to convert its operands.

    :param split_expression: List of string tokens; it is not modified.
    :returns: New list of tokens with converted operands.
    :raises ValueError: For ambiguous operator chains and for misplaced
                        or malformed ',' / ':' shortcuts.
    """
    operators = self.valid_operators

    #check for problematic cases like 'a<=b<=c'. We don't know which operator to evaluate first
    for i in range(len(split_expression)-3):
        first = split_expression[i]
        second = split_expression[i+2]
        if first in operators and second in operators:
            if operators[first]['precedence'] == operators[second]['precedence']:
                raise ValueError('Cannot Evaluate '+' '.join(split_expression[i:i+3])+' since both operators have same precedence!')

    #handle , operator
    #replaces an expression like 'rnum=1,2,3' with '(rnum=1 or rnum=2 or rnum=3)'

    expanded = []
    skips = 0
    n_tokens = len(split_expression)

    for i, token in enumerate(split_expression):
        if skips > 0:
            skips -= 1
            continue
        if ',' not in token:
            expanded.append(token)
            continue

        # neighbourhood of the token, only used in error messages
        context = ' '.join(split_expression[max(0, i-1):min(i+1, n_tokens)])
        preceded_by_eq = split_expression[max(0, i-1)] == '='
        followed_by_eq = split_expression[min(i+1, n_tokens-1)] == '='

        if not preceded_by_eq and not followed_by_eq:
            # one single message string (the parts used to be handed over
            # as two separate exception arguments)
            raise ValueError('Can evaluate \',\' operator only in combination with \"=\" in subexpression '+context)

        if preceded_by_eq:
            if i-2 < 0:
                raise ValueError('Cannot evaluate subexpression '+context+' starting with an \'=\'')
            main_operand = split_expression[i-2]
            # 'operand =' has already been copied, take it back
            expanded.pop()
            expanded.pop()
        else:
            if i+2 > n_tokens-1:
                raise ValueError('Cannot evaluate subexpression '+context+' ending with an \'=\'')
            main_operand = split_expression[i+2]
            # '= operand' follows and must not be copied again
            skips = 2

        alternatives = ['%s = %s' % (main_operand, operand)
                        for operand in token.split(',')]
        expanded.append('(')
        expanded += ' or '.join(alternatives).split()
        expanded.append(')')

    split_expression = expanded

    #handle ':' operator
    #replaces an expression like 'col_a=x:y' with '(col_a>=x and col_a<=y)'

    expanded = []
    skips = 0
    n_tokens = len(split_expression)

    for i, token in enumerate(split_expression):
        if skips > 0:
            skips -= 1
            continue
        if ':' not in token:
            expanded.append(token)
            continue

        context = ' '.join(split_expression[max(0, i-1):min(i+1, n_tokens)])
        preceded_by_eq = split_expression[max(0, i-1)] == '='
        followed_by_eq = split_expression[min(i+1, n_tokens-1)] == '='

        if not preceded_by_eq and not followed_by_eq:
            raise ValueError('Can evaluate subexpression '+context+' \':\' sign is only allowed in combination with \'=\'')

        bounds = token.split(':')
        if len(bounds) != 2:
            raise ValueError('Can operate \':\' operator only on 2 operands in subexpression '+context)
        lower, upper = bounds

        if preceded_by_eq:
            if i-2 < 0:
                raise ValueError('Cannot evaluate subexpression '+context+' starting with an \'=\'')
            expanded.pop()
            expanded.pop()
            main_operand = split_expression[i-2]
        else:
            if i+2 > n_tokens-1:
                raise ValueError('Cannot evaluate subexpression '+context+' ending with an \'=\'')
            main_operand = split_expression[i+2]
            skips = 2

        expanded += ['(', lower, '<=', main_operand, 'and',
                     main_operand, '<=', upper, ')']

    #the whole thing is now split to pieces, we need to cast the types of the operands
    #into the types of the columns, the operands are compared against.

    final_expression = []
    subexpression = []

    for item in expanded:
        is_separator = (item in operators and
                        operators[item]['type'] in ['boolean', 'left_bracket', 'right_bracket'])
        if not is_separator:
            subexpression.append(item)
            continue
        if len(subexpression) > 0:
            #figure out the column type(s) and parse the operands
            final_expression += self._ParseSubExpression(subexpression)
            subexpression = []
        final_expression.append(item)

    if len(subexpression) > 0:
        final_expression += self._ParseSubExpression(subexpression)

    return final_expression
397
398
def _ExpressionLexer(self, expression):
    """Split a query string into a list of tokens.

    The string is read character by character. Whitespace separates
    tokens. Brackets, arithmetic and comparison operators are emitted as
    tokens of their own; a comparison character is merged with the
    following one if together they form the two character operator
    stored as its 'extension' in ``self.valid_operators`` (e.g. '>' and
    '=' give '>='). Everything else is accumulated into operand tokens.

    :param expression: Query string.
    :returns: List of string tokens.
    """

    #Reads token after token and searches for brackets and valid_operators
    #everything, that doesn't match the above is assumed to be an operand

    operators = self.valid_operators
    standalone_types = ['left_bracket', 'right_bracket', 'arithmetic', 'comparison']

    split_expression = []
    eaten_stuff = ''
    actual_position = 0
    n_chars = len(expression)

    while actual_position < n_chars:
        token = expression[actual_position]

        #note, that there is no check for boolean operators. They need to be clearly separated by spaces
        #or brackets anyway, so they get added with the eaten stuff
        token_type = None
        if not token.isspace() and token in operators:
            token_type = operators[token]['type']

        if not token.isspace() and token_type not in standalone_types:
            # part of an operand (or of 'and' / 'or')
            eaten_stuff += token
            actual_position += 1
            continue

        # whitespace or operator: whatever has been collected is complete
        if len(eaten_stuff) > 0:
            split_expression.append(eaten_stuff)
            eaten_stuff = ''

        if token_type is None:
            # whitespace
            actual_position += 1
            continue

        if token_type == 'comparison':
            extension = operators[token]['extension']
            # slicing is safe at the end of the string, so a two character
            # operator is also recognized when it is the last thing there
            if extension is not None:
                end = actual_position + len(extension)
                if expression[actual_position:end] == extension:
                    split_expression.append(extension)
                    actual_position = end
                    continue

        split_expression.append(token)
        actual_position += 1

    if len(eaten_stuff) > 0:
        split_expression.append(eaten_stuff)
    return split_expression
_EvaluateOperator(self, op, lhs, rhs)
_ParseSubExpression(self, subexpression)
_ParseExpression(self, split_expression)
__init__(self, col_types, col_names, query)
_ShuntingYard(self, split_expression)