OpenStructure
table_selector.py
Go to the documentation of this file.
1 
2 
3 
5 
def __init__(self, col_types, col_names, query):
    """Compile *query* into a reverse polish expression over the columns.

    The query is lexed, parsed (operands are cast according to the type of
    the column they are compared with) and converted to reverse polish
    notation. The positions of column names inside that notation are
    remembered so they can be replaced by row values during evaluation.

    :param col_types: type name of every column, parallel to *col_names*
    :param col_names: names of the columns the query may refer to
    :param query: the selection expression as a string
    """
    self.col_types = col_types
    self.col_names = col_names
    self.query = query

    # A lower precedence number binds tighter. Brackets carry NaN so that
    # they never compare equal to, or order against, a real operator.
    self.valid_operators = {
        'and': {'type': 'boolean', 'precedence': 5},
        'or': {'type': 'boolean', 'precedence': 6},

        # 'extension' is the two character operator a single character
        # may be the beginning of
        '!': {'type': 'comparison', 'precedence': 4, 'extension': '!='},
        '!=': {'type': 'comparison', 'precedence': 4, 'extension': None},
        '=': {'type': 'comparison', 'precedence': 4, 'extension': '=='},
        '==': {'type': 'comparison', 'precedence': 4, 'extension': None},
        '<=': {'type': 'comparison', 'precedence': 3, 'extension': None},
        '>=': {'type': 'comparison', 'precedence': 3, 'extension': None},
        '>': {'type': 'comparison', 'precedence': 3, 'extension': '>='},
        '<': {'type': 'comparison', 'precedence': 3, 'extension': '<='},

        '+': {'type': 'arithmetic', 'precedence': 2},
        '-': {'type': 'arithmetic', 'precedence': 2},
        '/': {'type': 'arithmetic', 'precedence': 1},
        '*': {'type': 'arithmetic', 'precedence': 1},

        '(': {'type': 'left_bracket', 'precedence': float('NaN')},
        '[': {'type': 'left_bracket', 'precedence': float('NaN')},
        '{': {'type': 'left_bracket', 'precedence': float('NaN')},
        ')': {'type': 'right_bracket', 'precedence': float('NaN')},
        ']': {'type': 'right_bracket', 'precedence': float('NaN')},
        '}': {'type': 'right_bracket', 'precedence': float('NaN')},
    }

    self.split_expression = self._ExpressionLexer(self.query)
    self.parsed_expression = self._ParseExpression(self.split_expression)
    # hand over a copy, so parsed_expression stays intact even if the
    # converter consumes the list it is given
    self.rpn_expression = self._ShuntingYard(list(self.parsed_expression))

    self.tab_indices = list()
    self.exp_indices = list()

    # remember which slots of the rpn expression hold a column name
    # (exp_indices) and which table column they refer to (tab_indices)
    for i, exp in enumerate(self.rpn_expression):
        if exp in self.col_names:
            self.tab_indices.append(self._GetIndex(exp))
            self.exp_indices.append(i)
50 
def EvaluateRow(self, row):
    """Return True if *row* fulfills the query, False otherwise.

    :param row: sequence of values, indexed by the column positions
                collected in ``tab_indices``
    """
    # work on a private copy, the compiled expression stays untouched and
    # can be reused for the next row
    rpn = list(self.rpn_expression)
    for ti, ei in zip(self.tab_indices, self.exp_indices):
        value = row[ti]
        # NaN is the only value that is not equal to itself; the
        # evaluation expects None in its place
        if value != value:
            rpn[ei] = None
        else:
            rpn[ei] = value
    return bool(self._EvaluateRPN(rpn))
61 
62  def _GetIndex(self, col):
63  if col not in self.col_namescol_names:
64  raise ValueError('Table Selector has no column named "%s"' % col)
65  return self.col_namescol_names.index(col)
66 
67  def _EvaluateAnd(self, lhs, rhs):
68  return lhs==True and rhs==True
69 
70  def _EvaluateOr(self, lhs, rhs):
71  return lhs==True or rhs==True
72 
73  def _EvaluateEqual(self, lhs, rhs):
74  return lhs==rhs
75 
76  def _EvaluateNonEqual(self, lhs, rhs):
77  return lhs!=rhs
78 
79  def _EvaluateLower(self, lhs, rhs):
80  if lhs==None or rhs==None:
81  return False
82  return lhs<rhs
83 
84  def _EvaluateGreater(self, lhs, rhs):
85  if lhs==None or rhs==None:
86  return False
87  return lhs>rhs
88 
89  def _EvaluateLowerEqual(self, lhs, rhs):
90  if lhs==None or rhs==None:
91  return False
92  return lhs<=rhs
93 
94  def _EvaluateGreaterEqual(self, lhs, rhs):
95  if lhs==None or rhs==None:
96  return False
97  return lhs>=rhs
98 
99  def _EvaluateAdd(self, lhs, rhs):
100  if lhs==None or rhs==None:
101  return None
102  return lhs+rhs
103 
104  def _EvaluateSubtract(self, lhs, rhs):
105  if lhs==None or rhs==None:
106  return None
107  return lhs-rhs
108 
109  def _EvaluateMultiply(self, lhs, rhs):
110  if lhs==None or rhs==None:
111  return None
112  return lhs*rhs
113 
114  def _EvaluateDivide(self, lhs, rhs):
115  if lhs==None or rhs==None:
116  return None
117  return lhs/rhs
118 
119 
def _EvaluateOperator(self, op, lhs, rhs):
    """Apply the binary operator *op* to *lhs* and *rhs*.

    This function assumes that all NaN values have been replaced by None.

    :param op: operator token, e.g. ``'+'``, ``'and'`` or ``'<='``
    :returns: whatever the matching ``_Evaluate*`` method returns
    :raises ValueError: if *op* is none of the supported operators
    """
    if op == '+':
        return self._EvaluateAdd(lhs, rhs)
    elif op == '-':
        return self._EvaluateSubtract(lhs, rhs)
    elif op == '/':
        return self._EvaluateDivide(lhs, rhs)
    elif op == '*':
        return self._EvaluateMultiply(lhs, rhs)
    elif op == 'and':
        return self._EvaluateAnd(lhs, rhs)
    elif op == 'or':
        return self._EvaluateOr(lhs, rhs)
    elif op in ('=', '=='):
        # both spellings mean equality
        return self._EvaluateEqual(lhs, rhs)
    elif op in ('!=', '!'):
        # both spellings mean inequality
        return self._EvaluateNonEqual(lhs, rhs)
    elif op == '<':
        return self._EvaluateLower(lhs, rhs)
    elif op == '>':
        return self._EvaluateGreater(lhs, rhs)
    elif op == '<=':
        return self._EvaluateLowerEqual(lhs, rhs)
    elif op == '>=':
        return self._EvaluateGreaterEqual(lhs, rhs)

    # formatting instead of concatenation, so that a non-string operator
    # is reported as well instead of failing with a TypeError
    raise ValueError('Unknown operator: %s' % (op,))
151 
def _EvaluateRPN(self, RPNExp):
    """Evaluate an expression given in reverse polish notation.

    Items that are keys of ``valid_operators`` are applied to the two
    topmost operands, everything else is pushed as an operand.
    NOTE: this also holds for operand values that happen to be equal to
    an operator token (e.g. the string 'and'), they are taken for the
    operator.

    :param RPNExp: sequence of operands and operator tokens; it is only
                   read, not modified
    :returns: the single value left on the stack, or False as soon as an
              operator yields None
    :raises ValueError: if operators and operands do not match up or the
                        expression is empty
    """
    stack = list()
    for exp in RPNExp:
        if exp in self.valid_operators:
            if len(stack) < 2:
                raise ValueError('Cannot evaluate operator on less than two operands!')
            rhs = stack.pop()
            lhs = stack.pop()
            result = self._EvaluateOperator(exp, lhs, rhs)
            # None marks a result that involves a missing value, the
            # whole expression counts as not fulfilled
            if result is None:
                return False
            stack.append(result)
        else:
            stack.append(exp)
    if len(stack) > 1:
        raise ValueError('Too many operands for given operators!')
    if len(stack) == 0:
        raise ValueError('Cannot evaluate an empty expression!')
    return stack.pop()
173 
174  def _ShuntingYard(self, split_expression):
175  #Creates the so called reverse polish notation out of the expression parser output.
176  #note, that there won't be parenthesis anymore and potential parenthesis
177  #mismatches get recognized.
178  #The shunting yard algorithm from dijkstra gets used.
179 
180  output_stack=list()
181  operator_stack=list()
182 
183  while True:
184  if len(split_expression)==0:
185  while True:
186  if len(operator_stack)==0:
187  break
188  if self.valid_operatorsvalid_operators[operator_stack[-1]]['type'] in ['left_bracket','right_bracket']:
189  raise ValueError('Parenthesis mismatch!')
190  output_stack.append(operator_stack.pop())
191  break
192 
193  exp=split_expression.pop(0)
194 
195  if exp in self.valid_operatorsvalid_operators:
196  if self.valid_operatorsvalid_operators[exp]['type']=='left_bracket':
197  operator_stack.append(exp)
198  continue
199 
200  if exp in self.valid_operatorsvalid_operators:
201  if self.valid_operatorsvalid_operators[exp]['type'] == 'right_bracket':
202  while True:
203  if len(operator_stack)==0:
204  raise ValueError('Parenthesis mismatch!')
205  if self.valid_operatorsvalid_operators[operator_stack[-1]]['type']=='left_bracket':
206  operator_stack.pop()
207  break
208  output_stack.append(operator_stack.pop())
209  continue
210 
211  if exp in self.valid_operatorsvalid_operators:
212  prec=self.valid_operatorsvalid_operators[exp]['precedence']
213  while len(operator_stack)>0:
214  if self.valid_operatorsvalid_operators[operator_stack[-1]]['type']=='left_bracket':
215  break
216  elif prec>=self.valid_operatorsvalid_operators[operator_stack[-1]]['precedence']:
217  output_stack.append(operator_stack.pop())
218  else:
219  break
220  operator_stack.append(exp)
221  continue
222  output_stack.append(exp)
223 
224  return output_stack
225 
226  def _ParseSubExpression(self, subexpression):
227 
228  valid_types={'float':'numeric','int':'numeric','string':'string','bool':'bool'}
229 
230  column_names=list()
231  column_types=list()
232 
233  final_expression=list()
234 
235 
236  for item in subexpression:
237  if item in self.col_namescol_names:
238  column_names.append(item)
239  column_types.append(valid_types[self.col_typescol_types[self._GetIndex_GetIndex(item)]])
240 
241  unique_type=list(set(column_types))
242  if len(unique_type)>1:
243  raise ValueError('Try to compare columns '+','.join(column_names)+' which have inconsistent types!')
244  if len(unique_type)==0:
245  raise ValueError('Try to evaluate subexpression '+' '.join(subexpression)+' that contains no valid column name of current table!')
246 
247  for item in subexpression:
248  if item in self.valid_operatorsvalid_operators:
249  final_expression.append(item)
250  continue
251  if item in column_names:
252  final_expression.append(item)
253  continue
254  if unique_type[0]=='numeric':
255  if item in ['NaN','nan','None','none']:
256  final_expression.append(None)
257  continue
258  else:
259  try:
260  final_expression.append(float(item))
261  continue
262  except:
263  raise RuntimeError('Tried to cast '+item+' into numeric type to compare with column(s) '+','.join(column_names)+', but failed!')
264  elif unique_type[0]=='bool':
265  if item in ['None','none']:
266  final_expression.append(None)
267  continue
268  if item in ['true','True']:
269  final_expression.append(True)
270  continue
271  if item in ['false','False']:
272  final_expression.append(False)
273  continue
274  raise RuntimeError('Tried to cast '+item+' into boolean type to compare with column(s) '+','.join(column_names)+', but failed!')
275  elif unique_type[0]=='string':
276  final_expression.append(item)
277 
278  return final_expression
279 
280 
def _ParseExpression(self, split_expression):
    """Expand shortcut operators and cast the operands of the lexer output.

    Three things happen in sequence:
    - expressions like 'a<=b<=c' are rejected
    - 'rnum=1,2,3' is replaced with '(rnum=1 or rnum=2 or rnum=3)' and
      'col_a=x:y' with '(x<=col_a and col_a<=y)'
    - the pieces between boolean operators and brackets are handed to
      ``_ParseSubExpression`` to cast the operands

    :param split_expression: list of string tokens; it is not modified
    :returns: new list of operator tokens, column names and cast operands
    :raises ValueError: if the expression is ambiguous or ',' / ':' are
                        misused
    """
    operators = self.valid_operators

    # check for problematic cases like 'a<=b<=c'. We don't know which
    # operator to evaluate first
    for i in range(len(split_expression)-3):
        first = split_expression[i]
        second = split_expression[i+2]
        if first in operators and second in operators:
            if operators[first]['precedence'] == operators[second]['precedence']:
                raise ValueError('Cannot Evaluate '+' '.join(split_expression[i:i+3])+' since both operators have same precedence!')

    # handle ',' operator
    # replaces an expression like 'rnum=1,2,3' with '(rnum=1 or rnum=2 or rnum=3)'
    num_items = len(split_expression)
    temp_split_expression = list()
    skips = 0

    for i, item in enumerate(split_expression):
        if skips > 0:
            skips -= 1
            continue
        if ',' not in item:
            temp_split_expression.append(item)
            continue

        # the indices are clamped, so at the borders the item itself is
        # looked at, which is never '='
        previous_item = split_expression[max(0, i-1)]
        next_item = split_expression[min(i+1, num_items-1)]
        context = ' '.join(split_expression[max(0, i-1):min(i+1, num_items)])

        if previous_item != '=' and next_item != '=':
            raise ValueError('Can evaluate \',\' operator only in combination with \"=\" in subexpression '+context)

        single_operands = item.split(',')

        if previous_item == '=':
            if i-2 < 0:
                raise ValueError('Cannot evaluate subexpression '+context+' starting with an \'=\'')
            main_operand = split_expression[i-2]
            # the operand and the '=' have already been copied, take
            # them back
            temp_split_expression.pop()
            temp_split_expression.pop()
            skips = 0
        else:
            if i+2 > num_items-1:
                raise ValueError('Cannot evaluate subexpression '+context+' ending with an \'=\'')
            main_operand = split_expression[i+2]
            # the '=' and the operand following it are consumed here
            skips = 2

        temp_split_expression.append('(')
        temp_split_expression += ' or '.join(['%s = %s' % (main_operand, operand) for operand in single_operands]).split()
        temp_split_expression.append(')')

    split_expression = temp_split_expression

    # handle ':' operator
    # replaces an expression like 'col_a=x:y' with '(x<=col_a and col_a<=y)'
    num_items = len(split_expression)
    temp_split_expression = list()
    skips = 0

    for i, item in enumerate(split_expression):
        if skips > 0:
            skips -= 1
            continue
        if ':' not in item:
            temp_split_expression.append(item)
            continue

        previous_item = split_expression[max(0, i-1)]
        next_item = split_expression[min(i+1, num_items-1)]
        context = ' '.join(split_expression[max(0, i-1):min(i+1, num_items)])

        if previous_item != '=' and next_item != '=':
            raise ValueError('Can evaluate subexpression '+context+' \':\' sign is only allowed in combination with \'=\'')

        bounds = item.split(':')
        if len(bounds) != 2:
            raise ValueError('Can operate \':\' operator only on 2 operands in subexpression '+context)

        if previous_item == '=':
            if i-2 < 0:
                raise ValueError('Cannot evaluate subexpression '+context+' starting with an \'=\'')
            temp_split_expression.pop()
            temp_split_expression.pop()
            main_operand = split_expression[i-2]
            skips = 0
        else:
            if i+2 > num_items-1:
                raise ValueError('Cannot evaluate subexpression '+context+' ending with an \'=\'')
            main_operand = split_expression[i+2]
            skips = 2

        temp_split_expression += ['(', bounds[0], '<=', main_operand, 'and',
                                  main_operand, '<=', bounds[1], ')']

    split_expression = temp_split_expression

    # the whole thing is now split to pieces, we need to cast the types of
    # the operands into the types of the columns the operands are compared
    # against. Boolean operators and brackets delimit those pieces.
    final_expression = list()
    subexpression = list()

    for item in split_expression:
        if item in operators and operators[item]['type'] in ['boolean', 'left_bracket', 'right_bracket']:
            if len(subexpression) > 0:
                # figure out the column type(s) and parse the operands
                final_expression += self._ParseSubExpression(subexpression)
                subexpression = list()
            final_expression.append(item)
            continue
        subexpression.append(item)

    if len(subexpression) > 0:
        final_expression += self._ParseSubExpression(subexpression)

    return final_expression
397 
398 
399  def _ExpressionLexer(self, expression):
400 
401  #Reads token after token and searches for brackets and valid_operators
402  #everything, that doesn't match the above is assumed to be an operand
403 
404  split_expression=list()
405 
406  actual_position=0
407  eaten_stuff=''
408 
409  while True:
410 
411  if actual_position>=len(expression):
412  if len(eaten_stuff)>0:
413  split_expression.append(eaten_stuff)
414  return split_expression
415 
416  token=expression[actual_position]
417 
418  if token.isspace():
419  if len(eaten_stuff)>0:
420  split_expression.append(eaten_stuff)
421  eaten_stuff=''
422  actual_position+=1
423  continue
424 
425  #note, that there is no check for boolean operators. They need to be clearly separated by spaces
426  #or brackets anyway, so they get added with the eaten stuff
427  if token in self.valid_operatorsvalid_operators:
428  if self.valid_operatorsvalid_operators[token]['type']=='left_bracket' or self.valid_operatorsvalid_operators[token]['type']=='right_bracket':
429  if len(eaten_stuff)>0:
430  split_expression.append(eaten_stuff)
431  eaten_stuff=''
432  split_expression.append(token)
433  actual_position+=1
434  continue
435 
436  if self.valid_operatorsvalid_operators[token]['type']=='arithmetic':
437  if len(eaten_stuff)>0:
438  split_expression.append(eaten_stuff)
439  eaten_stuff=''
440  split_expression.append(token)
441  actual_position+=1
442  continue
443 
444  if self.valid_operatorsvalid_operators[token]['type']=='comparison':
445  if len(eaten_stuff)>0:
446  split_expression.append(eaten_stuff)
447  eaten_stuff=''
448  if self.valid_operatorsvalid_operators[token]['extension']!=None:
449  if actual_position+len(self.valid_operatorsvalid_operators[token]['extension'])<len(expression):
450  if expression[actual_position:actual_position+len(self.valid_operatorsvalid_operators[token]['extension'])]==self.valid_operatorsvalid_operators[token]['extension']:
451  split_expression.append(self.valid_operatorsvalid_operators[token]['extension'])
452  actual_position+=len(self.valid_operatorsvalid_operators[token]['extension'])
453  continue
454  split_expression.append(token)
455  actual_position+=1
456  continue
457 
458  eaten_stuff+=token
459  actual_position+=1
def _ParseExpression(self, split_expression)
def _EvaluateLowerEqual(self, lhs, rhs)
def _EvaluateOr(self, lhs, rhs)
def _EvaluateAdd(self, lhs, rhs)
def _ExpressionLexer(self, expression)
def _EvaluateNonEqual(self, lhs, rhs)
def _EvaluateGreaterEqual(self, lhs, rhs)
def _EvaluateLower(self, lhs, rhs)
def _EvaluateEqual(self, lhs, rhs)
def _EvaluateGreater(self, lhs, rhs)
def _EvaluateAnd(self, lhs, rhs)
def __init__(self, col_types, col_names, query)
def _ShuntingYard(self, split_expression)
def _EvaluateMultiply(self, lhs, rhs)
def _EvaluateSubtract(self, lhs, rhs)
def _EvaluateDivide(self, lhs, rhs)
def _EvaluateOperator(self, op, lhs, rhs)
def _ParseSubExpression(self, subexpression)