00001
00002
00003
00004 class TableSelector:
00005
00006 def __init__(self, col_types, col_names, query):
00007
00008 self.col_types=col_types
00009 self.col_names=col_names
00010 self.query=query
00011
00012 self.valid_operators=dict()
00013
00014 self.valid_operators['and']={'type':'boolean','precedence':5}
00015 self.valid_operators['or']={'type':'boolean','precedence':6}
00016
00017 self.valid_operators['!']={'type':'comparison','precedence':4,'extension':'!='}
00018 self.valid_operators['!=']={'type':'comparison','precedence':4,'extension':None}
00019 self.valid_operators['=']={'type':'comparison','precedence':4,'extension':'=='}
00020 self.valid_operators['==']={'type':'comparison','precedence':4,'extension':None}
00021 self.valid_operators['<=']={'type':'comparison','precedence':3,'extension':None}
00022 self.valid_operators['>=']={'type':'comparison','precedence':3,'extension':None}
00023 self.valid_operators['>']={'type':'comparison','precedence':3,'extension':'>='}
00024 self.valid_operators['<']={'type':'comparison','precedence':3,'extension':'<='}
00025
00026 self.valid_operators['+']={'type':'arithmetic','precedence':2}
00027 self.valid_operators['-']={'type':'arithmetic','precedence':2}
00028 self.valid_operators['/']={'type':'arithmetic','precedence':1}
00029 self.valid_operators['*']={'type':'arithmetic','precedence':1}
00030
00031 self.valid_operators['(']={'type':'left_bracket','precedence':float('NaN')}
00032 self.valid_operators['[']={'type':'left_bracket','precedence':float('NaN')}
00033 self.valid_operators['{']={'type':'left_bracket','precedence':float('NaN')}
00034 self.valid_operators[')']={'type':'right_bracket','precedence':float('NaN')}
00035 self.valid_operators[']']={'type':'right_bracket','precedence':float('NaN')}
00036 self.valid_operators['}']={'type':'right_bracket','precedence':float('NaN')}
00037
00038 self.split_expression=self._ExpressionLexer(self.query)
00039 self.parsed_expression=self._ParseExpression(self.split_expression)
00040 self.rpn_expression=self._ShuntingYard(self.parsed_expression)
00041
00042 self.tab_indices=list()
00043 self.exp_indices=list()
00044
00045
00046 for i, exp in enumerate(self.rpn_expression):
00047 if exp in self.col_names:
00048 self.tab_indices.append(self._GetIndex(exp))
00049 self.exp_indices.append(i)
00050
00051 def EvaluateRow(self,row):
00052 for ti, ei in zip(self.tab_indices, self.exp_indices):
00053
00054 if row[ti]!=row[ti]:
00055 self.rpn_expression[ei]=None
00056 else:
00057 self.rpn_expression[ei] = row[ti]
00058 if self._EvaluateRPN(list(self.rpn_expression)):
00059 return True
00060 return False
00061
00062 def _GetIndex(self, col):
00063 if col not in self.col_names:
00064 raise ValueError('Table Selector has no column named "%s"' % col)
00065 return self.col_names.index(col)
00066
00067 def _EvaluateAnd(self, lhs, rhs):
00068 return lhs==True and rhs==True
00069
00070 def _EvaluateOr(self, lhs, rhs):
00071 return lhs==True or rhs==True
00072
00073 def _EvaluateEqual(self, lhs, rhs):
00074 return lhs==rhs
00075
00076 def _EvaluateNonEqual(self, lhs, rhs):
00077 return lhs!=rhs
00078
00079 def _EvaluateLower(self, lhs, rhs):
00080 if lhs==None or rhs==None:
00081 return False
00082 return lhs<rhs
00083
00084 def _EvaluateGreater(self, lhs, rhs):
00085 if lhs==None or rhs==None:
00086 return False
00087 return lhs>rhs
00088
00089 def _EvaluateLowerEqual(self, lhs, rhs):
00090 if lhs==None or rhs==None:
00091 return False
00092 return lhs<=rhs
00093
00094 def _EvaluateGreaterEqual(self, lhs, rhs):
00095 if lhs==None or rhs==None:
00096 return False
00097 return lhs>=rhs
00098
00099 def _EvaluateAdd(self, lhs, rhs):
00100 if lhs==None or rhs==None:
00101 return None
00102 return lhs+rhs
00103
00104 def _EvaluateSubtract(self, lhs, rhs):
00105 if lhs==None or rhs==None:
00106 return None
00107 return lhs-rhs
00108
00109 def _EvaluateMultiply(self, lhs, rhs):
00110 if lhs==None or rhs==None:
00111 return None
00112 return lhs*rhs
00113
00114 def _EvaluateDivide(self, lhs, rhs):
00115 if lhs==None or rhs==None:
00116 return None
00117 return lhs/rhs
00118
00119
00120 def _EvaluateOperator(self, op, lhs, rhs):
00121
00122
00123
00124 if op=='+':
00125 return self._EvaluateAdd(lhs, rhs)
00126 elif op=='-':
00127 return self._EvaluateSubtract(lhs, rhs)
00128 elif op=='/':
00129 return self._EvaluateDivide(lhs, rhs)
00130 elif op=='*':
00131 return self._EvaluateMultiply(lhs, rhs)
00132 elif op=='and':
00133 return self._EvaluateAnd(lhs, rhs)
00134 elif op=='or':
00135 return self._EvaluateOr(lhs, rhs)
00136 elif op=='=' or op=='==':
00137 return self._EvaluateEqual(lhs, rhs)
00138 elif op=='!=' or op=='!':
00139 return self._EvaluateNonEqual(lhs, rhs)
00140 elif op=='<':
00141 return self._EvaluateLower(lhs, rhs)
00142 elif op=='>':
00143 return self._EvaluateGreater(lhs, rhs)
00144 elif op=='<=':
00145 return self._EvaluateLowerEqual(lhs, rhs)
00146 elif op=='>=':
00147 return self._EvaluateGreaterEqual(lhs, rhs)
00148
00149 else:
00150 raise ValueError('Unknown operator: '+op)
00151
00152 def _EvaluateRPN(self, RPNExp):
00153
00154 stack=list()
00155 while True:
00156 if len(RPNExp)==0:
00157 break
00158 exp=RPNExp.pop(0)
00159 if exp in self.valid_operators:
00160 if len(stack)<2:
00161 raise ValueError('Cannot evaluate operator on less than two operands!')
00162 rhs=stack.pop()
00163 lhs=stack.pop()
00164 result=self._EvaluateOperator(exp, lhs, rhs)
00165 if result==None:
00166 return False
00167 stack.append(result)
00168 else:
00169 stack.append(exp)
00170 if len(stack)>1:
00171 raise ValueError('Too many operands for given operators!')
00172 return stack.pop()
00173
00174 def _ShuntingYard(self, split_expression):
00175
00176
00177
00178
00179
00180 output_stack=list()
00181 operator_stack=list()
00182
00183 while True:
00184 if len(split_expression)==0:
00185 while True:
00186 if len(operator_stack)==0:
00187 break
00188 if self.valid_operators[operator_stack[-1]]['type'] in ['left_bracket','right_bracket']:
00189 raise ValueError('Parenthesis mismatch!')
00190 output_stack.append(operator_stack.pop())
00191 break
00192
00193 exp=split_expression.pop(0)
00194
00195 if exp in self.valid_operators:
00196 if self.valid_operators[exp]['type']=='left_bracket':
00197 operator_stack.append(exp)
00198 continue
00199
00200 if exp in self.valid_operators:
00201 if self.valid_operators[exp]['type'] == 'right_bracket':
00202 while True:
00203 if len(operator_stack)==0:
00204 raise ValueError('Parenthesis mismatch!')
00205 if self.valid_operators[operator_stack[-1]]['type']=='left_bracket':
00206 operator_stack.pop()
00207 break
00208 output_stack.append(operator_stack.pop())
00209 continue
00210
00211 if exp in self.valid_operators:
00212 prec=self.valid_operators[exp]['precedence']
00213 while len(operator_stack)>0:
00214 if self.valid_operators[operator_stack[-1]]['type']=='left_bracket':
00215 break
00216 elif prec>=self.valid_operators[operator_stack[-1]]['precedence']:
00217 output_stack.append(operator_stack.pop())
00218 else:
00219 break
00220 operator_stack.append(exp)
00221 continue
00222 output_stack.append(exp)
00223
00224 return output_stack
00225
00226 def _ParseSubExpression(self, subexpression):
00227
00228 valid_types={'float':'numeric','int':'numeric','string':'string','bool':'bool'}
00229
00230 column_names=list()
00231 column_types=list()
00232
00233 final_expression=list()
00234
00235
00236 for item in subexpression:
00237 if item in self.col_names:
00238 column_names.append(item)
00239 column_types.append(valid_types[self.col_types[self._GetIndex(item)]])
00240
00241 unique_type=list(set(column_types))
00242 if len(unique_type)>1:
00243 raise ValueError('Try to compare columns '+','.join(column_names)+' which have inconsistent types!')
00244 if len(unique_type)==0:
00245 raise ValueError('Try to evaluate subexpression '+' '.join(subexpression)+' that contains no valid column name of current table!')
00246
00247 for item in subexpression:
00248 if item in self.valid_operators:
00249 final_expression.append(item)
00250 continue
00251 if item in column_names:
00252 final_expression.append(item)
00253 continue
00254 if unique_type[0]=='numeric':
00255 if item in ['NaN','nan','None','none']:
00256 final_expression.append(None)
00257 continue
00258 else:
00259 try:
00260 final_expression.append(float(item))
00261 continue
00262 except:
00263 raise RuntimeError('Tried to cast '+item+' into numeric type to compare with column(s) '+','.join(column_names)+', but failed!')
00264 elif unique_type[0]=='bool':
00265 if item in ['None','none']:
00266 final_expression.append(None)
00267 continue
00268 if item in ['true','True']:
00269 final_expression.append(True)
00270 continue
00271 if item in ['false','False']:
00272 final_expression.append(False)
00273 continue
00274 raise RuntimeError('Tried to cast '+item+' into boolean type to compare with column(s) '+','.join(column_names)+', but failed!')
00275 elif unique_type[0]=='string':
00276 final_expression.append(item)
00277
00278 return final_expression
00279
00280
00281 def _ParseExpression(self, split_expression):
00282
00283
00284 for i in range(len(split_expression)-3):
00285 if (split_expression[i] in self.valid_operators) and (split_expression[i+2] in self.valid_operators):
00286 if self.valid_operators[split_expression[i]]['precedence']==self.valid_operators[split_expression[i+2]]['precedence']:
00287 raise ValueError('Cannot Evaluate '+' '.join(split_expression[i:i+3])+' since both operators have same precedence!')
00288
00289
00290
00291
00292 temp_split_expression=list()
00293 skips=0
00294
00295 for i in range(len(split_expression)):
00296 if skips>0:
00297 skips-=1
00298 continue
00299 if ',' in split_expression[i]:
00300
00301 if split_expression[max(0,i-1)] != '=' and split_expression[min(i+1,len(split_expression)-1)] != '=':
00302 raise ValueError('Can evaluate \',\' operator only in combination with \"=\" in subexpression ',' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))]))
00303
00304 single_operands=split_expression[i].split(',')
00305
00306 if split_expression[max(0,i-1)]=='=':
00307 if i-2<0:
00308 raise ValueError('Cannot evaluate subexpression '+' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])+' starting with an \'=\'')
00309 main_operand=split_expression[i-2]
00310 temp_split_expression.pop()
00311 temp_split_expression.pop()
00312 skips=0
00313
00314 else:
00315 if i+2>len(split_expression)-1:
00316 raise ValueError('Cannot evaluate subexpression '+' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])+' ending with an \'=\'')
00317 main_operand=split_expression[i+2]
00318 skips=2
00319
00320 temp_expression=list(['('])
00321 temp_expression+=' or '.join(['%s = %s'% (a,b) for (a,b) in zip(len(single_operands)*[main_operand],single_operands)]).split()
00322 temp_expression.append(')')
00323 temp_split_expression+=temp_expression
00324 continue
00325
00326 temp_split_expression.append(split_expression[i])
00327
00328 split_expression=temp_split_expression
00329
00330
00331
00332
00333 temp_split_expression=list()
00334 skips=0
00335
00336 for i in range(len(split_expression)):
00337 if skips>0:
00338 skips-=1
00339 continue
00340 if ':' in split_expression[i]:
00341 if split_expression[max(0,i-1)] != '=' and split_expression[min(i+1,len(split_expression)-1)] != '=':
00342 raise ValueError('Can evaluate subexpression '+' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])+' \':\' sign is only allowed in combination with \'=\'')
00343 if len(split_expression[i].split(':')) != 2:
00344 raise ValueError('Can operate \':\' operator only on 2 operands in subexpression '+' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))]))
00345
00346 lhs=split_expression[i].split(':')[0]
00347 rhs=split_expression[i].split(':')[1]
00348
00349 template_expression=['(','','<=','','and','','<=','',')']
00350
00351 if split_expression[max(0,i-1)] == '=':
00352 if i-2<0:
00353 raise ValueError('Cannot evaluate subexpression '+' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])+' starting with an \'=\'')
00354 temp_split_expression.pop()
00355 temp_split_expression.pop()
00356 template_expression[3]=split_expression[i-2]
00357 template_expression[5]=split_expression[i-2]
00358 skips=0
00359
00360 else:
00361 if i+2>len(split_expression)-1:
00362 raise ValueError('Cannot evaluate subexpression '+' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])+' ending with an \'=\'')
00363 template_expression[3]=split_expression[i+2]
00364 template_expression[5]=split_expression[i+2]
00365 skips=2
00366
00367 template_expression[1]=lhs
00368 template_expression[7]=rhs
00369 temp_split_expression+=template_expression
00370 continue
00371
00372 temp_split_expression.append(split_expression[i])
00373
00374 split_expression=temp_split_expression
00375
00376
00377
00378
00379 final_expression=list()
00380 subexpression=list()
00381
00382 for item in split_expression:
00383 if item in self.valid_operators:
00384 if self.valid_operators[item]['type'] in ['boolean','left_bracket','right_bracket']:
00385 if len(subexpression)>0:
00386
00387 final_expression+=self._ParseSubExpression(subexpression)
00388 subexpression=list()
00389 final_expression.append(item)
00390 continue
00391 subexpression.append(item)
00392
00393 if len(subexpression)>0:
00394 final_expression+=self._ParseSubExpression(subexpression)
00395
00396 return final_expression
00397
00398
00399 def _ExpressionLexer(self, expression):
00400
00401
00402
00403
00404 split_expression=list()
00405
00406 actual_position=0
00407 eaten_stuff=''
00408
00409 while True:
00410
00411 if actual_position>=len(expression):
00412 if len(eaten_stuff)>0:
00413 split_expression.append(eaten_stuff)
00414 return split_expression
00415
00416 token=expression[actual_position]
00417
00418 if token.isspace():
00419 if len(eaten_stuff)>0:
00420 split_expression.append(eaten_stuff)
00421 eaten_stuff=''
00422 actual_position+=1
00423 continue
00424
00425
00426
00427 if token in self.valid_operators:
00428 if self.valid_operators[token]['type']=='left_bracket' or self.valid_operators[token]['type']=='right_bracket':
00429 if len(eaten_stuff)>0:
00430 split_expression.append(eaten_stuff)
00431 eaten_stuff=''
00432 split_expression.append(token)
00433 actual_position+=1
00434 continue
00435
00436 if self.valid_operators[token]['type']=='arithmetic':
00437 if len(eaten_stuff)>0:
00438 split_expression.append(eaten_stuff)
00439 eaten_stuff=''
00440 split_expression.append(token)
00441 actual_position+=1
00442 continue
00443
00444 if self.valid_operators[token]['type']=='comparison':
00445 if len(eaten_stuff)>0:
00446 split_expression.append(eaten_stuff)
00447 eaten_stuff=''
00448 if self.valid_operators[token]['extension']!=None:
00449 if actual_position+len(self.valid_operators[token]['extension'])<len(expression):
00450 if expression[actual_position:actual_position+len(self.valid_operators[token]['extension'])]==self.valid_operators[token]['extension']:
00451 split_expression.append(self.valid_operators[token]['extension'])
00452 actual_position+=len(self.valid_operators[token]['extension'])
00453 continue
00454 split_expression.append(token)
00455 actual_position+=1
00456 continue
00457
00458 eaten_stuff+=token
00459 actual_position+=1