9 from ost
import LogError, LogWarning, LogInfo, LogVerbose
12 return col_name.replace(
'_',
' ')
15 if isinstance(value, TableCol)
or isinstance(value, BinaryColExpr):
24 value=value.strip().upper()
25 return value
in (
'',
'NULL',
'NONE',
'NA')
31 if isinstance(value, TableCol)
or isinstance(value, BinaryColExpr):
40 possibilities=set([
'bool',
'int',
'float'])
42 str_ele=str(ele).upper()
46 if 'int' in possibilities:
50 possibilities.remove(
'int')
52 if 'float' in possibilities:
56 possibilities.remove(
'float')
57 if 'bool' in possibilities:
58 if str_ele
not in set([
'YES',
'NO',
'TRUE',
'FALSE']):
59 possibilities.remove(
'bool')
61 if len(possibilities)==0:
63 if len(possibilities)==2:
68 return possibilities.pop()
# Wrap a scalar lhs in an endless iterator so it can be zipped
# against the other operand ("cyle" was a typo for "cycle" —
# compare the rhs branch, which spells it correctly).
self.lhs = itertools.cycle([self.lhs])
78 self.
rhs=itertools.cycle([self.
rhs])
80 for l, r
in zip(self.
lhs, self.
rhs):
81 if l!=
None and r!=
None:
106 for row
in self._table.rows:
110 return len(self._table.rows)
113 return self._table.rows[index][self.
col_index]
116 self._table.rows[index][self.
col_index]=value
132 Essentially a named tuple, but allows column names that are not valid
133 python variable names.
136 self.__dict__[
'tab'] = weakref.proxy(tab)
137 self.__dict__[
'row_data'] = row_data
140 if type(col_name)==int:
141 return self.row_data[col_name]
142 return self.row_data[self.tab.GetColIndex(col_name)]
146 for k, v
in zip(self.__dict__[
'tab'].col_names, self.__dict__[
'row_data']):
147 s.append(
'%s=%s' % (k, str(v)))
152 return len(self.row_data)
155 if type(col_name)==int:
156 self.row_data[col_name] = val
158 self.row_data[self.tab.GetColIndex(col_name)] = val
161 if 'col_names' not in self.tab.__dict__
or col_name
not in self.tab.col_names:
162 raise AttributeError(col_name)
163 return self.row_data[self.tab.GetColIndex(col_name)]
166 if 'col_names' not in self.tab.__dict__
or col_name
not in self.tab.col_names:
167 raise AttributeError(col_name)
168 self.row_data[self.tab.GetColIndex(col_name)] = val
173 The table class provides convenient access to data in tabular form. An empty
174 table can be easily constructed as follows
176 .. code-block:: python
180 If you want to add columns directly when creating the table, column names
181 and *column types* can be specified as follows
183 .. code-block:: python
185 tab = Table(['nameX','nameY','nameZ'], 'sfb')
187 this will create three columns called nameX, nameY and nameZ of type string,
188 float and bool, respectively. There will be no data in the table and thus,
189 the table will not contain any rows.
191 The following *column types* are supported:
202 If you want to add data to the table in addition, use the following:
204 .. code-block:: python
206 tab=Table(['nameX','nameY','nameZ'],
208 nameX = ['a','b','c'],
209 nameY = [0.1, 1.2, 3.414],
210 nameZ = [True, False, False])
212 if values for one column are left out, they will be filled with NA, but if
213 values are specified, all values must be specified (i.e. same number of
218 SUPPORTED_TYPES=(
'int',
'float',
'bool',
'string',)
221 def __init__(self, col_names=[], col_types=None, **kwargs):
231 self.
col_names=[v
for v
in list(kwargs.keys())]
242 if 'col_names' not in self.__dict__
or col_name
not in self.
col_names:
243 raise AttributeError(col_name)
247 def _ParseColTypes(types, exp_num=None):
251 short2long = {
's' :
'string',
'i':
'int',
'b' :
'bool',
'f' :
'float'}
252 allowed_short = list(short2long.keys())
253 allowed_long = list(short2long.values())
260 types = types.lower()
263 if types
in allowed_long:
264 type_list.append(types)
265 elif types
in allowed_short:
266 type_list.append(short2long[types])
269 elif types.find(
',')!=-1:
270 for t
in types.split(
','):
271 if t
in allowed_long:
273 elif t
in allowed_short:
274 type_list.append(short2long[t])
276 raise ValueError(
'Unknown type %s in types %s'%(t,types))
281 if t
in allowed_short:
282 type_list.append(short2long[t])
284 raise ValueError(
'Unknown type %s in types %s'%(t,types))
288 raise ValueError(
'Col type %s must be string or list'%types)
296 if t
in allowed_long:
298 elif t
in allowed_short:
299 type_list.append(short2long[t])
301 raise ValueError(
'Unknown type %s in types %s'%(t,types))
305 raise ValueError(
'Col type %s must be string or list'%types)
308 if len(type_list)!=exp_num:
# Parenthesize the concatenated literals BEFORE applying %: the
# original applied % only to the second literal (precedence of %
# over +), passing three args to two specifiers and raising a
# TypeError instead of this diagnostic.
raise ValueError(('Parsed number of col types (%i) differs from '
                  'expected (%i) in types %s')
                 % (len(type_list), exp_num, types))
316 Set name of the table
319 :type name: :class:`str`
331 Rename column *old_name* to *new_name*.
333 :param old_name: Name of the old column
334 :param new_name: Name of the new column
335 :raises: :exc:`ValueError` when *old_name* is not a valid column
337 if old_name==new_name:
342 def _Coerce(self, value, ty):
344 Try to convert values (e.g. from :class:`str` type) to the specified type
346 :param value: the value
347 :type value: any type
349 :param ty: name of type to convert it to (i.e. *int*, *float*, *string*,
351 :type ty: :class:`str`
353 if value==
'NA' or value==
None:
362 if isinstance(value, str)
or isinstance(value, str):
363 if value.upper()
in (
'FALSE',
'NO',):
367 raise ValueError(
'Unknown type %s' % ty)
371 Returns the column index for the column with the given name.
373 :raises: ValueError if no column with the name is found.
376 raise ValueError(
'Table has no column named "%s"' % col)
377 return self.col_names.index(col)
381 Returns a list containing all column names.
387 Returns a list of column names matching the regex.
389 :param regex: regex pattern
390 :type regex: :class:`str`
392 :returns: :class:`list` of column names (:class:`str`)
396 matches = re.search(regex, name)
398 matching_names.append(name)
399 return matching_names
403 Checks if the column with a given name is present in the table.
418 value=itertools.cycle([value])
419 for r, v
in zip(self.
rows, value):
422 def ToString(self, float_format='%.3f', int_format='%d', rows=None):
424 Convert the table into a string representation.
426 The output format can be modified for int and float type columns by
427 specifying a formatting string for the parameters *float_format* and
430 The option *rows* specify the range of rows to be printed. The parameter
431 must be a type that supports indexing (e.g. a :class:`list`) containing the
432 start and end row *index*, e.g. [start_row_idx, end_row_idx].
434 :param float_format: formatting string for float columns
435 :type float_format: :class:`str`
437 :param int_format: formatting string for int columns
438 :type int_format: :class:`str`
440 :param rows: iterable containing start and end row *index*
441 :type rows: iterable containing :class:`ints <int>`
443 widths=[len(cn)
for cn
in self.
col_names]
446 sel_rows=self.
rows[rows[0]:rows[1]]
448 for i, (ty, col)
in enumerate(zip(self.
col_types, row)):
450 widths[i]=max(widths[i], len(
'NA'))
452 widths[i]=max(widths[i], len(float_format % col))
454 widths[i]=max(widths[i], len(int_format % col))
456 widths[i]=max(widths[i], len(str(col)))
459 s+=
''.join([
'# %s\n' % l
for l
in self.comment.split(
'\n')])
460 total_width=sum(widths)+2*len(widths)
461 for width, col_name
in zip(widths, self.
col_names):
462 s+=col_name.center(width+2)
463 s+=
'\n%s\n' % (
'-'*total_width)
465 for width, ty, col
in zip(widths, self.
col_types, row):
468 cs=
'NA'.center(width+2)
470 cs=(float_format % col).rjust(width+2)
472 cs=(int_format % col).rjust(width+2)
474 cs=
' '+str(col).ljust(width+1)
485 Statistics for column %(col)s
487 Number of Rows : %(num)d
488 Number of Rows Not None: %(num_non_null)d
491 Standard Deviation : %(stddev)f
497 'num' : len(self.
rows),
498 'num_non_null' : self.
Count(col),
499 'median' : self.
Median(col),
500 'mean' : self.
Mean(col),
501 'stddev' : self.
StdDev(col),
502 'min' : self.
Min(col),
503 'max' : self.
Max(col),
507 def _AddRowsFromDict(self, d, overwrite=None):
509 Add one or more rows from a :class:`dictionary <dict>`.
511 If *overwrite* is not None and set to an existing column name, the specified
512 column in the table is searched for the first occurrence of a value matching
513 the value of the column with the same name in the dictionary. If a matching
514 value is found, the row is overwritten with the dictionary. If no matching
515 row is found, a new row is appended to the table.
517 :param d: dictionary containing the data
518 :type d: :class:`dict`
520 :param overwrite: column name to overwrite existing row if value in
521 column *overwrite* matches
522 :type overwrite: :class:`str`
524 :raises: :class:`ValueError` if multiple rows are added but the number of
525 data items is different for different columns.
528 idxs = [self.
GetColIndex(k)
for k
in list(d.keys())]
532 for k,v
in d.items():
538 elif old_len!=len(v):
539 raise ValueError(
"Cannot add rows: length of data must be equal " + \
540 "for all columns in %s"%str(d))
543 for i,data
in enumerate(zip(*list(d.values()))):
544 new_row = [
None for a
in range(len(self.
col_names))]
545 for idx,v
in zip(idxs,data):
552 for i,r
in enumerate(self.
rows):
553 if r[overwrite_idx]==new_row[overwrite_idx]:
554 for j,e
in enumerate(self.
rows[i]):
557 self.
rows[i] = new_row
562 if not overwrite
or not added:
563 self.rows.append(new_row)
567 Two-sided test for the null-hypothesis that two related samples
568 have the same average (expected values).
570 :param col_a: First column
571 :type col_a: :class:`str`
572 :param col_b: Second column
573 :type col_b: :class:`str`
575 :returns: P-value between 0 and 1 that the two columns have the
576 same average. The smaller the value, the less related the two
579 from scipy.stats
import ttest_rel
582 for x, y
in self.
Zip(col_a, col_b):
583 if x!=
None and y!=
None:
586 result = ttest_rel(xs, ys)
591 Add a row to the table.
593 *data* may either be a dictionary or a list-like object:
595 - If *data* is a dictionary, the keys in the dictionary must match the
596 column names. Columns not found in the dict will be initialized to None.
597 If the dict contains list-like objects, multiple rows will be added, if
598 the number of items in all list-like objects is the same, otherwise a
599 :class:`ValueError` is raised.
601 - If *data* is a list-like object, the row is initialized from the values
602 in *data*. The number of items in *data* must match the number of
603 columns in the table. A :class:`ValueError` is raised otherwise. The
604 values are added in the order specified in the list, thus, the order of
605 the data must match the columns.
607 If *overwrite* is not None and set to an existing column name, the specified
608 column in the table is searched for the first occurrence of a value matching
609 the value of the column with the same name in the dictionary. If a matching
610 value is found, the row is overwritten with the dictionary. If no matching
611 row is found, a new row is appended to the table.
613 :param data: data to add
614 :type data: :class:`dict` or *list-like* object
616 :param overwrite: column name to overwrite existing row if value in
617 column *overwrite* matches
618 :type overwrite: :class:`str`
620 :raises: :class:`ValueError` if *list-like* object is used and number of
621 items does *not* match number of columns in table.
623 :raises: :class:`ValueError` if *dict* is used and multiple rows are added
624 but the number of data items is different for different columns.
626 **Example:** add multiple data rows to a subset of columns using a dictionary
628 .. code-block:: python
630 # create table with three float columns
631 tab = Table(['x','y','z'], 'fff')
634 data = {'x': [1.2, 1.6], 'z': [1.6, 5.3]}
639 will produce the table
649 # overwrite the row with x=1.2 and add row with x=1.9
650 data = {'x': [1.2, 1.9], 'z': [7.9, 3.5]}
651 tab.AddRow(data, overwrite='x')
655 will produce the table
670 msg=
'data array must have %d elements, not %d'
671 raise ValueError(msg % (len(self.
col_names), len(data)))
678 for i,r
in enumerate(self.
rows):
679 if r[overwrite_idx]==new_row[overwrite_idx]:
680 self.
rows[i] = new_row
685 if not overwrite
or not added:
686 self.rows.append(new_row)
690 Remove column with the given name from the table.
692 :param col: name of column to remove
693 :type col: :class:`str`
698 for row
in self.
rows:
701 def AddCol(self, col_name, col_type, data=None):
703 Add a column to the right of the table.
705 :param col_name: name of new column
706 :type col_name: :class:`str`
708 :param col_type: type of new column (long versions: *int*, *float*, *bool*,
709 *string* or short versions: *i*, *f*, *b*, *s*)
710 :type col_type: :class:`str`
712 :param data: data to add to new column
713 :type data: scalar or iterable
717 .. code-block:: python
719 tab = Table(['x'], 'f', x=range(5))
720 tab.AddCol('even', 'bool', itertools.cycle([True, False]))
724 will produce the table
737 If data is a constant instead of an iterable object, it's value
738 will be written into each row:
740 .. code-block:: python
742 tab = Table(['x'], 'f', x=range(5))
743 tab.AddCol('num', 'i', 1)
747 will produce the table
760 As a special case, if there are no previous rows, and data is not
761 None, rows are added for every item in data.
765 raise ValueError(
'Column with name %s already exists'%col_name)
768 self.col_names.append(col_name)
769 self.col_types.append(col_type)
773 for row
in self.
rows:
776 if hasattr(data,
'__len__')
and len(data)!=len(self.
rows):
779 raise ValueError(
'Length of data (%i) must correspond to number of '%len(data) +\
780 'existing rows (%i)'%len(self.
rows))
781 for row, d
in zip(self.
rows, data):
784 elif data!=
None and len(self.
col_names)==1:
786 self.
AddRow({col_name : data})
789 self.
AddRow({col_name : v})
793 Returns a filtered table only containing rows matching all the predicates
794 in kwargs and args For example,
796 .. code-block:: python
798 tab.Filter(town='Basel')
800 will return all the rows where the value of the column "town" is equal to
801 "Basel". Several predicates may be combined, i.e.
803 .. code-block:: python
805 tab.Filter(town='Basel', male=True)
807 will return the rows with "town" equal to "Basel" and "male" equal to true.
808 args are unary callables returning true if the row should be included in the
809 result and false if not.
812 for row
in self.
rows:
818 for key, val
in kwargs.items():
830 Returns a new table object containing all rows matching a logical query
833 *query* is a string containing the logical expression, that will be
834 evaluated for every row.
836 Operands have to be the name of a column or an expression that can be
837 parsed to float, int, bool or string.
838 Valid operators are: and, or, !=, !, <=, >=, ==, =, <, >, +, -, \\*, /
840 .. code-block:: python
842 subtab = tab.Select('col_a>0.5 and (col_b=5 or col_c=5)')
844 The selection query should be self explaining. Allowed parenthesis are:
845 (), [], {}, whereas parenthesis mismatches get recognized. Expressions like
846 '3<=col_a>=col_b' throw an error, due to problems in figuring out the
849 There are two special expressions:
851 .. code-block:: python
853 #selects rows, where 1.0<=col_a<=1.5
854 subtab = tab.Select('col_a=1.0:1.5')
856 #selects rows, where col_a=1 or col_a=2 or col_a=3
857 subtab = tab.Select('col_a=1,2,3')
859 Only consistent types can be compared. If col_a is of type string and col_b
860 is of type int, following expression would throw an error: 'col_a<col_b'
864 from .table_selector
import TableSelector
866 raise ImportError(
"Tried to import from the file table_selector.py, but could not find it!")
872 for row
in self.
rows:
873 if selector.EvaluateRow(row):
874 selected_tab.AddRow(row)
880 def _LoadOST(stream_or_filename):
881 fieldname_pattern=re.compile(
r'(?P<name>[^[]+)(\[(?P<type>\w+)\])?')
882 values_pattern=re.compile(
"([^\" ]+|\"[^\"]*\")+")
884 if not hasattr(stream_or_filename,
'read'):
885 stream=open(stream_or_filename,
'r')
888 stream=stream_or_filename
893 if line.startswith(
'#'):
901 for col
in line.split():
902 match=fieldname_pattern.match(col)
904 if match.group(
'type'):
905 fieldtypes.append(match.group(
'type'))
907 fieldtypes.append(
'string')
908 fieldnames.append(match.group(
'name'))
910 tab=
Table(fieldnames, fieldtypes)
911 except Exception
as e:
918 tab.AddRow([x.strip(
'"')
for x
in values_pattern.findall(line)])
922 raise IOError(
"Cannot read table from empty stream")
925 def _GuessColumnTypes(self):
926 for col_idx
in range(len(self.
col_names)):
928 for row
in self.
rows:
929 for idx
in range(len(row)):
933 def _LoadCSV(stream_or_filename, sep):
935 if not hasattr(stream_or_filename,
'read'):
936 stream=open(stream_or_filename,
'r')
939 stream=stream_or_filename
940 reader=csv.reader(stream, delimiter=sep)
946 tab=
Table(header, types)
953 raise IOError(
'trying to load table from empty CSV stream/file')
955 tab._GuessColumnTypes()
959 def _LoadPickle(stream_or_filename):
961 if not hasattr(stream_or_filename,
'read'):
962 stream=open(stream_or_filename,
'rb')
965 stream=stream_or_filename
966 tab = pickle.load(stream)
972 def _GuessFormat(filename):
974 filename = filename.name
975 except AttributeError
as e:
977 if filename.endswith(
'.csv'):
979 elif filename.endswith(
'.pickle'):
986 def Load(stream_or_filename, format='auto', sep=','):
988 Load table from stream or file with given name.
990 By default, the file format is set to *auto*, which tries to guess the file
991 format from the file extension. The following file extensions are
994 ============ ======================
995 extension recognized format
996 ============ ======================
997 .csv comma separated values
998 .pickle pickled byte stream
999 <all others> ost-specific format
1000 ============ ======================
1002 Thus, *format* must be specified for reading file with different filename
1005 The following file formats are understood:
1009 This is an ost-specific, but still human readable file format. The file
1010 (stream) must start with header line of the form
1012 col_name1[type1] <col_name2[type2]>...
1014 The types given in brackets must be one of the data types the
1015 :class:`Table` class understands. Each following line in the file then must
1016 contains exactly the same number of data items as listed in the header. The
1017 data items are automatically converted to the column format. Lines starting
1018 with a '#' and empty lines are ignored.
1022 Deserializes the table from a pickled byte stream.
1026 Reads the table from comma separated values stream. Since there is no
1027 explicit type information in the csv file, the column types are guessed,
1028 using the following simple rules:
1030 * if all values are either NA/NULL/NONE the type is set to string.
1031 * if all non-null values are convertible to float/int the type is set to
1033 * if all non-null values are true/false/yes/no, the value is set to bool.
1034 * for all other cases, the column type is set to string.
1036 :returns: A new :class:`Table` instance
1038 format=format.lower()
1040 format = Table._GuessFormat(stream_or_filename)
1043 return Table._LoadOST(stream_or_filename)
1045 return Table._LoadCSV(stream_or_filename, sep=sep)
1046 if format==
'pickle':
1047 return Table._LoadPickle(stream_or_filename)
# The original literal had no %s placeholder, so the % operator
# raised "not all arguments converted" instead of this message.
raise ValueError('unknown format "%s"' % format)
1052 Performs an in-place sort of the table, based on column *by*.
1054 :param by: column name by which to sort
1055 :type by: :class:`str`
1057 :param order: ascending (``-``) or descending (``+``) order
1058 :type order: :class:`str` (i.e. *+*, *-*)
1064 def _key_cmp(lhs, rhs):
1069 if a
is None or b
is None:
1070 if a
is None and b
is not None:
1072 if b
is None and a
is not None:
1075 return sign*((a > b) - (a < b))
1078 self.
rows=sorted(self.
rows, key=functools.cmp_to_key(_key_cmp))
1082 Extract a list of all unique values from one column.
1084 :param col: column name
1085 :type col: :class:`str`
1087 :param ignore_nan: ignore all *None* values
1088 :type ignore_nan: :class:`bool`
1093 for row
in self.
rows:
1095 if item!=
None or ignore_nan==
False:
1096 if item
in seen:
continue
1103 Allows to conveniently iterate over a selection of columns, e.g.
1105 .. code-block:: python
1107 tab = Table.Load('...')
1108 for col1, col2 in tab.Zip('col1', 'col2'):
1113 .. code-block:: python
1115 tab = Table.Load('...')
1116 for col1, col2 in zip(tab['col1'], tab['col2']):
1119 return list(zip(*[self[arg]
for arg
in args]))
1121 def Plot(self, x, y=None, z=None, style='.', x_title=None, y_title=None,
1122 z_title=
None, x_range=
None, y_range=
None, z_range=
None,
1123 color=
None, plot_if=
None, legend=
None,
1124 num_z_levels=10, z_contour=
True, z_interpol=
'nn', diag_line=
False,
1125 labels=
None, max_num_labels=
None, title=
None, clear=
True, save=
False,
1128 Function to plot values from your table in 1, 2 or 3 dimensions using
1129 `Matplotlib <http://matplotlib.sourceforge.net>`__
1131 :param x: column name for first dimension
1132 :type x: :class:`str`
1134 :param y: column name for second dimension
1135 :type y: :class:`str`
1137 :param z: column name for third dimension
1138 :type z: :class:`str`
1140 :param style: symbol style (e.g. *.*, *-*, *x*, *o*, *+*, *\\**). For a
1141 complete list check (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.plot>`__).
1142 :type style: :class:`str`
1144 :param x_title: title for first dimension, if not specified it is
1145 automatically derived from column name
1146 :type x_title: :class:`str`
1148 :param y_title: title for second dimension, if not specified it is
1149 automatically derived from column name
1150 :type y_title: :class:`str`
1152 :param z_title: title for third dimension, if not specified it is
1153 automatically derived from column name
1154 :type z_title: :class:`str`
1156 :param x_range: start and end value for first dimension (e.g. [start_x, end_x])
1157 :type x_range: :class:`list` of length two
1159 :param y_range: start and end value for second dimension (e.g. [start_y, end_y])
1160 :type y_range: :class:`list` of length two
1162 :param z_range: start and end value for third dimension (e.g. [start_z, end_z])
1163 :type z_range: :class:`list` of length two
1165 :param color: color for data (e.g. *b*, *g*, *r*). For a complete list check
1166 (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.plot>`__).
1167 :type color: :class:`str`
1169 :param plot_if: callable which returns *True* if row should be plotted. Is
1170 invoked like ``plot_if(self, row)``
1171 :type plot_if: callable
1173 :param legend: legend label for data series
1174 :type legend: :class:`str`
1176 :param num_z_levels: number of levels for third dimension
1177 :type num_z_levels: :class:`int`
1179 :param diag_line: draw diagonal line
1180 :type diag_line: :class:`bool`
1182 :param labels: column name containing labels to put on x-axis for one
1184 :type labels: :class:`str`
1186 :param max_num_labels: limit maximum number of labels
1187 :type max_num_labels: :class:`int`
1189 :param title: plot title, if not specified it is automatically derived from
1190 plotted column names
1191 :type title: :class:`str`
1193 :param clear: clear old data from plot
1194 :type clear: :class:`bool`
1196 :param save: filename for saving plot
1197 :type save: :class:`str`
1199 :param z_contour: draw contour lines
1200 :type z_contour: :class:`bool`
1202 :param z_interpol: interpolation method for 3-dimensional plot (one of 'nn',
1204 :type z_interpol: :class:`str`
1206 :param \\*\\*kwargs: additional arguments passed to matplotlib
1208 :returns: the ``matplotlib.pyplot`` module
1210 **Examples:** simple plotting functions
1212 .. code-block:: python
1214 tab = Table(['a','b','c','d'],'iffi', a=range(5,0,-1),
1215 b=[x/2.0 for x in range(1,6)],
1216 c=[math.cos(x) for x in range(0,5)],
1219 # one dimensional plot of column 'd' vs. index
1223 # two dimensional plot of 'a' vs. 'c'
1224 plt = tab.Plot('a', y='c', style='o-')
1227 # three dimensional plot of 'a' vs. 'c' with values 'b'
1228 plt = tab.Plot('a', y='c', z='b')
1229 # manually save plot to file
1230 plt.savefig("plot.png")
1233 import matplotlib.pyplot
as plt
1234 import matplotlib.mlab
as mlab
1242 plt.figure(figsize=[8, 6])
1265 if x_range
and (
IsScalar(x_range)
or len(x_range)!=2):
1266 raise ValueError(
'parameter x_range must contain exactly two elements')
1267 if y_range
and (
IsScalar(y_range)
or len(y_range)!=2):
1268 raise ValueError(
'parameter y_range must contain exactly two elements')
1269 if z_range
and (
IsScalar(z_range)
or len(z_range)!=2):
1270 raise ValueError(
'parameter z_range must contain exactly two elements')
1273 kwargs[
'color']=color
1275 kwargs[
'label']=legend
1279 for row
in self.
rows:
1280 if row[idx1]!=
None and row[idx2]!=
None and row[idx3]!=
None:
1281 if plot_if
and not plot_if(self, row):
1283 xs.append(row[idx1])
1284 ys.append(row[idx2])
1285 zs.append(row[idx3])
1288 z_spacing = (z_range[1] - z_range[0]) / num_z_levels
1292 z_spacing = (self.
Max(z) - l) / num_z_levels
1294 for i
in range(0,num_z_levels+1):
1298 xi = np.linspace(min(xs),max(xs),len(xs)*10)
1299 yi = np.linspace(min(ys),max(ys),len(ys)*10)
1300 zi = mlab.griddata(xs, ys, zs, xi, yi, interp=z_interpol)
1303 plt.contour(xi,yi,zi,levels,linewidths=0.5,colors=
'k')
1305 plt.contourf(xi,yi,zi,levels,cmap=plt.cm.jet)
1306 plt.colorbar(ticks=levels)
1310 for row
in self.
rows:
1311 if row[idx1]!=
None and row[idx2]!=
None:
1312 if plot_if
and not plot_if(self, row):
1314 xs.append(row[idx1])
1315 ys.append(row[idx2])
1316 plt.plot(xs, ys, style, **kwargs)
1323 for row
in self.
rows:
1325 if plot_if
and not plot_if(self, row):
1327 xs.append(row[idx1])
1329 label_vals.append(row[label_idx])
1330 plt.plot(xs, style, **kwargs)
1334 if len(label_vals)>max_num_labels:
1335 interval = int(math.ceil(float(len(label_vals))/max_num_labels))
1336 label_vals = label_vals[::interval]
1337 plt.xticks(np.arange(0, len(xs), interval), label_vals, rotation=45,
1342 title =
'%s of %s vs. %s' % (nice_z, nice_x, nice_y)
1344 title =
'%s vs. %s' % (nice_x, nice_y)
1348 plt.title(title, size=
'x-large', fontweight=
'bold',
1349 verticalalignment=
'bottom')
1355 plt.xlabel(nice_x, size=
'x-large')
1357 plt.xlim(x_range[0], x_range[1])
1359 plt.ylim(y_range[0], y_range[1])
1361 plt.plot(x_range, y_range,
'-', color=
'black')
1363 plt.ylabel(nice_y, size=
'x-large')
1366 plt.ylim(y_range[0], y_range[1])
1368 plt.xlabel(x_title, size=
'x-large')
1369 plt.ylabel(nice_y, size=
'x-large')
1374 LogError(
"Function needs numpy and matplotlib, but I could not import it.")
1377 def PlotHistogram(self, col, x_range=None, num_bins=10, normed=False,
1378 histtype=
'stepfilled', align=
'mid', x_title=
None,
1379 y_title=
None, title=
None, clear=
True, save=
False,
1380 color=
None, y_range=
None):
1382 Create a histogram of the data in col for the range *x_range*, split into
1383 *num_bins* bins and plot it using Matplotlib.
1385 :param col: column name with data
1386 :type col: :class:`str`
1388 :param x_range: start and end value for first dimension (e.g. [start_x, end_x])
1389 :type x_range: :class:`list` of length two
1391 :param y_range: start and end value for second dimension (e.g. [start_y, end_y])
1392 :type y_range: :class:`list` of length two
1394 :param num_bins: number of bins in range
1395 :type num_bins: :class:`int`
1397 :param color: Color to be used for the histogram. If not set, color will be
1398 determined by matplotlib
1399 :type color: :class:`str`
1401 :param normed: normalize histogram
1402 :type normed: :class:`bool`
1404 :param histtype: type of histogram (i.e. *bar*, *barstacked*, *step*,
1405 *stepfilled*). See (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.hist>`__).
1406 :type histtype: :class:`str`
1408 :param align: style of histogram (*left*, *mid*, *right*). See
1409 (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.hist>`__).
1410 :type align: :class:`str`
1412 :param x_title: title for first dimension, if not specified it is
1413 automatically derived from column name
1414 :type x_title: :class:`str`
1416 :param y_title: title for second dimension, if not specified it is
1417 automatically derived from column name
1418 :type y_title: :class:`str`
1420 :param title: plot title, if not specified it is automatically derived from
1421 plotted column names
1422 :type title: :class:`str`
1424 :param clear: clear old data from plot
1425 :type clear: :class:`bool`
1427 :param save: filename for saving plot
1428 :type save: :class:`str`
1430 **Examples:** simple plotting functions
1432 .. code-block:: python
1434 tab = Table(['a'],'f', a=[math.cos(x*0.01) for x in range(100)])
1436 # one dimensional plot of column 'd' vs. index
1437 plt = tab.PlotHistogram('a')
1442 import matplotlib.pyplot
as plt
1445 if len(self.
rows)==0:
1449 kwargs[
'color']=color
1459 n, bins, patches = plt.hist(data, bins=num_bins, range=x_range,
1460 normed=normed, histtype=histtype, align=align,
1467 plt.xlabel(nice_x, size=
'x-large')
1474 plt.ylabel(nice_y, size=
'x-large')
1479 nice_title=
"Histogram of %s"%nice_x
1480 plt.title(nice_title, size=
'x-large', fontweight=
'bold')
1486 LogError(
"Function needs numpy and matplotlib, but I could not import it.")
1489 def _Max(self, col):
1490 if len(self.
rows)==0:
1494 if col_type==
'int' or col_type==
'float':
1495 max_val = -float(
'inf')
1496 elif col_type==
'bool':
1498 elif col_type==
'string':
1501 for i
in range(0, len(self.
rows)):
1502 val = self.
rows[i][idx]
1503 if val
and val > max_val:
1504 max_val = self.
rows[i][idx]
1506 return max_val, max_idx
1508 def PlotBar(self, cols=None, rows=None, xlabels=None, set_xlabels=True, xlabels_rotation='horizontal', y_title=None, title=None,
1509 colors=
None, width=0.8, bottom=0, legend=
False, legend_names=
None, show=
False, save=
False):
1512 Create a barplot of the data in cols. Every column will be represented
1513 at one position. If there are several rows, each column will be grouped
1516 :param cols: List of column names. Every column will be represented as a
1517 single bar. If cols is None, every column of the table gets
1519 :type cols: :class:`list`
1521 :param rows: List of row indices. Values from given rows will be plotted
1522 in parallel at one column position. If set to None, all rows
1523 of the table will be plotted. Note, that the maximum number
1525 :type rows: :class:`list`
1527 :param xlabels: Label for every col on x-axis. If set to None, the column
1528 names are used. The xlabel plotting can be suppressed by
1529 the parameter set_xlabels.
1530 :type xlabels: :class:`list`
1532 :param set_xlabels: Controls whether xlabels are plotted or not.
1533 :type set_xlabels: :class:`bool`
1535 :param xlabels_rotation: Can either be 'horizontal', 'vertical' or an
1536 integer, that describes the rotation in degrees.
1538 :param y_title: Y-axis description
1539 :type y_title: :class:`str`
1541 :title: Title of the plot. No title appears if set to None
1542 :type title: :class:`str`
1544 :param colors: Colors of the different bars in each group. Must be a list
1545 of valid colors in matplotlib. Length of color and rows must
1547 :type colors: :class:`list`
1549 :param width: The available space for the groups on the x-axis is divided
1550 by the exact number of groups. The parameters width is the
1551 fraction of what is actually used. If it would be 1.0 the
1552 bars of the different groups would touch each other.
1553 Value must be between [0;1]
1554 :type width: :class:`float`
1556 :param bottom: Bottom
1557 :type bottom: :class:`float`
1559 :param legend: Legend for color explanation, the corresponding row
1560 respectively. If set to True, legend_names must be provided.
1561 :type legend: :class:`bool`
1563 :param legend_names: List of names, that describe the differently colored
1564 bars. Length must be consistent with number of rows.
1566 :param show: If set to True, the plot is directly displayed.
1568 :param save: If set, a png image with name save in the current working
1569 directory will be saved.
1570 :type save: :class:`str`
1575 import matplotlib.pyplot
as plt
1577 raise ImportError(
'PlotBar relies on numpy and matplotlib, but I could' \
1580 standard_colors=[
'b',
'g',
'y',
'c',
'm',
'r','k']
1586 if width<=0
or width>1:
1587 raise ValueError(
'Width must be in [0;1]')
1590 if len(self.
rows)>7:
1591 raise ValueError(
'Table contains too many rows to represent them at one '\
1592 'bar position in parallel. You can Select a Subtable or '\
1593 'specify the parameter rows with a list of row indices '\
1596 rows=list(range(len(self.
rows)))
1598 if not isinstance(rows,list):
1601 raise ValueError(
'Too many rows to represent (max 7). Please note, that '\
1602 'data from multiple rows from one column gets '\
1603 'represented at one position in parallel.')
1606 row=self.
rows[r_idx]
1612 raise ValueError(
'Cannot find column with name '+str(c))
1613 temp.append(row[c_idx])
1617 colors=standard_colors[:len(rows)]
1619 if len(rows)!=len(colors):
1620 raise ValueError(
"Number of rows and number of colors must be consistent!")
1622 ind=np.arange(len(data[0]))
1623 single_bar_width=float(width)/len(data)
1626 ax=fig.add_subplot(111)
1629 for i
in range(len(data)):
1630 legend_data.append(ax.bar(ind+i*single_bar_width+(1-width)/2,data[i],single_bar_width,bottom=bottom,color=colors[i])[0])
1633 ax.set_title(title, size=
'x-large', fontweight=
'bold')
1639 ax.set_ylabel(nice_y)
1642 if len(data[0])!=len(xlabels):
1643 raise ValueError(
'Number of xlabels is not consistent with number of cols!')
1648 ax.set_xticks(ind+0.5)
1649 ax.set_xticklabels(xlabels, rotation = xlabels_rotation)
1654 if legend_names==
None:
1655 raise ValueError(
'You must provide legend names! e.g. names for the rows, '\
1656 'that are printed in parallel.')
1657 if len(legend_names)!=len(data):
1658 raise ValueError(
'length of legend_names must be consistent with number '\
1660 ax.legend(legend_data, legend_names)
def PlotHexbin(self, x, y, title=None, x_title=None, y_title=None,
               x_range=None, y_range=None, binning='log',
               colormap='jet', show_scalebar=False, scalebar_label=None,
               clear=True, save=False, show=False):
    """
    Hexbin heat plot of column *x* against column *y* using matplotlib.

    :param binning: None (raw counts), 'log', an integer divisor, or a list
                    of lower bin bounds
    :param colormap: matplotlib colormap, given by name or as an object
    :param x_range: [start, end] limits for the x-axis (optional)
    :param y_range: [start, end] limits for the y-axis (optional)
    :param show_scalebar: draw a colorbar, labelled with *scalebar_label*
    :param clear: clear the current figure first
    :param save: filename to save the figure to (optional)
    :param show: display the plot window

    :warning: The function depends on *matplotlib*
    """
    try:
      import matplotlib.pyplot as plt
      import matplotlib.cm as cm
    except ImportError:
      LogError('PlotHexbin relies on matplotlib, but I could not import it')
      return

    idx = self.GetColIndex(x)
    idy = self.GetColIndex(y)
    xdata = []
    ydata = []
    # only rows where both cells hold a value take part in the plot
    for r in self.rows:
      if r[idx]!=None and r[idy]!=None:
        xdata.append(r[idx])
        ydata.append(r[idy])

    if clear:
      plt.clf()

    nice_x = x_title if x_title!=None else MakeTitle(x)
    nice_y = y_title if y_title!=None else MakeTitle(y)
    if title==None:
      title = '%s vs. %s' % (nice_x, nice_y)

    if IsStringLike(colormap):
      colormap = getattr(cm, colormap)

    if x_range and (IsScalar(x_range) or len(x_range)!=2):
      raise ValueError('parameter x_range must contain exactly two elements')
    if y_range and (IsScalar(y_range) or len(y_range)!=2):
      raise ValueError('parameter y_range must contain exactly two elements')

    # default extent is the data's bounding box; explicit ranges override it
    ext = [min(xdata),max(xdata),min(ydata),max(ydata)]
    if x_range:
      plt.xlim((x_range[0], x_range[1]))
      ext[0], ext[1] = x_range[0], x_range[1]
    if y_range:
      plt.ylim(y_range[0], y_range[1])
      ext[2], ext[3] = y_range[0], y_range[1]

    plt.hexbin(xdata, ydata, bins=binning, cmap=colormap, extent=ext)
    plt.title(title, size='x-large', fontweight='bold',
              verticalalignment='bottom')
    plt.xlabel(nice_x)
    plt.ylabel(nice_y)

    if show_scalebar:
      cb = plt.colorbar()
      if scalebar_label:
        cb.set_label(scalebar_label)
    if save:
      plt.savefig(save)
    if show:
      plt.show()
    return plt
def MaxRow(self, col):
    """
    Return the row holding the maximal value in *col*. If several rows share
    the highest value only the first is returned; ``None`` cells are ignored.

    :param col: column name
    :type col: :class:`str`

    :returns: row with maximal col value or None if the table is empty
    """
    val, idx = self._Max(col)
    if idx is None:
      return None
    return self.rows[idx]
def Max(self, col):
    """
    Return the maximum value in *col* (first of ties); ``None`` cells are
    ignored.

    :param col: column name
    :type col: :class:`str`
    """
    val, _ = self._Max(col)
    return val
def MaxIdx(self, col):
    """
    Return the row index of the maximal value in *col* (first of ties);
    ``None`` cells are ignored.

    :param col: column name
    :type col: :class:`str`
    """
    _, idx = self._Max(col)
    return idx
def _Min(self, col):
    """Internal: return ``(min_value, row_index)`` for *col*, skipping
    ``None`` cells; ``(None, None)`` for an empty table."""
    if not self.rows:
      return None, None
    idx = self.GetColIndex(col)
    col_type = self.col_types[idx]
    # start the scan from the largest representable value of the column type
    if col_type in ('int', 'float'):
      best = float('inf')
    elif col_type == 'bool':
      best = True
    else:  # 'string' -- assumes values below chr(255); TODO confirm
      best = chr(255)
    best_row = None
    for row_index, row in enumerate(self.rows):
      cell = row[idx]
      if cell is not None and cell < best:
        best = cell
        best_row = row_index
    return best, best_row
def Min(self, col):
    """
    Return the minimal value in *col* (first of ties); ``None`` cells are
    ignored.

    :param col: column name
    :type col: :class:`str`
    """
    val, _ = self._Min(col)
    return val
def MinRow(self, col):
    """
    Return the row holding the minimal value in *col*. If several rows share
    the lowest value only the first is returned; ``None`` cells are ignored.

    :param col: column name
    :type col: :class:`str`

    :returns: row with minimal col value or None if the table is empty
    """
    val, idx = self._Min(col)
    if idx is None:
      return None
    return self.rows[idx]
def MinIdx(self, col):
    """
    Return the row index of the minimal value in *col* (first of ties);
    ``None`` cells are ignored.

    :param col: column name
    :type col: :class:`str`
    """
    _, idx = self._Min(col)
    return idx
def Sum(self, col):
    """
    Return the sum of *col*; ``None`` cells are ignored and an empty column
    sums to 0.0. The column must be of numeric ('int', 'float') or bool type.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``
    """
    idx = self.GetColIndex(col)
    if self.col_types[idx] not in ('int', 'float', 'bool'):
      raise TypeError("Sum can only be used on numeric column types")
    total = 0.0
    for row in self.rows:
      if row[idx] is not None:
        total += row[idx]
    return total
def Mean(self, col):
    """
    Return the mean of *col*; ``None`` cells are ignored, and ``None`` is
    returned for an empty column. For bool columns the result is the ratio
    of ``True`` entries to the total count.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``
    """
    idx = self.GetColIndex(col)
    if self.col_types[idx] not in ('int', 'float', 'bool'):
      raise TypeError("Mean can only be used on numeric or bool column types")
    vals = [v for v in self[col] if v is not None]
    try:
      return stutil.Mean(vals)
    except:
      return None
def RowMean(self, mean_col_name, cols):
    """
    Add a new 'float' column *mean_col_name* holding, for each row, the mean
    over the columns named in *cols* (a single name or a list of names).
    ``None`` cells are ignored; rows without any value get ``None``.

    :param mean_col_name: name of new column containing mean values
    :type mean_col_name: :class:`str`

    :param cols: name or list of names of columns to average
    :type cols: :class:`str` or :class:`list` of strings

    :raises: :class:`TypeError` if any column in *cols* is of type string
    """
    if IsScalar(cols):
      cols = [cols]
    col_idxs = []
    for col in cols:
      idx = self.GetColIndex(col)
      if self.col_types[idx] not in ('int', 'float', 'bool'):
        raise TypeError("RowMean can only be used on numeric column types")
      col_idxs.append(idx)
    means = []
    for row in self.rows:
      vals = [row[idx] for idx in col_idxs if row[idx] is not None]
      try:
        means.append(stutil.Mean(vals))
      except:
        means.append(None)
    self.AddCol(mean_col_name, 'f', means)
def Percentiles(self, col, nths):
    """
    Return the percentiles of column *col* given in *nths*.

    Each percentile is calculated as

    .. code-block:: python

      values[min(len(values)-1, int(math.floor(len(values)*nth/100.0)))]

    where values are the sorted values of *col* not equal to ``None``.

    :param col: column name
    :type col: :class:`str`
    :param nths: list of percentiles to be calculated. Each percentile is a
                 number between 0 and 100.
    :type nths: :class:`list` of numbers

    :raises: :class:`TypeError` if column type is ``string``,
             :class:`ValueError` for percentiles outside [0, 100]
    :returns: list of percentiles in the same order as given in *nths*
    """
    idx = self.GetColIndex(col)
    col_type = self.col_types[idx]
    if col_type!='int' and col_type!='float' and col_type!='bool':
      # fixed error text: this is Percentiles, not Median (copy-paste bug)
      raise TypeError("Percentiles can only be used on numeric column types")
    for nth in nths:
      if nth < 0 or nth > 100:
        raise ValueError("percentiles must be between 0 and 100")
    vals = sorted(v for v in self[col] if v is not None)
    if len(vals) == 0:
      return [None]*len(nths)
    percentiles = []
    for nth in nths:
      # rounding behaviour between Python2 and Python3 changed, hence the
      # explicit floor-based index instead of round()
      p = vals[min(len(vals)-1, int(math.floor(len(vals)*nth/100.0)))]
      percentiles.append(p)
    return percentiles
def Median(self, col):
    """
    Return the median of *col*; ``None`` cells are ignored and ``None`` is
    returned for an empty column. The column must be of numeric
    ('float', 'int') or bool type.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``
    """
    idx = self.GetColIndex(col)
    if self.col_types[idx] not in ('int', 'float', 'bool'):
      raise TypeError("Median can only be used on numeric column types")
    vals = [v for v in self[col] if v is not None]
    try:
      return stutil.Median(vals)
    except:
      return None
def StdDev(self, col):
    """
    Return the standard deviation of *col*; ``None`` cells are ignored and
    ``None`` is returned for an empty column. The column must be of numeric
    ('float', 'int') or bool type.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``
    """
    idx = self.GetColIndex(col)
    if self.col_types[idx] not in ('int', 'float', 'bool'):
      raise TypeError("StdDev can only be used on numeric column types")
    vals = [v for v in self[col] if v is not None]
    try:
      return stutil.StdDev(vals)
    except:
      return None
def Count(self, col, ignore_nan=True):
    """
    Count the cells of *col*; with *ignore_nan* (the default) only cells
    that are not ``None`` are counted, otherwise every row counts.

    :param col: column name
    :type col: :class:`str`

    :param ignore_nan: skip ``None`` values
    :type ignore_nan: :class:`bool`
    """
    count = 0
    idx = self.GetColIndex(col)
    for row in self.rows:
      if ignore_nan:
        if row[idx] is not None:
          count += 1
      else:
        count += 1
    return count
def Correl(self, col1, col2):
    """
    Return the Pearson correlation coefficient between *col1* and *col2*,
    using only rows where neither value is ``None``. Returns ``None`` when
    there are not enough data points.

    :param col1: column name for first column
    :type col1: :class:`str`

    :param col2: column name for second column
    :type col2: :class:`str`
    """
    if IsStringLike(col1) and IsStringLike(col2):
      col1 = self.GetColIndex(col1)
      col2 = self.GetColIndex(col2)
    vals1, vals2 = [], []
    for v1, v2 in zip(self[col1], self[col2]):
      if v1 is not None and v2 is not None:
        vals1.append(v1)
        vals2.append(v2)
    try:
      return stutil.Correl(vals1, vals2)
    except:
      return None
def SpearmanCorrel(self, col1, col2):
    """
    Return the Spearman correlation coefficient between *col1* and *col2*,
    using only rows where neither value is ``None``. Returns ``None`` when
    there are not enough data points.

    :warning: The function depends on the following module: *scipy.stats.mstats*

    :param col1: column name for first column
    :type col1: :class:`str`

    :param col2: column name for second column
    :type col2: :class:`str`
    """
    try:
      import scipy.stats.mstats

      if IsStringLike(col1) and IsStringLike(col2):
        col1 = self.GetColIndex(col1)
        col2 = self.GetColIndex(col2)
      vals1, vals2 = [], []
      for v1, v2 in zip(self[col1], self[col2]):
        if v1 is not None and v2 is not None:
          vals1.append(v1)
          vals2.append(v2)
      correl = scipy.stats.mstats.spearmanr(vals1, vals2)[0]
      # math.isnan instead of scipy.isnan: the scipy top-level numpy
      # aliases were deprecated and removed in recent SciPy releases
      if math.isnan(correl):
        return None
      return correl
    except ImportError:
      LogError("Function needs scipy.stats.mstats, but I could not import it.")
      return None
def Save(self, stream_or_filename, format='ost', sep=','):
    """
    Save the table to a stream or filename.

    Supported formats: *ost* (human readable), *csv* (separator *sep*),
    *pickle* (binary), *html* and *context*.

    :param stream_or_filename: filename or stream for writing output
    :type stream_or_filename: :class:`str` or :class:`file`

    :param format: output format (i.e. *ost*, *csv*, *pickle*)
    :type format: :class:`str`

    :raises: :class:`ValueError` if format is unknown
    """
    format = format.lower()
    if format == 'ost':
      return self._SaveOST(stream_or_filename)
    if format == 'csv':
      return self._SaveCSV(stream_or_filename, sep=sep)
    if format == 'pickle':
      return self._SavePickle(stream_or_filename)
    if format == 'html':
      return self._SaveHTML(stream_or_filename)
    if format == 'context':
      return self._SaveContext(stream_or_filename)
    raise ValueError('unknown format "%s"' % format)
def _SavePickle(self, stream):
    """Pickle the whole table to *stream* (a filename or a binary
    file-like object), using the highest pickle protocol."""
    if not hasattr(stream, 'write'):
      stream = open(stream, 'wb')
    pickle.dump(self, stream, pickle.HIGHEST_PROTOCOL)
def _SaveHTML(self, stream_or_filename):
    """Write the table as a minimal HTML ``<table>`` to a filename or a
    writable stream.

    Fix: ``_escape`` now emits real HTML entities (``&amp;``, ``&gt;``,
    ``&lt;``) — the previous replacements were identity no-ops, so markup
    characters in cell values corrupted the generated HTML.
    """
    def _escape(s):
      # order matters: '&' first so entity ampersands are not re-escaped
      return s.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')

    file_opened = False
    if not hasattr(stream_or_filename, 'write'):
      stream = open(stream_or_filename, 'w')
      file_opened = True
    else:
      stream = stream_or_filename
    stream.write('<table>')
    stream.write('<tr>')
    for col_name in self.col_names:
      stream.write('<th>%s</th>' % _escape(col_name))
    stream.write('</tr>')
    for row in self.rows:
      stream.write('<tr>')
      for i, col in enumerate(row):
        # None renders as an empty cell; other values per column type
        val = ''
        if col != None:
          if self.col_types[i] == 'float':
            val = '%.3f' % col
          elif self.col_types[i] == 'int':
            val = '%d' % col
          elif self.col_types[i] == 'bool':
            val = col and 'true' or 'false'
          else:
            val = str(col)
        stream.write('<td>%s</td>' % _escape(val))
      stream.write('</tr>')
    stream.write('</table>')
    if file_opened:
      stream.close()
def _SaveContext(self, stream_or_filename):
    """Write the table as a ConTeXt table to a filename or a writable
    stream; ``None`` cells render as ``---``."""
    file_opened = False
    if not hasattr(stream_or_filename, 'write'):
      stream = open(stream_or_filename, 'w')
      file_opened = True
    else:
      stream = stream_or_filename
    stream.write('\\starttable[')
    # one column spec per table column, keyed on the column type
    for col_type in self.col_types:
      if col_type == 'string':
        stream.write('l|')
      elif col_type == 'int':
        stream.write('r|')
      elif col_type == 'float':
        stream.write('i3r|')
      else:
        stream.write('l|')
    stream.write(']\n\\HL\n')
    for col_name in self.col_names:
      stream.write('\\NC \\bf %s' % col_name)
    stream.write(' \\AR\\HL\n')
    for row in self.rows:
      for i, col in enumerate(row):
        val = '---'
        if col != None:
          if self.col_types[i] == 'float':
            val = '%.3f' % col
          elif self.col_types[i] == 'int':
            val = '%d' % col
          elif self.col_types[i] == 'bool':
            val = col and 'true' or 'false'
          else:
            val = str(col)
        stream.write('\\NC %s' % val)
      stream.write(' \\AR\n')
    stream.write('\\HL\n')
    stream.write('\\stoptable')
    if file_opened:
      stream.close()
def _SaveCSV(self, stream, sep):
    """Write the table as delimited text (separator *sep*) to a filename or
    a writable stream; ``None`` cells are written as the string ``NA``."""
    if not hasattr(stream, 'write'):
      stream = open(stream, 'w')
    writer = csv.writer(stream, delimiter=sep)
    writer.writerow(['%s' % n for n in self.col_names])
    for row in self.rows:
      # copy the row so the table itself is never mutated
      row = list(row)
      for i, c in enumerate(row):
        if c == None:
          row[i] = 'NA'
      writer.writerow(row)
def _SaveOST(self, stream):
    """Write the table in the OST format to a filename or a writable
    stream: optional '# ' comment lines, a header of ``name[type]`` tokens,
    then space-separated rows with ``None`` rendered as ``NA``."""
    if hasattr(stream, 'write'):
      writer = csv.writer(stream, delimiter=' ')
    else:
      stream = open(stream, 'w')
      writer = csv.writer(stream, delimiter=' ')
    if self.comment:
      stream.write(''.join(['# %s\n' % l for l in self.comment.split('\n')]))
    writer.writerow(['%s[%s]' % t for t in zip(self.col_names, self.col_types)])
    for row in self.rows:
      # copy the row so the table itself is never mutated
      row = list(row)
      for i, c in enumerate(row):
        if c == None:
          row[i] = 'NA'
      writer.writerow(row)
def GetNumpyMatrixAsArray(self, *args):
    """
    Return the named columns as the columns of a 2D numpy array.

    Only columns of type *int* or *float* are supported. *NA* values in the
    table will be converted to *None* values.

    :param \\*args: column names to include in numpy array

    :raises: :class:`TypeError` for non-numeric columns,
             :class:`RuntimeError` if no column is given
    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np

      if len(args) == 0:
        raise RuntimeError("At least one column must be specified.")
      idxs = []
      for arg in args:
        idx = self.GetColIndex(arg)
        if self.col_types[idx] not in ('int', 'float'):
          raise TypeError("Numpy matrix can only be generated from numeric "+\
                          "column types")
        idxs.append(idx)
      # build one row per requested column, then transpose so the requested
      # columns become the columns of the result
      a = np.array([list(self[i]) for i in idxs])
      return a.T
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      return None
def GetNumpyMatrix(self, *args):
    """
    Deprecated wrapper around :meth:`GetNumpyMatrixAsArray` that returns a
    ``numpy.matrix`` instead of a plain array.

    *Caution*: Numpy is deprecating the use of the numpy matrix class.

    :param \\*args: column names to include in numpy matrix

    :warning: The function depends on *numpy*
    """
    LogWarning("table.GetNumpyMatrix is deprecated, please use "+
               "table.GetNumpyMatrixAsArray instead")
    try:
      import numpy as np
      m = self.GetNumpyMatrixAsArray(*args)
      return np.matrix(m)
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      return None
def GaussianSmooth(self, col, std=1.0, na_value=0.0, padding='reflect', c=0.0):
    """
    In-place Gaussian smoothing of a numeric column; ``None`` cells are
    replaced by *na_value* before filtering.

    :param col: column name
    :type col: :class:`str`

    :param std: standard deviation of the Gaussian kernel
    :param na_value: substitute for ``None`` cells before smoothing
    :param padding: boundary mode, see scipy ``ndimage.gaussian_filter1d``
    :param c: fill constant used when *padding* is 'constant'

    :raises: :class:`TypeError` for non-numeric columns
    :warning: The function depends on *scipy*
    """
    try:
      from scipy import ndimage
      import numpy as np
    except ImportError:
      LogError("I need scipy.ndimage and numpy, but could not import it")
      return

    idx = self.GetColIndex(col)
    if self.col_types[idx] not in ('int', 'float'):
      raise TypeError("GaussianSmooth can only be used on numeric column types")
    vals = [v if v is not None else na_value for v in self[col]]
    smoothed = ndimage.gaussian_filter1d(vals, std, mode=padding, cval=c)
    # write the smoothed values back into the column
    self[col] = [v for v in smoothed]
def GetOptimalPrefactors(self, ref_col, *args, **kwargs):
    """
    Return the least-squares optimal prefactors :math:`a, b, c, ...` such
    that ``a*u + b*v + c*w + ... = z``, where *ref_col* names the column
    :math:`z` and \\*args name the columns :math:`u, v, w, ...`.

    If ``weights='colX'`` is given, every equation (row) is multiplied by
    the weight in that column: 0 ignores the row, 1 is neutral.

    :param ref_col: name of the reference column (:math:`z`)
    :param \\*args: names of the data columns
    :raises: :class:`RuntimeError` if no column is given or an unknown
             kwarg is passed
    :returns: list of prefactors in the order of \\*args
    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np

      if len(args) == 0:
        raise RuntimeError("At least one column must be specified.")

      b = self.GetNumpyMatrixAsArray(ref_col)
      a = self.GetNumpyMatrixAsArray(*args)

      if len(kwargs) != 0:
        if 'weights' in kwargs:
          w = self.GetNumpyMatrixAsArray(kwargs['weights'])
          b = np.multiply(b, w)
          a = np.multiply(a, w)
        else:
          raise RuntimeError("specified unrecognized kwargs, use weights as key")

      # solve the normal equations A^T A p = A^T b directly instead of
      # forming an explicit inverse: numerically more stable, same solution
      k = np.linalg.solve(a.T@a, a.T@b)
      return list(k.T.reshape(-1))
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      return None
def PlotEnrichment(self, score_col, class_col, score_dir='-',
                   class_dir='-', class_cutoff=2.0,
                   style='-', title=None, x_title=None, y_title=None,
                   clear=True, save=None):
    """
    Plot an enrichment curve with matplotlib for column *score_col*
    classified according to *class_col*. See :meth:`ComputeEnrichment` for
    the classification parameters and :meth:`Plot` for the plotting ones.

    :warning: The function depends on *matplotlib*
    """
    try:
      import matplotlib.pyplot as plt

      enrx, enry = self.ComputeEnrichment(score_col, class_col, score_dir,
                                          class_dir, class_cutoff)

      if not title:
        title = 'Enrichment of %s'%score_col
      if not x_title:
        x_title = '% database'
      if not y_title:
        y_title = '% positives'

      if clear:
        plt.clf()

      plt.plot(enrx, enry, style)
      plt.title(title, size='x-large', fontweight='bold')
      plt.ylabel(y_title, size='x-large')
      plt.xlabel(x_title, size='x-large')

      if save:
        plt.savefig(save)
      return plt
    except ImportError:
      LogError("Function needs matplotlib, but I could not import it.")
def ComputeEnrichment(self, score_col, class_col, score_dir='-',
                      class_dir='-', class_cutoff=2.0):
    """
    Compute the enrichment of *score_col* classified by *class_col*.

    Classification works either with a bool *class_col* (True = positive)
    or with a numeric *class_col* plus *class_cutoff*/*class_dir*:
    ``class_dir=='-'`` counts values <= cutoff as positive,
    ``class_dir=='+'`` counts values >= cutoff as positive.
    The table is sorted by *score_col* according to *score_dir* ('-' means
    smallest first). Rows where either value is ``None`` are skipped.

    :returns: ``(x, y)`` lists normalized to [0, 1], or ``None`` when there
              are no positives or no processed rows
    """
    ALLOWED_DIR = ['+','-']

    score_idx = self.GetColIndex(score_col)
    score_type = self.col_types[score_idx]
    if score_type!='int' and score_type!='float':
      raise TypeError("Score column must be numeric type")

    class_idx = self.GetColIndex(class_col)
    class_type = self.col_types[class_idx]
    if class_type!='int' and class_type!='float' and class_type!='bool':
      raise TypeError("Classifier column must be numeric or bool type")

    if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
      raise ValueError("Direction must be one of %s"%str(ALLOWED_DIR))

    self.Sort(score_col, score_dir)

    x = [0]
    y = [0]
    enr = 0
    old_score_val = None
    examined = 0
    for row in self.rows:
      class_val = row[class_idx]
      score_val = row[score_idx]
      if class_val==None or score_val==None:
        continue
      if old_score_val==None:
        old_score_val = score_val
      # emit one curve point whenever the score value changes
      if score_val!=old_score_val:
        x.append(examined)
        y.append(enr)
        old_score_val = score_val
      examined += 1
      if class_type=='bool':
        if class_val==True:
          enr += 1
      else:
        if (class_dir=='-' and class_val<=class_cutoff) or (class_dir=='+' and class_val>=class_cutoff):
          enr += 1
    x.append(examined)
    y.append(enr)

    # if no rows were processed or no positives were found return None
    if x[-1]==0 or y[-1]==0:
      return None
    x = [float(v)/x[-1] for v in x]
    y = [float(v)/y[-1] for v in y]
    return x, y
def ComputeEnrichmentAUC(self, score_col, class_col, score_dir='-',
                         class_dir='-', class_cutoff=2.0):
    """
    Area under the enrichment curve, computed with the trapezoidal rule.
    Parameters as in :meth:`ComputeEnrichment`; returns ``None`` when no
    curve can be computed.

    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np
      enr = self.ComputeEnrichment(score_col, class_col, score_dir,
                                   class_dir, class_cutoff)
      if enr is None:
        return None
      # np.trapz was removed in NumPy 2.0 in favour of np.trapezoid;
      # support both
      trapezoid = getattr(np, 'trapezoid', None) or getattr(np, 'trapz')
      return trapezoid(enr[1], enr[0])
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      return None
def ComputeROC(self, score_col, class_col, score_dir='-',
               class_dir='-', class_cutoff=2.0):
    """
    Compute the receiver operating characteristics (ROC) of *score_col*
    classified by *class_col*.

    Classification works either with a bool *class_col* (True = positive)
    or with a numeric *class_col* plus *class_cutoff*/*class_dir*:
    ``class_dir=='-'`` counts values <= cutoff as positive,
    ``class_dir=='+'`` counts values >= cutoff as positive.
    The table is sorted by *score_col* according to *score_dir* ('-' means
    smallest first). Rows where either value is ``None`` are skipped.

    :returns: ``(fpr, tpr)`` lists normalized to [0, 1], or ``None`` when
              there are no positives or no negatives
    """
    ALLOWED_DIR = ['+','-']

    score_idx = self.GetColIndex(score_col)
    score_type = self.col_types[score_idx]
    if score_type!='int' and score_type!='float':
      raise TypeError("Score column must be numeric type")

    class_idx = self.GetColIndex(class_col)
    class_type = self.col_types[class_idx]
    if class_type!='int' and class_type!='float' and class_type!='bool':
      raise TypeError("Classifier column must be numeric or bool type")

    if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
      raise ValueError("Direction must be one of %s"%str(ALLOWED_DIR))

    self.Sort(score_col, score_dir)

    x = [0]
    y = [0]
    tp = 0
    fp = 0
    old_score_val = None
    for i, row in enumerate(self.rows):
      class_val = row[class_idx]
      score_val = row[score_idx]
      if class_val==None or score_val==None:
        continue
      if old_score_val==None:
        old_score_val = score_val
      # emit one curve point whenever the score value changes
      if score_val!=old_score_val:
        x.append(fp)
        y.append(tp)
        old_score_val = score_val
      if class_type=='bool':
        if class_val==True:
          tp += 1
        else:
          fp += 1
      else:
        if (class_dir=='-' and class_val<=class_cutoff) or (class_dir=='+' and class_val>=class_cutoff):
          tp += 1
        else:
          fp += 1
    x.append(fp)
    y.append(tp)

    # if no false positives or no true positives are found return None
    if x[-1]==0 or y[-1]==0:
      return None
    x = [float(v)/x[-1] for v in x]
    y = [float(v)/y[-1] for v in y]
    return x, y
def ComputeROCAUC(self, score_col, class_col, score_dir='-',
                  class_dir='-', class_cutoff=2.0):
    """
    Area under the ROC curve, computed with the trapezoidal rule.
    Parameters as in :meth:`ComputeROC`; returns ``None`` when no curve can
    be computed.

    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np
      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if roc is None:
        return None
      # np.trapz was removed in NumPy 2.0 in favour of np.trapezoid;
      # support both
      trapezoid = getattr(np, 'trapezoid', None) or getattr(np, 'trapz')
      return trapezoid(roc[1], roc[0])
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      return None
def ComputeLogROCAUC(self, score_col, class_col, score_dir='-',
                     class_dir='-', class_cutoff=2.0):
    """
    Area under the semilogarithmic-x ROC curve (logROC), computed with a
    lambda of 0.001 according to Mysinger M. and Shoichet B., J. Chem. Inf.
    Model. 2010 50 (9), 1561-1573. Parameters as in :meth:`ComputeROC`.

    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np
      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if not roc:
        return None
      rocxt, rocyt = roc
      rocx = []
      rocy = []
      l = 0.001

      # clamp zero x-values to lambda (log is undefined at 0), then drop
      # duplicate x-values so every trapezoid has nonzero width
      rocxt = [x if x>0 else l for x in rocxt]
      for i in range(len(rocxt)-1):
        if rocxt[i]==rocxt[i+1]:
          continue
        rocx.append(rocxt[i])
        rocy.append(rocyt[i])
      rocx.append(rocxt[-1])
      rocy.append(rocyt[-1])

      # integrate y = m*x + b over log10(x) piecewise, then normalize by
      # the width of the log interval [l, 1]
      value = 0
      for i in range(len(rocx)-1):
        if rocx[i]==rocx[i+1]:
          continue
        b = rocy[i+1]-rocx[i+1]*((rocy[i+1]-rocy[i])/(rocx[i+1]-rocx[i]))
        value += ((rocy[i+1]-rocy[i])/math.log(10))+b*(math.log10(rocx[i+1])-math.log10(rocx[i]))
      return value/math.log10(1.0/l)
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      return None
def PlotROC(self, score_col, class_col, score_dir='-',
            class_dir='-', class_cutoff=2.0,
            style='-', title=None, x_title=None, y_title=None,
            clear=True, save=None):
    """
    Plot a ROC curve using matplotlib. See :meth:`ComputeROC` for the
    classification parameters and :meth:`Plot` for the plotting ones.

    :warning: The function depends on *matplotlib*
    """
    try:
      import matplotlib.pyplot as plt

      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if not roc:
        return None
      enrx, enry = roc

      if not title:
        title = 'ROC of %s'%score_col
      if not x_title:
        x_title = 'false positive rate'
      if not y_title:
        y_title = 'true positive rate'

      if clear:
        plt.clf()

      plt.plot(enrx, enry, style)
      plt.title(title, size='x-large', fontweight='bold')
      plt.ylabel(y_title, size='x-large')
      plt.xlabel(x_title, size='x-large')

      if save:
        plt.savefig(save)
      return plt
    except ImportError:
      LogError("Function needs matplotlib, but I could not import it.")
def PlotLogROC(self, score_col, class_col, score_dir='-',
               class_dir='-', class_cutoff=2.0,
               style='-', title=None, x_title=None, y_title=None,
               clear=True, save=None):
  """
  Plot a logROC curve where the x-axis is semilogarithmic using matplotlib.

  For more information about parameters of the ROC, see
  :meth:`ComputeROC`, and for plotting see :meth:`Plot`.

  :warning: The function depends on *matplotlib*
  """
  try:
    import matplotlib.pyplot as plt
    roc = self.ComputeROC(score_col, class_col, score_dir,
                          class_dir, class_cutoff)
    # NOTE(review): ComputeROC presumably returns None when no curve can be
    # computed — confirm against its implementation.
    if not roc:
      return None
    rocx, rocy = roc
    if not title:
      title = 'logROC of %s'%score_col
    if not x_title:
      x_title = 'false positive rate'
    if not y_title:
      y_title = 'true positive rate'
    if clear:
      plt.clf()
    # x=0 cannot be drawn on a log axis; clamp to the plot's lower limit
    rocx = [x if x>0 else 0.001 for x in rocx]
    plt.plot(rocx, rocy, style)
    plt.title(title, size='x-large', fontweight='bold')
    plt.ylabel(y_title, size='x-large')
    plt.xlabel(x_title, size='x-large')
    try:
      plt.xscale('log', basex=10)
    except TypeError:
      plt.xscale('log', base=10) # breaking change in matplotlib 3.5
    plt.xlim(0.001, 1.0)
    if save:
      plt.savefig(save)
    return plt
  except ImportError:
    # best-effort: log and return None rather than crash when matplotlib
    # is not installed
    LogError("Function needs matplotlib, but I could not import it.")
    return None
def ComputeMCC(self, score_col, class_col, score_dir='-',
               class_dir='-', score_cutoff=2.0, class_cutoff=2.0):
  """
  Compute Matthews correlation coefficient (MCC) for one column (*score_col*)
  with the points classified into true positives, false positives, true
  negatives and false negatives according to a specified classification
  column (*class_col*).

  The datapoints in *score_col* and *class_col* are classified into
  positive and negative points. This can be done in two ways:

  - by using 'bool' columns which contain True for positives and False
    for negatives

  - by using 'float' or 'int' columns and specifying a cutoff value and the
    column's direction. This will generate the classification on the fly:

    * if ``class_dir``/``score_dir=='-'``: values less than or equal to
      *class_cutoff*/*score_cutoff* are counted as positives
    * if ``class_dir``/``score_dir=='+'``: values larger than or equal to
      *class_cutoff*/*score_cutoff* are counted as positives

  The two possibilities can be used together, i.e. 'bool' type for one column
  and 'float'/'int' type and cutoff/direction for the other column.

  :returns: the MCC as a float, or ``None`` (with a warning logged) when the
            MCC is undefined because one of its denominator factors is zero.
  :raises: :exc:`TypeError` if either column is not numeric or bool,
           :exc:`ValueError` for an invalid direction.
  """
  ALLOWED_DIR = ['+','-']

  score_idx = self.GetColIndex(score_col)
  score_type = self.col_types[score_idx]
  if score_type!='int' and score_type!='float' and score_type!='bool':
    raise TypeError("Score column must be numeric or bool type")

  class_idx = self.GetColIndex(class_col)
  class_type = self.col_types[class_idx]
  if class_type!='int' and class_type!='float' and class_type!='bool':
    raise TypeError("Classifier column must be numeric or bool type")

  if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
    raise ValueError("Direction must be one of %s"%str(ALLOWED_DIR))

  tp = 0
  fp = 0
  fn = 0
  tn = 0

  for i,row in enumerate(self.rows):
    class_val = row[class_idx]
    score_val = row[score_idx]
    # None cells cannot be classified (and would raise a TypeError when
    # compared against the cutoff), so skip such rows
    if class_val is None or score_val is None:
      continue
    if (class_type=='bool' and class_val==True) or (class_type!='bool' and ((class_dir=='-' and class_val<=class_cutoff) or (class_dir=='+' and class_val>=class_cutoff))):
      # actual positive: predicted positive -> tp, else -> fn
      if (score_type=='bool' and score_val==True) or (score_type!='bool' and ((score_dir=='-' and score_val<=score_cutoff) or (score_dir=='+' and score_val>=score_cutoff))):
        tp += 1
      else:
        fn += 1
    else:
      # actual negative: predicted negative -> tn, else -> fp
      if (score_type=='bool' and score_val==False) or (score_type!='bool' and ((score_dir=='-' and score_val>score_cutoff) or (score_dir=='+' and score_val<score_cutoff))):
        tn += 1
      else:
        fp += 1

  # the MCC denominator is the product of these four factors; if any of
  # them is zero the MCC is undefined
  mcc = None
  msg = None
  if (tp+fn)==0:
    msg = 'factor (tp + fn) is zero'
  elif (tp+fp)==0:
    msg = 'factor (tp + fp) is zero'
  elif (tn+fn)==0:
    msg = 'factor (tn + fn) is zero'
  elif (tn+fp)==0:
    msg = 'factor (tn + fp) is zero'
  if msg:
    LogWarning("Could not compute MCC: MCC is not defined since %s"%msg)
  else:
    mcc = ((tp*tn)-(fp*fn)) / math.sqrt((tp+fn)*(tp+fp)*(tn+fn)*(tn+fp))
  return mcc
def IsEmpty(self, col_name=None, ignore_nan=True):
  """
  Checks if a table is empty.

  If no column name is specified, the whole table is checked for being empty,
  whereas if a column name is specified, only this column is checked.

  By default, all NAN (or None) values are ignored, and thus, a table
  containing only NAN values is considered as empty. By specifying the
  option ignore_nan=False, NAN values are counted as 'normal' values.

  :returns: True if empty, False otherwise.
  :raises: :exc:`ValueError` if *col_name* is given but the table has no
           columns at all.
  """
  # table with no columns and no rows
  if len(self.col_names)==0:
    if col_name!=None:
      raise ValueError('Table has no column named "%s"' % col_name)
    return True

  # column name specified
  if col_name!=None:
    return self.Count(col_name, ignore_nan=ignore_nan)==0

  # no column name specified -> test whole table
  for row in self.rows:
    for cell in row:
      if ignore_nan:
        # any non-None cell makes the table non-empty
        if cell!=None:
          return False
      else:
        # any cell at all (even None) makes the table non-empty
        return False
  return True
def Extend(self, tab, overwrite=None):
  """
  Append each row of *tab* to the current table. The data is appended based
  on the column names, thus the order of the table columns is *not* relevant,
  only the header names.

  If there is a column in *tab* that is not present in the current table,
  it is added to the current table and filled with *None* for all the rows
  present in the current table.

  If the type of any column in *tab* is not the same as in the current table
  a *TypeError* is raised.

  If *overwrite* is not None and set to an existing column name, the
  specified column in the table is searched for the first occurrence of a
  value matching the value of the column with the same name in the
  dictionary. If a matching value is found, the row is overwritten with the
  dictionary. If no matching row is found, a new row is appended to the
  table.
  """
  # add column to current table if it doesn't exist
  for name, typ in zip(tab.col_names, tab.col_types):
    if not name in self.col_names:
      self.AddCol(name, typ)

  # check that column types are the same in current and new table
  for name in self.col_names:
    if name in tab.col_names:
      curr_type = self.col_types[self.GetColIndex(name)]
      new_type = tab.col_types[tab.GetColIndex(name)]
      if curr_type!=new_type:
        raise TypeError('cannot extend table, column %s in new '%name +\
                        'table different type (%s) than in '%new_type +\
                        'current table (%s)'%curr_type)

  num_rows = len(tab.rows)
  for i in range(0, num_rows):
    # fix: the source row must be fetched before building the name->value
    # dict (the assignment was missing, leaving `row` undefined)
    row = tab.rows[i]
    data = dict(list(zip(tab.col_names, row)))
    self.AddRow(data, overwrite)
def Merge(table1, table2, by, only_matching=False):
  """
  Returns a new table containing the data from both tables. The rows are
  combined based on the common values in the column(s) *by*. The option
  *by* can be a list of column names; when this is the case, merging is
  based on the tuple of values in all of those columns.

  Columns of *table2* whose names collide with *table1* columns are renamed
  by appending ``_2`` (then ``_3``, ...) to make them unique.

  :param only_matching: if True, only rows whose key appears in both tables
                        are kept; otherwise unmatched rows from either table
                        are included with *None* in the missing columns.
  :raises: :exc:`ValueError` if a key value occurs more than once in either
           table.
  """
  def _key(row, indices):
    # the merge key: tuple of the values in the 'by' columns
    return tuple([row[i] for i in indices])
  def _keep(indices, cn, ct, ni):
    # drop the 'by' columns (they are already present in table1)
    ncn, nct, nni=([],[],[])
    for i in range(len(cn)):
      if i not in indices:
        ncn.append(cn[i])
        nct.append(ct[i])
        nni.append(ni[i])
    return ncn, nct, nni
  col_names=list(table2.col_names)
  col_types=list(table2.col_types)
  new_index=[i for i in range(len(col_names))]
  if isinstance(by, str):
    common2_indices=[col_names.index(by)]
  else:
    common2_indices=[col_names.index(b) for b in by]
  col_names, col_types, new_index=_keep(common2_indices, col_names,
                                        col_types, new_index)

  # make the remaining table2 column names unique w.r.t. table1
  for i, name in enumerate(col_names):
    try_name=name
    counter=2
    while try_name in table1.col_names:
      try_name='%s_%d' % (name, counter)
      counter+=1
    col_names[i]=try_name

  if isinstance(by, str):
    common1_indices=[table1.col_names.index(by)]
  else:
    common1_indices=[table1.col_names.index(b) for b in by]

  # index both tables by key, rejecting duplicate keys
  common1={}
  for row in table1.rows:
    key=_key(row, common1_indices)
    if key in common1:
      # fix: closing quote was misplaced in the original message
      raise ValueError('duplicate key "%s" in first table' % (str(key)))
    common1[key]=row
  common2={}
  for row in table2.rows:
    key=_key(row, common2_indices)
    if key in common2:
      raise ValueError('duplicate key "%s" in second table' % (str(key)))
    common2[key]=row

  new_tab=Table(table1.col_names+col_names, table1.col_types+col_types)
  # rows from table1, extended with the matching table2 values (or None)
  for k, v in common1.items():
    row=v+[None for i in range(len(table2.col_names)-len(common2_indices))]
    matched=False
    if k in common2:
      matched=True
      row2=common2[k]
      for i, index in enumerate(new_index):
        row[len(table1.col_names)+i]=row2[index]
    if only_matching and not matched:
      continue
    new_tab.AddRow(row)
  if only_matching:
    return new_tab
  # rows that exist only in table2: fill table1 columns with None, but copy
  # the key values into the corresponding table1 key columns
  for k, v in common2.items():
    if not k in common1:
      v2=[v[i] for i in new_index]
      row=[None for i in range(len(table1.col_names))]+v2
      for common1_index, common2_index in zip(common1_indices, common2_indices):
        row[common1_index]=v[common2_index]
      new_tab.AddRow(row)
  return new_tab
3234 # LocalWords: numpy Numpy