from ost import LogError, LogWarning, LogInfo, LogVerbose
  return col_name.replace('_', ' ')

  if isinstance(value, TableCol) or isinstance(value, BinaryColExpr):

  value = value.strip().upper()
  return value in ('', 'NULL', 'NONE', 'NA')

  if isinstance(value, TableCol) or isinstance(value, BinaryColExpr):
  possibilities = set(['bool', 'int', 'float'])
    str_ele = str(ele).upper()
    if 'int' in possibilities:
      try:
        int(str_ele)
      except ValueError:
        possibilities.remove('int')
    if 'float' in possibilities:
      try:
        float(str_ele)
      except ValueError:
        possibilities.remove('float')
    if 'bool' in possibilities:
      if str_ele not in set(['YES', 'NO', 'TRUE', 'FALSE']):
        possibilities.remove('bool')
    if len(possibilities)==0:
      return 'string'
  if len(possibilities)==2:
    return 'int'
  return possibilities.pop()
    self.lhs = itertools.cycle([self.lhs])
    self.rhs = itertools.cycle([self.rhs])
    for l, r in zip(self.lhs, self.rhs):
      if l!=None and r!=None:
    for row in self._table.rows:

    return len(self._table.rows)
  Essentially a named tuple, but allows column names that are not valid
  Python variable names.

    self.__dict__['tab'] = weakref.proxy(tab)
    self.__dict__['row_data'] = row_data

    if type(col_name)==int:
      return self.row_data[col_name]
    return self.row_data[self.tab.GetColIndex(col_name)]

    for k, v in zip(self.__dict__['tab'].col_names, self.__dict__['row_data']):
      s.append('%s=%s' % (k, str(v)))

    return len(self.row_data)

    if type(col_name)==int:
      self.row_data[col_name] = val
    else:
      self.row_data[self.tab.GetColIndex(col_name)] = val

    if 'col_names' not in self.tab.__dict__ or col_name not in self.tab.col_names:
      raise AttributeError(col_name)
    return self.row_data[self.tab.GetColIndex(col_name)]

    if 'col_names' not in self.tab.__dict__ or col_name not in self.tab.col_names:
      raise AttributeError(col_name)
    self.row_data[self.tab.GetColIndex(col_name)] = val
  The Table class provides convenient access to data in tabular form. An empty
  table can be easily constructed as follows:

  .. code-block:: python

    tab = Table()

  If you want to add columns directly when creating the table, column names
  and *column types* can be specified as follows:

  .. code-block:: python

    tab = Table(['nameX','nameY','nameZ'], 'sfb')

  This will create three columns called nameX, nameY and nameZ of type string,
  float and bool, respectively. There will be no data in the table and thus,
  the table will not contain any rows.

  The following *column types* are supported:

  ======= ========
  name    abbrev
  ======= ========
  string  s
  float   f
  int     i
  bool    b
  ======= ========

  If you want to add data to the table in addition, use the following:

  .. code-block:: python

    tab = Table(['nameX','nameY','nameZ'],
                'sfb',
                nameX = ['a','b','c'],
                nameY = [0.1, 1.2, 3.414],
                nameZ = [True, False, False])

  If values for one column are left out, they will be filled with NA, but if
  values are specified, all values must be specified (i.e. the same number of
  values for each column).
  SUPPORTED_TYPES = ('int', 'float', 'bool', 'string',)
  def __init__(self, col_names=[], col_types=None, **kwargs):

      self.col_names = [v for v in list(kwargs.keys())]

    if 'col_names' not in self.__dict__ or col_name not in self.col_names:
      raise AttributeError(col_name)
  def _ParseColTypes(types, exp_num=None):

    short2long = {'s': 'string', 'i': 'int', 'b': 'bool', 'f': 'float'}
    allowed_short = list(short2long.keys())
    allowed_long = list(short2long.values())

      types = types.lower()
      # single long or short type name
      if types in allowed_long:
        type_list.append(types)
      elif types in allowed_short:
        type_list.append(short2long[types])
      # comma-separated list of long or short type names
      elif types.find(',')!=-1:
        for t in types.split(','):
          if t in allowed_long:
            type_list.append(t)
          elif t in allowed_short:
            type_list.append(short2long[t])
          else:
            raise ValueError('Unknown type %s in types %s' % (t, types))
      # string of short type abbreviations (e.g. 'sfb')
      else:
        for t in types:
          if t in allowed_short:
            type_list.append(short2long[t])
          else:
            raise ValueError('Unknown type %s in types %s' % (t, types))
    # non-string scalar
    else:
      raise ValueError('Col type %s must be string or list' % types)
        if t in allowed_long:
          type_list.append(t)
        elif t in allowed_short:
          type_list.append(short2long[t])
        else:
          raise ValueError('Unknown type %s in types %s' % (t, types))
      # non-string entry in the list
      else:
        raise ValueError('Col type %s must be string or list' % types)
    if len(type_list)!=exp_num:
      raise ValueError('Parsed number of col types (%i) differs from '
                       'expected (%i) in types %s' % (len(type_list), exp_num, types))
    Set name of the table.

    :type name: :class:`str`

    Rename column *old_name* to *new_name*.

    :param old_name: Name of the old column
    :param new_name: Name of the new column
    :raises: :exc:`ValueError` when *old_name* is not a valid column

    if old_name==new_name:
  def _Coerce(self, value, ty):

    Try to convert values (e.g. from :class:`str` type) to the specified type.

    :param value: the value
    :type value: any type

    :param ty: name of type to convert it to (i.e. *int*, *float*, *string*,
               *bool*)
    :type ty: :class:`str`

    if value=='NA' or value==None:
      if isinstance(value, str):
        if value.upper() in ('FALSE', 'NO',):

    raise ValueError('Unknown type %s' % ty)
    Returns the column index for the column with the given name.

    :raises: ValueError if no column with the name is found.

      raise ValueError('Table has no column named "%s"' % col)
    return self.col_names.index(col)
    Returns a list containing all column names.

    Returns a list of column names matching the regex.

    :param regex: regex pattern
    :type regex: :class:`str`

    :returns: :class:`list` of column names (:class:`str`)

      matches = re.search(regex, name)
        matching_names.append(name)
    return matching_names
    Checks if the column with a given name is present in the table.

      value = itertools.cycle([value])
    for r, v in zip(self.rows, value):
  def ToString(self, float_format='%.3f', int_format='%d', rows=None):

    Convert the table into a string representation.

    The output format can be modified for int and float type columns by
    specifying a formatting string for the parameters *float_format* and
    *int_format*.

    The option *rows* specifies the range of rows to be printed. The parameter
    must be a type that supports indexing (e.g. a :class:`list`) containing the
    start and end row *index*, e.g. [start_row_idx, end_row_idx].

    :param float_format: formatting string for float columns
    :type float_format: :class:`str`

    :param int_format: formatting string for int columns
    :type int_format: :class:`str`

    :param rows: iterable containing start and end row *index*
    :type rows: iterable containing :class:`ints <int>`
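
    For instance, a short usage sketch (assuming a populated table ``tab``;
    the values shown are illustrative):

    .. code-block:: python

      # print only the first three rows, with two digits for float columns
      print(tab.ToString(float_format='%.2f', rows=[0, 3]))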
    widths = [len(cn) for cn in self.col_names]
    sel_rows = self.rows
    if rows:
      sel_rows = self.rows[rows[0]:rows[1]]
    for row in sel_rows:
      for i, (ty, col) in enumerate(zip(self.col_types, row)):
        if col is None:
          widths[i] = max(widths[i], len('NA'))
        elif ty=='float':
          widths[i] = max(widths[i], len(float_format % col))
        elif ty=='int':
          widths[i] = max(widths[i], len(int_format % col))
        else:
          widths[i] = max(widths[i], len(str(col)))
      s += ''.join(['# %s\n' % l for l in self.comment.split('\n')])
    total_width = sum(widths) + 2*len(widths)
    for width, col_name in zip(widths, self.col_names):
      s += col_name.center(width+2)
    s += '\n%s\n' % ('-'*total_width)
    for row in sel_rows:
      for width, ty, col in zip(widths, self.col_types, row):
        if col is None:
          cs = 'NA'.center(width+2)
        elif ty=='float':
          cs = (float_format % col).rjust(width+2)
        elif ty=='int':
          cs = (int_format % col).rjust(width+2)
        else:
          cs = ' ' + str(col).ljust(width+1)
    Statistics for column %(col)s

      Number of Rows         : %(num)d
      Number of Rows Not None: %(num_non_null)d
      Standard Deviation     : %(stddev)f

      'num'          : len(self.rows),
      'num_non_null' : self.Count(col),
      'median'       : self.Median(col),
      'mean'         : self.Mean(col),
      'stddev'       : self.StdDev(col),
      'min'          : self.Min(col),
      'max'          : self.Max(col),
  def _AddRowsFromDict(self, d, overwrite=None):

    Add one or more rows from a :class:`dictionary <dict>`.

    If *overwrite* is not None and set to an existing column name, the specified
    column in the table is searched for the first occurrence of a value matching
    the value of the column with the same name in the dictionary. If a matching
    value is found, the row is overwritten with the dictionary. If no matching
    row is found, a new row is appended to the table.

    :param d: dictionary containing the data
    :type d: :class:`dict`

    :param overwrite: column name to overwrite existing row if value in
                      column *overwrite* matches
    :type overwrite: :class:`str`

    :raises: :class:`ValueError` if multiple rows are added but the number of
             data items is different for different columns.
      idxs = [self.GetColIndex(k) for k in list(d.keys())]

      for k,v in d.items():
        elif old_len!=len(v):
          raise ValueError("Cannot add rows: length of data must be equal "
                           "for all columns in %s" % str(d))

      for i,data in enumerate(zip(*list(d.values()))):
        new_row = [None for a in range(len(self.col_names))]
        for idx,v in zip(idxs,data):

          overwrite_idx = self.GetColIndex(overwrite)
          for i,r in enumerate(self.rows):
            if r[overwrite_idx]==new_row[overwrite_idx]:
              for j,e in enumerate(self.rows[i]):
              self.rows[i] = new_row

        if not overwrite or not added:
          self.rows.append(new_row)
    Two-sided test for the null hypothesis that two related samples
    have the same average (expected values).

    :param col_a: First column
    :type col_a: :class:`str`
    :param col_b: Second column
    :type col_b: :class:`str`

    :returns: P-value between 0 and 1 that the two columns have the
       same average. The smaller the value, the less related the two
       columns are.

    from scipy.stats import ttest_rel
    for x, y in self.Zip(col_a, col_b):
      if x!=None and y!=None:
    result = ttest_rel(xs, ys)
    Add a row to the table.

    *data* may either be a dictionary or a list-like object:

    - If *data* is a dictionary, the keys in the dictionary must match the
      column names. Columns not found in the dict will be initialized to None.
      If the dict contains list-like objects, multiple rows will be added, if
      the number of items in all list-like objects is the same, otherwise a
      :class:`ValueError` is raised.

    - If *data* is a list-like object, the row is initialized from the values
      in *data*. The number of items in *data* must match the number of
      columns in the table. A :class:`ValueError` is raised otherwise. The
      values are added in the order specified in the list, thus, the order of
      the data must match the columns.

    If *overwrite* is not None and set to an existing column name, the specified
    column in the table is searched for the first occurrence of a value matching
    the value of the column with the same name in the dictionary. If a matching
    value is found, the row is overwritten with the dictionary. If no matching
    row is found, a new row is appended to the table.

    :param data: data to add
    :type data: :class:`dict` or *list-like* object

    :param overwrite: column name to overwrite existing row if value in
                      column *overwrite* matches
    :type overwrite: :class:`str`

    :raises: :class:`ValueError` if *list-like* object is used and number of
             items does *not* match number of columns in table.

    :raises: :class:`ValueError` if *dict* is used and multiple rows are added
             but the number of data items is different for different columns.

    **Example:** add multiple data rows to a subset of columns using a dictionary

    .. code-block:: python

      # create table with three float columns
      tab = Table(['x','y','z'], 'fff')

      data = {'x': [1.2, 1.6], 'z': [1.6, 5.3]}
      tab.AddRow(data)

    will produce the table

    ====  ====  ====
    x     y     z
    ====  ====  ====
    1.2   NA    1.6
    1.6   NA    5.3
    ====  ====  ====

    .. code-block:: python

      # overwrite the row with x=1.2 and add row with x=1.9
      data = {'x': [1.2, 1.9], 'z': [7.9, 3.5]}
      tab.AddRow(data, overwrite='x')

    will produce the table

    ====  ====  ====
    x     y     z
    ====  ====  ====
    1.2   NA    7.9
    1.6   NA    5.3
    1.9   NA    3.5
    ====  ====  ====
      if len(data)!=len(self.col_names):
        msg = 'data array must have %d elements, not %d'
        raise ValueError(msg % (len(self.col_names), len(data)))
      new_row = [self._Coerce(v, t) for v, t in zip(data, self.col_types)]

        overwrite_idx = self.GetColIndex(overwrite)
        for i,r in enumerate(self.rows):
          if r[overwrite_idx]==new_row[overwrite_idx]:
            self.rows[i] = new_row

      if not overwrite or not added:
        self.rows.append(new_row)
    Remove column with the given name from the table.

    :param col: name of column to remove
    :type col: :class:`str`

    for row in self.rows:
  def AddCol(self, col_name, col_type, data=None):

    Add a column to the right of the table.

    :param col_name: name of new column
    :type col_name: :class:`str`

    :param col_type: type of new column (long versions: *int*, *float*, *bool*,
                     *string* or short versions: *i*, *f*, *b*, *s*)
    :type col_type: :class:`str`

    :param data: data to add to new column
    :type data: scalar or iterable

    .. code-block:: python

      tab = Table(['x'], 'f', x=range(5))
      tab.AddCol('even', 'bool', itertools.cycle([True, False]))

    will produce the table

    ====  =====
    x     even
    ====  =====
    0.0   True
    1.0   False
    2.0   True
    3.0   False
    4.0   True
    ====  =====

    If data is a constant instead of an iterable object, its value
    will be written into each row:

    .. code-block:: python

      tab = Table(['x'], 'f', x=range(5))
      tab.AddCol('num', 'i', 1)

    will produce the table

    ====  ====
    x     num
    ====  ====
    0.0   1
    1.0   1
    2.0   1
    3.0   1
    4.0   1
    ====  ====

    As a special case, if there are no previous rows, and data is not
    None, rows are added for every item in data.
      raise ValueError('Column with name %s already exists' % col_name)

    col_type = self._ParseColTypes(col_type, exp_num=1)[0]

    if len(self.rows)>0:
        for row in self.rows:

        if hasattr(data, '__len__') and len(data)!=len(self.rows):
          raise ValueError('Length of data (%i) must correspond to number of '%len(data) +
                           'existing rows (%i)'%len(self.rows))
        for row, d in zip(self.rows, data):

    elif data!=None and len(self.col_names)==1:
        self.AddRow({col_name : data})
          self.AddRow({col_name : v})
    Returns a filtered table only containing rows matching all the predicates
    in kwargs and args. For example,

    .. code-block:: python

      tab.Filter(town='Basel')

    will return all the rows where the value of the column "town" is equal to
    "Basel". Several predicates may be combined, i.e.

    .. code-block:: python

      tab.Filter(town='Basel', male=True)

    will return the rows with "town" equal to "Basel" and "male" equal to true.
    args are unary callables returning true if the row should be included in the
    result and false if not.

    for row in self.rows:
        for key, val in kwargs.items():
    Returns a new table object containing all rows matching a logical query
    expression.

    *query* is a string containing the logical expression, that will be
    evaluated for every row.

    Operands have to be the name of a column or an expression that can be
    parsed to float, int, bool or string.
    Valid operators are: and, or, !=, !, <=, >=, ==, =, <, >, +, -, \\*, /

    .. code-block:: python

      subtab = tab.Select('col_a>0.5 and (col_b=5 or col_c=5)')

    The selection query should be self-explanatory. Allowed parentheses are
    (), [] and {}; parenthesis mismatches are recognized. Expressions like
    '3<=col_a>=col_b' throw an error, due to problems in figuring out the
    evaluation order.

    There are two special expressions:

    .. code-block:: python

      # selects rows where 1.0<=col_a<=1.5
      subtab = tab.Select('col_a=1.0:1.5')

      # selects rows where col_a=1 or col_a=2 or col_a=3
      subtab = tab.Select('col_a=1,2,3')

    Only consistent types can be compared. If col_a is of type string and col_b
    is of type int, the following expression would throw an error: 'col_a<col_b'

      from .table_selector import TableSelector

      raise ImportError("Tried to import from the file table_selector.py, but could not find it!")

    for row in self.rows:
      if selector.EvaluateRow(row):
        selected_tab.AddRow(row)
  def _LoadOST(stream_or_filename):
    fieldname_pattern = re.compile(r'(?P<name>[^[]+)(\[(?P<type>\w+)\])?')
    values_pattern = re.compile("([^\" ]+|\"[^\"]*\")+")
    if not hasattr(stream_or_filename, 'read'):
      stream = open(stream_or_filename, 'r')
    else:
      stream = stream_or_filename

      if line.startswith('#'):

        for col in line.split():
          match = fieldname_pattern.match(col)
          if match:
            if match.group('type'):
              fieldtypes.append(match.group('type'))
            else:
              fieldtypes.append('string')
            fieldnames.append(match.group('name'))
        tab = Table(fieldnames, fieldtypes)
      except Exception as e:

        tab.AddRow([x.strip('"') for x in values_pattern.findall(line)])

      raise IOError("Cannot read table from empty stream")
  def _GuessColumnTypes(self):
    for col_idx in range(len(self.col_names)):

    for row in self.rows:
      for idx in range(len(row)):
  def _LoadCSV(stream_or_filename, sep):

    if not hasattr(stream_or_filename, 'read'):
      stream = open(stream_or_filename, 'r')
    else:
      stream = stream_or_filename
    reader = csv.reader(stream, delimiter=sep)

        tab = Table(header, types)

      raise IOError('trying to load table from empty CSV stream/file')

    tab._GuessColumnTypes()
  def _LoadPickle(stream_or_filename):

    if not hasattr(stream_or_filename, 'read'):
      stream = open(stream_or_filename, 'rb')
    else:
      stream = stream_or_filename
    tab = pickle.load(stream)
  def _GuessFormat(filename):
    try:
      filename = filename.name
    except AttributeError as e:
      pass
    if filename.endswith('.csv'):
      return 'csv'
    elif filename.endswith('.pickle'):
      return 'pickle'
  def Load(stream_or_filename, format='auto', sep=','):

    Load table from stream or file with given name.

    By default, the file format is set to *auto*, which tries to guess the file
    format from the file extension. The following file extensions are
    recognized:

    ============ ======================
    extension    recognized format
    ============ ======================
    .csv         comma separated values
    .pickle      pickled byte stream
    <all others> ost-specific format
    ============ ======================

    Thus, *format* must be specified for reading files with other filename
    extensions.

    The following file formats are understood:

    - ost

      This is an ost-specific, but still human-readable file format. The file
      (stream) must start with a header line of the form

        col_name1[type1] <col_name2[type2]>...

      The types given in brackets must be one of the data types the
      :class:`Table` class understands. Each following line in the file then must
      contain exactly the same number of data items as listed in the header. The
      data items are automatically converted to the column format. Lines starting
      with a '#' and empty lines are ignored.

    - pickle

      Deserializes the table from a pickled byte stream.

    - csv

      Reads the table from a comma separated values stream. Since there is no
      explicit type information in the csv file, the column types are guessed,
      using the following simple rules:

      * if all values are either NA/NULL/NONE the type is set to string.
      * if all non-null values are convertible to float/int the type is set to
        float/int.
      * if all non-null values are true/false/yes/no, the value is set to bool.
      * for all other cases, the column type is set to string.

    :returns: A new :class:`Table` instance
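
    As a usage sketch (the filename here is illustrative):

    .. code-block:: python

      # the format is guessed from the extension; force it with format='csv'
      tab = Table.Load('scores.csv', format='csv', sep=',')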
    format = format.lower()
    if format=='auto':
      format = Table._GuessFormat(stream_or_filename)

    if format=='ost':
      return Table._LoadOST(stream_or_filename)
    if format=='csv':
      return Table._LoadCSV(stream_or_filename, sep=sep)
    if format=='pickle':
      return Table._LoadPickle(stream_or_filename)
    raise ValueError('unknown format "%s"' % format)
    Performs an in-place sort of the table, based on column *by*.

    :param by: column name by which to sort
    :type by: :class:`str`

    :param order: ascending (``-``) or descending (``+``) order
    :type order: :class:`str` (i.e. *+*, *-*)
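
    For instance (the column name is illustrative):

    .. code-block:: python

      tab.Sort('score', order='-')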
    def _key_cmp(lhs, rhs):

      if a is None or b is None:
        if a is None and b is not None:
        if b is None and a is not None:
      return sign*((a > b) - (a < b))

    self.rows = sorted(self.rows, key=functools.cmp_to_key(_key_cmp))
    Extract a list of all unique values from one column.

    :param col: column name
    :type col: :class:`str`

    :param ignore_nan: ignore all *None* values
    :type ignore_nan: :class:`bool`
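
    For instance (the column name is illustrative):

    .. code-block:: python

      towns = tab.GetUnique('town', ignore_nan=True)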
    for row in self.rows:
      item = row[idx]
      if item!=None or ignore_nan==False:
        if item in seen:
          continue
    Allows to conveniently iterate over a selection of columns, e.g.

    .. code-block:: python

      tab = Table.Load('...')
      for col1, col2 in tab.Zip('col1', 'col2'):

    which is equivalent to

    .. code-block:: python

      tab = Table.Load('...')
      for col1, col2 in zip(tab['col1'], tab['col2']):

    return list(zip(*[self[arg] for arg in args]))
  def Plot(self, x, y=None, z=None, style='.', x_title=None, y_title=None,
           z_title=None, x_range=None, y_range=None, z_range=None,
           color=None, plot_if=None, legend=None,
           num_z_levels=10, z_contour=True, z_interpol='nn', diag_line=False,
           labels=None, max_num_labels=None, title=None, clear=True, save=False,
           **kwargs):
    Function to plot values from your table in 1, 2 or 3 dimensions using
    `Matplotlib <http://matplotlib.sourceforge.net>`__

    :param x: column name for first dimension
    :type x: :class:`str`

    :param y: column name for second dimension
    :type y: :class:`str`

    :param z: column name for third dimension
    :type z: :class:`str`

    :param style: symbol style (e.g. *.*, *-*, *x*, *o*, *+*, *\\**). For a
                  complete list check (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.plot>`__).
    :type style: :class:`str`

    :param x_title: title for first dimension, if not specified it is
                    automatically derived from column name
    :type x_title: :class:`str`

    :param y_title: title for second dimension, if not specified it is
                    automatically derived from column name
    :type y_title: :class:`str`

    :param z_title: title for third dimension, if not specified it is
                    automatically derived from column name
    :type z_title: :class:`str`

    :param x_range: start and end value for first dimension (e.g. [start_x, end_x])
    :type x_range: :class:`list` of length two

    :param y_range: start and end value for second dimension (e.g. [start_y, end_y])
    :type y_range: :class:`list` of length two

    :param z_range: start and end value for third dimension (e.g. [start_z, end_z])
    :type z_range: :class:`list` of length two

    :param color: color for data (e.g. *b*, *g*, *r*). For a complete list check
                  (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.plot>`__).
    :type color: :class:`str`

    :param plot_if: callable which returns *True* if row should be plotted. Is
                    invoked like ``plot_if(self, row)``
    :type plot_if: callable

    :param legend: legend label for data series
    :type legend: :class:`str`

    :param num_z_levels: number of levels for third dimension
    :type num_z_levels: :class:`int`

    :param diag_line: draw diagonal line
    :type diag_line: :class:`bool`

    :param labels: column name containing labels to put on x-axis for one
                   dimensional plots
    :type labels: :class:`str`

    :param max_num_labels: limit maximum number of labels
    :type max_num_labels: :class:`int`

    :param title: plot title, if not specified it is automatically derived from
                  plotted column names
    :type title: :class:`str`

    :param clear: clear old data from plot
    :type clear: :class:`bool`

    :param save: filename for saving plot
    :type save: :class:`str`

    :param z_contour: draw contour lines
    :type z_contour: :class:`bool`

    :param z_interpol: interpolation method for 3-dimensional plot (one of 'nn',
                       'linear')
    :type z_interpol: :class:`str`

    :param \\*\\*kwargs: additional arguments passed to matplotlib

    :returns: the ``matplotlib.pyplot`` module

    **Examples:** simple plotting functions

    .. code-block:: python

      tab = Table(['a','b','c','d'],'iffi', a=range(5,0,-1),
                  b=[x/2.0 for x in range(1,6)],
                  c=[math.cos(x) for x in range(0,5)],

      # one dimensional plot of column 'd' vs. index
      plt = tab.Plot('d')

      # two dimensional plot of 'a' vs. 'c'
      plt = tab.Plot('a', y='c', style='o-')

      # three dimensional plot of 'a' vs. 'c' with values 'b'
      plt = tab.Plot('a', y='c', z='b')
      # manually save plot to file
      plt.savefig("plot.png")
      import matplotlib.pyplot as plt
      import matplotlib.mlab as mlab

      plt.figure(figsize=[8, 6])
      if x_range and (IsScalar(x_range) or len(x_range)!=2):
        raise ValueError('parameter x_range must contain exactly two elements')
      if y_range and (IsScalar(y_range) or len(y_range)!=2):
        raise ValueError('parameter y_range must contain exactly two elements')
      if z_range and (IsScalar(z_range) or len(z_range)!=2):
        raise ValueError('parameter z_range must contain exactly two elements')

      if color:
        kwargs['color'] = color
      if legend:
        kwargs['label'] = legend
        for row in self.rows:
          if row[idx1]!=None and row[idx2]!=None and row[idx3]!=None:
            if plot_if and not plot_if(self, row):
              continue
            xs.append(row[idx1])
            ys.append(row[idx2])
            zs.append(row[idx3])
          z_spacing = (z_range[1] - z_range[0]) / num_z_levels
        else:
          z_spacing = (self.Max(z) - l) / num_z_levels

        for i in range(0, num_z_levels+1):

        xi = np.linspace(min(xs), max(xs), len(xs)*10)
        yi = np.linspace(min(ys), max(ys), len(ys)*10)
        zi = mlab.griddata(xs, ys, zs, xi, yi, interp=z_interpol)

        if z_contour:
          plt.contour(xi, yi, zi, levels, linewidths=0.5, colors='k')

        plt.contourf(xi, yi, zi, levels, cmap=plt.cm.jet)
        plt.colorbar(ticks=levels)
        for row in self.rows:
          if row[idx1]!=None and row[idx2]!=None:
            if plot_if and not plot_if(self, row):
              continue
            xs.append(row[idx1])
            ys.append(row[idx2])
        plt.plot(xs, ys, style, **kwargs)
        for row in self.rows:
          if plot_if and not plot_if(self, row):
            continue
          xs.append(row[idx1])
            label_vals.append(row[label_idx])
        plt.plot(xs, style, **kwargs)

          if len(label_vals)>max_num_labels:
            interval = int(math.ceil(float(len(label_vals))/max_num_labels))
            label_vals = label_vals[::interval]
          plt.xticks(np.arange(0, len(xs), interval), label_vals, rotation=45,
        title = '%s of %s vs. %s' % (nice_z, nice_x, nice_y)

        title = '%s vs. %s' % (nice_x, nice_y)

      plt.title(title, size='x-large', fontweight='bold',
                verticalalignment='bottom')

        plt.xlabel(nice_x, size='x-large')
          plt.xlim(x_range[0], x_range[1])
          plt.ylim(y_range[0], y_range[1])
          plt.plot(x_range, y_range, '-', color='black')
        plt.ylabel(nice_y, size='x-large')

          plt.ylim(y_range[0], y_range[1])
        plt.xlabel(x_title, size='x-large')
        plt.ylabel(nice_y, size='x-large')

      LogError("Function needs numpy and matplotlib, but I could not import it.")
                    histtype='stepfilled', align='mid', x_title=None,
                    y_title=None, title=None, clear=True, save=False,
                    color=None, y_range=None):
    Create a histogram of the data in col for the range *x_range*, split into
    *num_bins* bins and plot it using Matplotlib.

    :param col: column name with data
    :type col: :class:`str`

    :param x_range: start and end value for first dimension (e.g. [start_x, end_x])
    :type x_range: :class:`list` of length two

    :param y_range: start and end value for second dimension (e.g. [start_y, end_y])
    :type y_range: :class:`list` of length two

    :param num_bins: number of bins in range
    :type num_bins: :class:`int`

    :param color: Color to be used for the histogram. If not set, color will be
                  determined by matplotlib
    :type color: :class:`str`

    :param normed: normalize histogram
    :type normed: :class:`bool`

    :param histtype: type of histogram (i.e. *bar*, *barstacked*, *step*,
                     *stepfilled*). See (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.hist>`__).
    :type histtype: :class:`str`

    :param align: style of histogram (*left*, *mid*, *right*). See
                  (`matplotlib docu <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.hist>`__).
    :type align: :class:`str`

    :param x_title: title for first dimension, if not specified it is
                    automatically derived from column name
    :type x_title: :class:`str`

    :param y_title: title for second dimension, if not specified it is
                    automatically derived from column name
    :type y_title: :class:`str`

    :param title: plot title, if not specified it is automatically derived from
                  plotted column names
    :type title: :class:`str`

    :param clear: clear old data from plot
    :type clear: :class:`bool`

    :param save: filename for saving plot
    :type save: :class:`str`

    **Examples:** simple plotting functions

    .. code-block:: python

      tab = Table(['a'],'f', a=[math.cos(x*0.01) for x in range(100)])

      # one dimensional histogram of column 'a'
      plt = tab.PlotHistogram('a')
      import matplotlib.pyplot as plt

      if len(self.rows)==0:

      if color:
        kwargs['color'] = color

      for r in self.rows:

      n, bins, patches = plt.hist(data, bins=num_bins, range=x_range,
                                  normed=normed, histtype=histtype, align=align,

      plt.xlabel(nice_x, size='x-large')

      plt.ylabel(nice_y, size='x-large')

      nice_title = "Histogram of %s" % nice_x
      plt.title(nice_title, size='x-large', fontweight='bold')

      LogError("Function needs numpy and matplotlib, but I could not import it.")
  def _Max(self, col):
    if len(self.rows)==0:
      return None, None

    if col_type=='int' or col_type=='float':
      max_val = -float('inf')
    elif col_type=='bool':
      max_val = False
    elif col_type=='string':
      max_val = chr(0)

    for i in range(0, len(self.rows)):
      val = self.rows[i][idx]
      if val and val > max_val:
        max_val = self.rows[i][idx]
        max_idx = i
    return max_val, max_idx
  def PlotBar(self, cols=None, rows=None, xlabels=None, set_xlabels=True,
              xlabels_rotation='horizontal', y_title=None, title=None,
              colors=None, width=0.8, bottom=0, legend=False,
              legend_names=None, show=False, save=False):

    Create a barplot of the data in cols. Every column will be represented
    at one position. If there are several rows, each column will be grouped
    at one position.

    :param cols: List of column names. Every column will be represented as a
                 single bar. If cols is None, every column of the table gets
                 plotted.
    :type cols: :class:`list`

    :param rows: List of row indices. Values from given rows will be plotted
                 in parallel at one column position. If set to None, all rows
                 of the table will be plotted. Note, that the maximum number
                 of rows is 7.
    :type rows: :class:`list`

    :param xlabels: Label for every col on x-axis. If set to None, the column
                    names are used. The xlabel plotting can be suppressed by
                    the parameter set_xlabel.
    :type xlabels: :class:`list`

    :param set_xlabels: Controls whether xlabels are plotted or not.
    :type set_xlabels: :class:`bool`

    :param xlabels_rotation: Can either be 'horizontal', 'vertical' or an
                             integer, that describes the rotation in degrees.

    :param y_title: Y-axis description
    :type y_title: :class:`str`

    :param title: Title of the plot. No title appears if set to None
    :type title: :class:`str`

    :param colors: Colors of the different bars in each group. Must be a list
                   of valid colors in matplotlib. Length of colors and rows must
                   be consistent.
    :type colors: :class:`list`

    :param width: The available space for the groups on the x-axis is divided
                  by the exact number of groups. The parameter width is the
                  fraction of what is actually used. If it would be 1.0 the
                  bars of the different groups would touch each other.
                  Value must be between [0;1]
    :type width: :class:`float`

    :param bottom: Bottom
    :type bottom: :class:`float`

    :param legend: Legend for color explanation, the corresponding row
                   respectively. If set to True, legend_names must be provided.
    :type legend: :class:`bool`

    :param legend_names: List of names, that describe the differently colored
                         bars. Length must be consistent with number of rows.

    :param show: If set to True, the plot is directly displayed.

    :param save: If set, a png image with name save in the current working
                 directory will be saved.
    :type save: :class:`str`
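
    A minimal usage sketch (column names and values are illustrative):

    .. code-block:: python

      tab = Table(['a','b'], 'ff', a=[1.0, 2.5], b=[0.5, 3.1])
      tab.PlotBar(cols=['a','b'], legend=True,
                  legend_names=['row 0', 'row 1'], save='barplot.png')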
      import matplotlib.pyplot as plt
    except:
      raise ImportError('PlotBar relies on numpy and matplotlib, but I could'
                        ' not import it')

    standard_colors = ['b', 'g', 'y', 'c', 'm', 'r', 'k']

    if width<=0 or width>1:
      raise ValueError('Width must be in [0;1]')

      if len(self.rows)>7:
        raise ValueError('Table contains too many rows to represent them at one '
                         'bar position in parallel. You can Select a Subtable or '
                         'specify the parameter rows with a list of row indices '
                         '(max 7)')
      rows = list(range(len(self.rows)))
    else:
      if not isinstance(rows, list):
        rows = [rows]
      if len(rows)>7:
        raise ValueError('Too many rows to represent (max 7). Please note, that '
                         'data from multiple rows from one column gets '
                         'represented at one position in parallel.')

      row = self.rows[r_idx]

          raise ValueError('Cannot find column with name ' + str(c))
        temp.append(row[c_idx])

      colors = standard_colors[:len(rows)]

    if len(rows)!=len(colors):
      raise ValueError("Number of rows and number of colors must be consistent!")
    ind = np.arange(len(data[0]))
    single_bar_width = float(width)/len(data)

    ax = fig.add_subplot(111)

    for i in range(len(data)):
      legend_data.append(ax.bar(ind+i*single_bar_width+(1-width)/2, data[i],
                                single_bar_width, bottom=bottom, color=colors[i])[0])

    if title!=None:
      ax.set_title(title, size='x-large', fontweight='bold')

      ax.set_ylabel(nice_y)

      if len(data[0])!=len(xlabels):
        raise ValueError('Number of xlabels is not consistent with number of cols!')

      ax.set_xticks(ind+0.5)
      ax.set_xticklabels(xlabels, rotation=xlabels_rotation)

      if legend_names==None:
        raise ValueError('You must provide legend names! e.g. names for the rows, '
                         'that are printed in parallel.')
      if len(legend_names)!=len(data):
        raise ValueError('length of legend_names must be consistent with number '
                         'of rows.')
      ax.legend(legend_data, legend_names)
  def PlotHexbin(self, x, y, title=None, x_title=None, y_title=None,
                 x_range=None, y_range=None, binning='log',
                 colormap='jet', show_scalebar=False, scalebar_label=None,
                 clear=True, save=False, show=False):

    Create a heatplot of the data in col x vs the data in col y using matplotlib.

    :param x: column name with x data
    :type x: :class:`str`

    :param y: column name with y data
    :type y: :class:`str`

    :param title: title of the plot, will be generated automatically if set to None
    :type title: :class:`str`

    :param x_title: label of x-axis, will be generated automatically if set to None
    :type x_title: :class:`str`

    :param y_title: label of y-axis, will be generated automatically if set to None
    :type y_title: :class:`str`

    :param x_range: start and end value for first dimension (e.g. [start_x, end_x])
    :type x_range: :class:`list` of length two

    :param y_range: start and end value for second dimension (e.g. [start_y, end_y])
    :type y_range: :class:`list` of length two

    :param binning: type of binning. If set to None, the value of a hexbin will
                    correspond to the number of datapoints falling into it. If
                    set to 'log', the value will be the log with base 10 of the above
                    value (log(i+1)). If an integer is provided, the number of a
                    hexbin is equal the number of datapoints falling into it divided
                    by the integer. If a list of values is provided, these values
                    will be the lower bounds of the bins.

    :param colormap: colormap, that will be used. Value can be every colormap defined
                     in matplotlib or an own defined colormap. You can either pass a
                     string with the name of the matplotlib colormap or a colormap
                     instance.

    :param show_scalebar: If set to True, a scalebar according to the chosen colormap is shown
    :type show_scalebar: :class:`bool`

    :param scalebar_label: Label of the scalebar
    :type scalebar_label: :class:`str`

    :param clear: clear old data from plot
    :type clear: :class:`bool`

    :param save: filename for saving plot
    :type save: :class:`str`

    :param show: directly show plot
    :type show: :class:`bool`
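
    A small usage sketch (column names and the output filename are illustrative):

    .. code-block:: python

      tab.PlotHexbin('col_a', 'col_b', binning='log', show_scalebar=True,
                     save='hexbin.png')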
      import matplotlib.pyplot as plt
      import matplotlib.cm as cm
    except:
      raise ImportError('PlotHexbin relies on matplotlib, but I could not import it')

    for r in self.rows:
      if r[idx]!=None and r[idy]!=None:
        xdata.append(r[idx])
        ydata.append(r[idy])
      title = '%s vs. %s' % (nice_x, nice_y)

      colormap = getattr(cm, colormap)

    if x_range and (IsScalar(x_range) or len(x_range)!=2):
      raise ValueError('parameter x_range must contain exactly two elements')
    if y_range and (IsScalar(y_range) or len(y_range)!=2):
      raise ValueError('parameter y_range must contain exactly two elements')

    ext = [min(xdata), max(xdata), min(ydata), max(ydata)]

    if x_range:
      plt.xlim((x_range[0], x_range[1]))
    if y_range:
      plt.ylim(y_range[0], y_range[1])

    plt.hexbin(xdata, ydata, bins=binning, cmap=colormap, extent=ext)

    plt.title(title, size='x-large', fontweight='bold',
              verticalalignment='bottom')

      cb.set_label(scalebar_label)
    Returns the row containing the cell with the maximal value in col. If
    several rows have the highest value, only the first one is returned.
    ''None'' values are ignored.

    :param col: column name
    :type col: :class:`str`

    :returns: row with maximal col value or None if the table is empty

    val, idx = self._Max(col)

    return self.rows[idx]

    Returns the maximum value in col. If several rows have the highest value,
    only the first one is returned. ''None'' values are ignored.

    :param col: column name
    :type col: :class:`str`

    val, idx = self._Max(col)

    Returns the row index of the cell with the maximal value in col. If
    several rows have the highest value, only the first one is returned.
    ''None'' values are ignored.

    :param col: column name
    :type col: :class:`str`

    val, idx = self._Max(col)
  def _Min(self, col):
    if len(self.rows)==0:
      return None, None

    if col_type=='int' or col_type=='float':
      min_val = float('inf')
    elif col_type=='bool':
      min_val = True
    elif col_type=='string':
      min_val = chr(255)

    for i,row in enumerate(self.rows):
      if row[idx]!=None and row[idx]<min_val:
        min_val = row[idx]
        min_idx = i
    return min_val, min_idx
    Returns the minimal value in col. If several rows have the lowest value,
    only the first one is returned. ''None'' values are ignored.

    :param col: column name
    :type col: :class:`str`

    val, idx = self._Min(col)

    Returns the row containing the cell with the minimal value in col. If
    several rows have the lowest value, only the first one is returned.
    ''None'' values are ignored.

    :param col: column name
    :type col: :class:`str`

    :returns: row with minimal col value or None if the table is empty

    val, idx = self._Min(col)

    return self.rows[idx]

    Returns the row index of the cell with the minimal value in col. If
    several rows have the lowest value, only the first one is returned.
    ''None'' values are ignored.

    :param col: column name
    :type col: :class:`str`

    val, idx = self._Min(col)
    Returns the sum of the given column. Cells with ''None'' are ignored. Returns
    0.0, if the column doesn't contain any elements. Col must be of numeric
    column type ('float', 'int') or boolean column type.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``

    if col_type!='int' and col_type!='float' and col_type!='bool':
      raise TypeError("Sum can only be used on numeric column types")

    for r in self.rows:
    Returns the mean of the given column. Cells with ''None'' are ignored. Returns
    None, if the column doesn't contain any elements. Col must be of numeric
    ('float', 'int') or boolean column type.

    If the column type is *bool*, the function returns the ratio of the
    number of 'Trues' to the total number of elements.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``
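
    For instance (the column name is illustrative):

    .. code-block:: python

      avg_score = tab.Mean('score')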
    if col_type!='int' and col_type!='float' and col_type!='bool':
      raise TypeError("Mean can only be used on numeric or bool column types")

      return stutil.Mean(vals)
    Adds a new column of type 'float' with a specified name (*mean_col_name*),
    containing the mean of all specified columns for each row.

    Cols are specified by their names and must be of numeric column
    type ('float', 'int') or boolean column type. Cells with None are ignored.
    Adds ''None'' if the row doesn't contain any values.

    :param mean_col_name: name of new column containing mean values
    :type mean_col_name: :class:`str`

    :param cols: name or list of names of columns to include in computation of
                 the mean
    :type cols: :class:`str` or :class:`list` of strings

    :raises: :class:`TypeError` if column type of columns in *col* is ``string``

    Starting with a table that contains the numeric columns 'x' and 'u', the
    code below adds a column named 'mean' holding the row-wise mean of 'x' and 'u':

    .. code-block:: python

      tab.RowMean('mean', ['x', 'u'])
      if col_type!='int' and col_type!='float' and col_type!='bool':
        raise TypeError("RowMean can only be used on numeric column types")
      cols_idxs.append(idx)

    for row in self.rows:
      vals = []
      for idx in cols_idxs:

        mean = stutil.Mean(vals)
        mean_rows.append(mean)
      else:
        mean_rows.append(None)

    self.AddCol(mean_col_name, 'f', mean_rows)
    Returns the percentiles of column *col* given in *nths*.

    The percentiles are calculated as

    .. code-block:: python

      values[min(len(values)-1, int(math.floor(len(values)*nth/100.0)))]

    where values are the sorted values of *col* not equal to ''None''

    :param col: column name
    :type col: :class:`str`
    :param nths: list of percentiles to be calculated. Each percentile is a
                 number between 0 and 100.
    :type nths: :class:`list` of numbers

    :raises: :class:`TypeError` if column type is ``string``
    :returns: List of percentiles in the same order as given in *nths*
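
    For example, a short sketch (column name and values are illustrative):

    .. code-block:: python

      tab = Table(['v'], 'i', v=[1, 2, 3, 4])
      # with 4 sorted values, the 50th percentile picks index
      # min(3, floor(4*50/100.0)) = 2, i.e. the value 3
      print(tab.Percentiles('v', [50]))  # [3]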
    if col_type!='int' and col_type!='float' and col_type!='bool':
      raise TypeError("Percentiles can only be used on numeric column types")

    for nth in nths:
      if nth < 0 or nth > 100:
        raise ValueError("percentiles must be between 0 and 100")

      return [None]*len(nths)

      p = vals[min(len(vals)-1, int(math.floor(len(vals)*nth/100.0)))]
      percentiles.append(p)
    Returns the median of the given column. Cells with ''None'' are ignored. Returns
    ''None'', if the column doesn't contain any elements. Col must be of numeric
    column type ('float', 'int') or boolean column type.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``

    if col_type!='int' and col_type!='float' and col_type!='bool':
      raise TypeError("Median can only be used on numeric column types")

      return stutil.Median(vals)
    Returns the standard deviation of the given column. Cells with ''None'' are
    ignored. Returns ''None'', if the column doesn't contain any elements. Col must
    be of numeric column type ('float', 'int') or boolean column type.

    :param col: column name
    :type col: :class:`str`

    :raises: :class:`TypeError` if column type is ``string``

    if col_type!='int' and col_type!='float' and col_type!='bool':
      raise TypeError("StdDev can only be used on numeric column types")

      return stutil.StdDev(vals)
    Count the number of cells in column that are not equal to ''None''.

    :param col: column name
    :type col: :class:`str`

    :param ignore_nan: ignore all *None* values
    :type ignore_nan: :class:`bool`

    for r in self.rows:
    Calculate the Pearson correlation coefficient between *col1* and *col2*, only
    taking rows into account where both of the values are not equal to *None*.
    If there are not enough data points to calculate a correlation coefficient,
    None is returned.

    :param col1: column name for first column
    :type col1: :class:`str`

    :param col2: column name for second column
    :type col2: :class:`str`

    vals1, vals2 = ([], [])
    for v1, v2 in zip(self[col1], self[col2]):
      if v1!=None and v2!=None:

    return stutil.Correl(vals1, vals2)
    Calculate the Spearman correlation coefficient between col1 and col2, only
    taking rows into account where both of the values are not equal to None. If
    there are not enough data points to calculate a correlation coefficient,
    None is returned.

    :warning: The function depends on the following module: *scipy.stats.mstats*

    :param col1: column name for first column
    :type col1: :class:`str`

    :param col2: column name for second column
    :type col2: :class:`str`

      import scipy.stats.mstats

      vals1, vals2 = ([], [])
      for v1, v2 in zip(self[col1], self[col2]):
        if v1!=None and v2!=None:

      correl = scipy.stats.mstats.spearmanr(vals1, vals2)[0]
      if np.isnan(correl):
        return None

      LogError("Function needs scipy.stats.mstats, but I could not import it.")
  def Save(self, stream_or_filename, format='ost', sep=','):

    Save the table to stream or filename. The following file formats
    are supported (for more information on file formats, see :meth:`Load`):

    ============= =======================================
    ost           ost-specific format (human readable)
    csv           comma separated values (human readable)
    pickle        pickled byte stream (binary)
    html          HTML table
    context       ConTeXt table
    ============= =======================================

    :param stream_or_filename: filename or stream for writing output
    :type stream_or_filename: :class:`str` or :class:`file`

    :param format: output format (i.e. *ost*, *csv*, *pickle*)
    :type format: :class:`str`

    :raises: :class:`ValueError` if format is unknown
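
    For instance (the filename is illustrative):

    .. code-block:: python

      tab.Save('scores.csv', format='csv', sep=';')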
    format = format.lower()
    if format=='ost':
      return self._SaveOST(stream_or_filename)
    if format=='csv':
      return self._SaveCSV(stream_or_filename, sep=sep)
    if format=='pickle':
      return self._SavePickle(stream_or_filename)
    if format=='html':
      return self._SaveHTML(stream_or_filename)
    if format=='context':
      return self._SaveContext(stream_or_filename)
    raise ValueError('unknown format "%s"' % format)
  def _SavePickle(self, stream):

    if not hasattr(stream, 'write'):
      stream = open(stream, 'wb')

    pickle.dump(self, stream, pickle.HIGHEST_PROTOCOL)
  def _SaveHTML(self, stream_or_filename):
    def _escape(s):
      return s.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')

    if not hasattr(stream_or_filename, 'write'):
      stream = open(stream_or_filename, 'w')
    else:
      stream = stream_or_filename
    stream.write('<table>')
    stream.write('<tr>')
    for col_name in self.col_names:
      stream.write('<th>%s</th>' % _escape(col_name))
    stream.write('</tr>')
    for row in self.rows:
      stream.write('<tr>')
      for i, col in enumerate(row):
        if self.col_types[i] == 'float':
        elif self.col_types[i] == 'int':
        elif self.col_types[i] == 'bool':
          val = col and 'true' or 'false'
        stream.write('<td>%s</td>' % _escape(val))
      stream.write('</tr>')
    stream.write('</table>')
  def _SaveContext(self, stream_or_filename):

    if not hasattr(stream_or_filename, 'write'):
      stream = open(stream_or_filename, 'w')
    else:
      stream = stream_or_filename
    stream.write('\\starttable[')
    for col_type in self.col_types:
      if col_type == 'string':
      elif col_type == 'int':
      elif col_type == 'float':
        stream.write('i3r|')
    stream.write(']\n\\HL\n')
    for col_name in self.col_names:
      stream.write('\\NC \\bf %s' % col_name)
    stream.write(' \\AR\\HL\n')
    for row in self.rows:
      for i, col in enumerate(row):
        if self.col_types[i] == 'float':
        elif self.col_types[i] == 'int':
        elif self.col_types[i] == 'bool':
          val = col and 'true' or 'false'
        stream.write('\\NC %s' % val)
      stream.write(' \\AR\n')
    stream.write('\\HL\n')
    stream.write('\\stoptable')
  def _SaveCSV(self, stream, sep):

    if not hasattr(stream, 'write'):
      stream = open(stream, 'w')

    writer = csv.writer(stream, delimiter=sep)
    writer.writerow(['%s' % n for n in self.col_names])
    for row in self.rows:
      for i, c in enumerate(row):
        if c==None:
          row[i] = 'NA'
      writer.writerow(row)
  def _SaveOST(self, stream):

    if hasattr(stream, 'write'):
      writer = csv.writer(stream, delimiter=' ')
    else:
      stream = open(stream, 'w')
      writer = csv.writer(stream, delimiter=' ')

      stream.write(''.join(['# %s\n' % l for l in self.comment.split('\n')]))
    writer.writerow(['%s[%s]' % t for t in zip(self.col_names, self.col_types)])
    for row in self.rows:
      for i, c in enumerate(row):
        if c==None:
          row[i] = 'NA'
      writer.writerow(row)
    Returns a numpy array containing the selected columns from the table as
    columns of a matrix.

    Only columns of type *int* or *float* are supported. *NA* values in the
    table will be converted to *None* values.

    Originally the function used the numpy matrix class but that is going to be
    deprecated in the future. Numpy itself suggests replacing numpy matrix by
    numpy array.

    :param \\*args: column names to include in numpy array

    :warning: The function depends on *numpy*
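
    A small sketch of typical use (column names are illustrative):

    .. code-block:: python

      m = tab.GetNumpyMatrixAsArray('x', 'y')
      # per the description above, each selected table column
      # becomes one column of the returned array m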
      raise RuntimeError("At least one column must be specified.")

      if col_type!='int' and col_type!='float':
        raise TypeError("Numpy matrix can only be generated from numeric "
                        "column types")

      a = np.array([list(self[i]) for i in idxs])

      LogError("Function needs numpy, but I could not import it.")
    *Caution*: Numpy is deprecating the use of the numpy matrix class.

    Returns a numpy matrix containing the selected columns from the table as
    columns in the matrix.

    Only columns of type *int* or *float* are supported. *NA* values in the
    table will be converted to *None* values.

    :param \\*args: column names to include in numpy matrix

    :warning: The function depends on *numpy*

    LogWarning("table.GetNumpyMatrix is deprecated, please use "
               "table.GetNumpyMatrixAsArray instead")

      LogError("Function needs numpy, but I could not import it.")
    In-place Gaussian smooth of a column in the table with a given standard deviation.
    All NA (None) values are set to *na_value* before smoothing.

    :param col: column name
    :type col: :class:`str`

    :param std: standard deviation for Gaussian kernel
    :type std: `scalar`

    :param na_value: all NA (None) values of the specified column are set to na_value before smoothing
    :type na_value: `scalar`

    :param padding: allows to handle the padding behaviour, see the scipy ndimage.gaussian_filter1d documentation for more information. Default is 'reflect'
    :type padding: :class:`str`

    :param c: constant value used for padding if padding mode is 'constant'

    :warning: The function depends on *scipy*
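
    For instance (the column name is illustrative):

    .. code-block:: python

      tab.GaussianSmooth('signal', std=2, na_value=0.0, padding='reflect')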
      from scipy import ndimage
    except ImportError:
      LogError("I need scipy.ndimage and numpy, but could not import it")
      return

    if col_type!='int' and col_type!='float':
      raise TypeError("GaussianSmooth can only be used on numeric column types")

        vals.append(na_value)

    smoothed_values_ndarray = ndimage.gaussian_filter1d(vals, std, mode=padding, cval=c)

    for v in smoothed_values_ndarray:
    This returns the optimal prefactor values (i.e. :math:`a, b, c, ...`) for
    the following equation

    .. math::
      :label: op1

      a*u + b*v + c*w + ... = z

    where :math:`u, v, w` and :math:`z` are vectors. In matrix notation

    .. math::
      :label: op2

      A*p = z

    where :math:`A` contains the data from the table :math:`(u,v,w,...)`,
    :math:`p` are the prefactors to optimize :math:`(a,b,c,...)` and :math:`z`
    is the vector containing the result of equation :eq:`op1`.

    The parameter ref_col equals to :math:`z` in both equations, and \\*args
    are columns :math:`u`, :math:`v` and :math:`w` (or :math:`A` in :eq:`op2`).
    All columns must be specified by their names.

    .. code-block:: python

      tab.GetOptimalPrefactors('colC', 'colA', 'colB')

    The function returns a list containing the prefactors
    :math:`a, b, c, ...` in the correct order (i.e. same as columns were
    specified in \\*args).

    If the kwarg weights="columX" is specified, the equations are weighted by
    the values in that column. Each row is multiplied by the weight in that
    row, which leads to :eq:`op3`:

    .. math::
      :label: op3

      \\textit{weight}*a*u + \\textit{weight}*b*v + \\textit{weight}*c*w + ...
      = \\textit{weight}*z

    Weights must be float or int and can have any value. A value of 0 ignores
    this equation, a value of 1 means the same as no weight. If all weights are
    the same for each row, the same result will be obtained as with no weights.

    .. code-block:: python

      tab.GetOptimalPrefactors('colC', 'colA', 'colB', weights='colD')
        raise RuntimeError("At least one column must be specified.")

      if 'weights' in kwargs:
        b = np.multiply(b, w)
        a = np.multiply(a, w)
      else:
        raise RuntimeError("specified unrecognized kwargs, use weights as key")

      k = np.linalg.inv(a.T@a)@a.T@b
      return list(k.T.reshape(-1))

      LogError("Function needs numpy, but I could not import it.")
  def PlotEnrichment(self, score_col, class_col, score_dir='-',
                     class_dir='-', class_cutoff=2.0,
                     style='-', title=None, x_title=None, y_title=None,
                     clear=True, save=None):

    Plot an enrichment curve using matplotlib of column *score_col* classified
    according to *class_col*.

    For more information about parameters of the enrichment, see
    :meth:`ComputeEnrichment`, and for plotting see :meth:`Plot`.

    :warning: The function depends on *matplotlib*

      import matplotlib.pyplot as plt

      enrx, enry = self.ComputeEnrichment(score_col, class_col, score_dir,
                                          class_dir, class_cutoff)

      if not title:
        title = 'Enrichment of %s' % score_col

      if not x_title:
        x_title = '% database'

      if not y_title:
        y_title = '% positives'

      plt.plot(enrx, enry, style)

      plt.title(title, size='x-large', fontweight='bold')
      plt.ylabel(y_title, size='x-large')
      plt.xlabel(x_title, size='x-large')

      LogError("Function needs matplotlib, but I could not import it.")
  def ComputeEnrichment(self, score_col, class_col, score_dir='-',
                        class_dir='-', class_cutoff=2.0):

    Computes the enrichment of column *score_col* classified according to
    *class_col*.

    For this it is necessary, that the datapoints are classified into positive
    and negative points. This can be done in two ways:

    - by using one 'bool' type column (*class_col*) which contains *True* for
      positives and *False* for negatives

    - by specifying a classification column (*class_col*), a cutoff value
      (*class_cutoff*) and the classification column's direction (*class_dir*).
      This will generate the classification on the fly

      * if ``class_dir=='-'``: values in the classification column that are less than or equal to class_cutoff will be counted as positives
      * if ``class_dir=='+'``: values in the classification column that are larger than or equal to class_cutoff will be counted as positives

    During the calculation, the table will be sorted according to *score_dir*,
    where a '-' value means smallest values first and therefore, the smaller
    the value, the better.

    :warning: If either the value of *class_col* or *score_col* is *None*, the
              data in this row is ignored.
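
    A small usage sketch (column names are illustrative):

    .. code-block:: python

      # 'score': lower is better; 'rmsd' <= 2.0 counts as a positive
      enrx, enry = tab.ComputeEnrichment(score_col='score', class_col='rmsd',
                                         score_dir='-', class_dir='-',
                                         class_cutoff=2.0)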
    ALLOWED_DIR = ['+', '-']

    score_idx = self.GetColIndex(score_col)
    score_type = self.col_types[score_idx]
    if score_type!='int' and score_type!='float':
      raise TypeError("Score column must be numeric type")

    class_idx = self.GetColIndex(class_col)
    class_type = self.col_types[class_idx]
    if class_type!='int' and class_type!='float' and class_type!='bool':
      raise TypeError("Classifier column must be numeric or bool type")

    if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
      raise ValueError("Direction must be one of %s" % str(ALLOWED_DIR))

    self.Sort(score_col, score_dir)
    old_score_val = None

    for row in self.rows:
      class_val = row[class_idx]
      score_val = row[score_idx]
      if class_val==None or score_val==None:
        continue
      if old_score_val==None:
        old_score_val = score_val
      if score_val!=old_score_val:
        old_score_val = score_val
      if class_type=='bool':

      if (class_dir=='-' and class_val<=class_cutoff) or (class_dir=='+' and class_val>=class_cutoff):

    if x[-1]==0 or y[-1]==0:

    x = [float(v)/x[-1] for v in x]
    y = [float(v)/y[-1] for v in y]
  def ComputeEnrichmentAUC(self, score_col, class_col, score_dir='-',
                           class_dir='-', class_cutoff=2.0):
    """
    Computes the area under the curve of the enrichment using the trapezoidal
    rule.

    For more information about the parameters of the enrichment, see
    :meth:`ComputeEnrichment`.

    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np

      enr = self.ComputeEnrichment(score_col, class_col, score_dir,
                                   class_dir, class_cutoff)
      if enr==None:
        return None
      return np.trapz(enr[1], enr[0])
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      raise
  def ComputeROC(self, score_col, class_col, score_dir='-',
                 class_dir='-', class_cutoff=2.0):
    """
    Computes the receiver operating characteristics (ROC) of column
    *score_col* classified according to *class_col*.

    For this it is necessary that the datapoints are classified into positive
    and negative points. This can be done in two ways:

    - by using one 'bool' column (*class_col*) which contains *True* for
      positives and *False* for negatives
    - by using a non-bool column (*class_col*), a cutoff value
      (*class_cutoff*) and the classification column's direction
      (*class_dir*). This will generate the classification on the fly

      - if ``class_dir=='-'``: values in the classification column that are
        less than or equal to *class_cutoff* will be counted as positives
      - if ``class_dir=='+'``: values in the classification column that are
        larger than or equal to *class_cutoff* will be counted as positives

    During the calculation, the table will be sorted according to *score_dir*,
    where a '-' value means smallest values first; the smaller the value, the
    better.

    If *class_col* does not contain any positives (i.e. no value is *True* for
    a bool column, and no value passes the *class_dir*/*class_cutoff* criterion
    for an int or float column), the ROC is not defined and the function
    returns *None*.

    :warning: If either the value of *class_col* or *score_col* is *None*, the
      data in this row is ignored.
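    For example (hypothetical data), the returned curve can be fed directly
    into :meth:`ComputeROCAUC` or a plotting routine:

    .. code-block:: python

      tab = Table(['score', 'active'], 'fb',
                  score=[0.1, 0.4, 0.7, 2.5],
                  active=[True, False, True, False])
      roc = tab.ComputeROC('score', 'active')
      if roc is not None:
        fpr, tpr = roc  # x: false positive rate, y: true positive rate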
    """
    ALLOWED_DIR = ['+','-']

    score_idx = self.GetColIndex(score_col)
    score_type = self.col_types[score_idx]
    if score_type!='int' and score_type!='float':
      raise TypeError("Score column must be numeric type")

    class_idx = self.GetColIndex(class_col)
    class_type = self.col_types[class_idx]
    if class_type!='int' and class_type!='float' and class_type!='bool':
      raise TypeError("Classifier column must be numeric or bool type")

    if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
      raise ValueError("Direction must be one of %s"%str(ALLOWED_DIR))

    self.Sort(score_col, score_dir)

    # x accumulates the number of negatives (false positives), y the number
    # of positives (true positives) seen so far
    x = [0]
    y = [0]
    t = 0
    f = 0
    old_score_val = None

    for i,row in enumerate(self.rows):
      class_val = row[class_idx]
      score_val = row[score_idx]
      if class_val==None or score_val==None:
        continue
      if old_score_val==None:
        old_score_val = score_val
      if score_val!=old_score_val:
        x.append(f)
        y.append(t)
        old_score_val = score_val
      if class_type=='bool':
        if class_val==True:
          t += 1
        else:
          f += 1
      else:
        if ((class_dir=='-' and class_val<=class_cutoff) or
            (class_dir=='+' and class_val>=class_cutoff)):
          t += 1
        else:
          f += 1
    x.append(f)
    y.append(t)

    # without any positives or any negatives the ROC is not defined
    if x[-1]==0 or y[-1]==0:
      return None

    # normalise to false positive rate / true positive rate
    x = [float(v)/x[-1] for v in x]
    y = [float(v)/y[-1] for v in y]
    return x, y
  def ComputeROCAUC(self, score_col, class_col, score_dir='-',
                    class_dir='-', class_cutoff=2.0):
    """
    Computes the area under the curve of the receiver operating
    characteristics using the trapezoidal rule.

    For more information about the parameters of the ROC, see
    :meth:`ComputeROC`.

    :warning: The function depends on *numpy*
    """
    try:
      import numpy as np

      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if roc==None:
        return None
      return np.trapz(roc[1], roc[0])
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      raise
  def ComputeLogROCAUC(self, score_col, class_col, score_dir='-',
                       class_dir='-', class_cutoff=2.0):
    """
    Computes the area under the curve of the log receiver operating
    characteristics (logROC), where the x-axis is semilogarithmic, using the
    trapezoidal rule.

    The logROC is computed with a lambda of 0.001 according to
    Mysinger M. and Shoichet B., "Rapid Context-Dependent Ligand Desolvation
    in Molecular Docking", Journal of Chemical Information and Modeling 2010,
    50 (9), 1561-1573.

    For more information about the parameters of the ROC, see
    :meth:`ComputeROC`.

    :warning: The function depends on *numpy*
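    In terms of the normalised ROC curve y(x), the value computed below is a
    compact restatement of

    .. math::

      \\mathrm{logAUC}_{\\lambda} =
        \\frac{\\int_{\\lambda}^{1} y(x)\\; \\mathrm{d}\\log_{10} x}
              {\\log_{10}(1/\\lambda)},
      \\qquad \\lambda = 0.001

    where x-values of 0 are replaced by :math:`\\lambda` before integration.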
    """
    try:
      import numpy as np

      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if roc==None:
        return None
      rocxt, rocyt = roc

      # lambda: smallest x-value considered on the semilogarithmic axis
      l = 0.001

      # replace zero x-values by lambda and drop duplicate x-values
      rocxt = [x if x>0 else l for x in rocxt]
      rocx = []
      rocy = []
      for i in range(len(rocxt)-1):
        if rocxt[i]==rocxt[i+1]:
          continue
        rocx.append(rocxt[i])
        rocy.append(rocyt[i])
      rocx.append(rocxt[-1])
      rocy.append(rocyt[-1])

      # integrate the linearly interpolated curve over log10(x) and normalise
      # by the area of the ideal curve, log10(1/lambda)
      value = 0
      for i in range(len(rocx)-1):
        if rocx[i]==rocx[i+1]:
          continue
        b = rocy[i+1]-rocx[i+1]*((rocy[i+1]-rocy[i])/(rocx[i+1]-rocx[i]))
        value += ((rocy[i+1]-rocy[i])/math.log(10))+b*(math.log10(rocx[i+1])-math.log10(rocx[i]))
      return value/math.log10(1.0/l)
    except ImportError:
      LogError("Function needs numpy, but I could not import it.")
      raise
  def PlotROC(self, score_col, class_col, score_dir='-',
              class_dir='-', class_cutoff=2.0,
              style='-', title=None, x_title=None, y_title=None,
              clear=True, save=None):
    """
    Plot a ROC curve using matplotlib.

    For more information about the parameters of the ROC, see
    :meth:`ComputeROC`, and for plotting see :meth:`Plot`.

    :warning: The function depends on *matplotlib*
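    Typical usage (column names and file name are illustrative):

    .. code-block:: python

      tab.PlotROC('score', 'active', save='roc.png')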
    """
    try:
      import matplotlib.pyplot as plt

      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if roc==None:
        return None
      enrx, enry = roc

      if not title:
        title = 'ROC of %s'%score_col
      if not x_title:
        x_title = 'false positive rate'
      if not y_title:
        y_title = 'true positive rate'

      if clear:
        plt.clf()

      plt.plot(enrx, enry, style)

      plt.title(title, size='x-large', fontweight='bold')
      plt.ylabel(y_title, size='x-large')
      plt.xlabel(x_title, size='x-large')

      if save:
        plt.savefig(save)

      return plt
    except ImportError:
      LogError("Function needs matplotlib, but I could not import it.")
      raise
  def PlotLogROC(self, score_col, class_col, score_dir='-',
                 class_dir='-', class_cutoff=2.0,
                 style='-', title=None, x_title=None, y_title=None,
                 clear=True, save=None):
    """
    Plot a logROC curve, where the x-axis is semilogarithmic, using matplotlib.

    For more information about the parameters of the ROC, see
    :meth:`ComputeROC`, and for plotting see :meth:`Plot`.

    :warning: The function depends on *matplotlib*
    """
    try:
      import matplotlib.pyplot as plt

      roc = self.ComputeROC(score_col, class_col, score_dir,
                            class_dir, class_cutoff)
      if roc==None:
        return None
      rocx, rocy = roc

      if not title:
        title = 'logROC of %s'%score_col
      if not x_title:
        x_title = 'false positive rate'
      if not y_title:
        y_title = 'true positive rate'

      if clear:
        plt.clf()

      # avoid zero values on the logarithmic x-axis
      rocx = [x if x>0 else 0.001 for x in rocx]

      plt.plot(rocx, rocy, style)

      plt.title(title, size='x-large', fontweight='bold')
      plt.ylabel(y_title, size='x-large')
      plt.xlabel(x_title, size='x-large')

      # newer matplotlib versions renamed the 'basex' keyword to 'base'
      try:
        plt.xscale('log', basex=10)
      except TypeError:
        plt.xscale('log', base=10)
      plt.xlim(0.001, 1.0)

      if save:
        plt.savefig(save)

      return plt
    except ImportError:
      LogError("Function needs matplotlib, but I could not import it.")
      raise
  def ComputeMCC(self, score_col, class_col, score_dir='-',
                 class_dir='-', score_cutoff=2.0, class_cutoff=2.0):
    """
    Compute the Matthews correlation coefficient (MCC) for one column
    (*score_col*) with the points classified into true positives, false
    positives, true negatives and false negatives according to a specified
    classification column (*class_col*).

    The datapoints in *score_col* and *class_col* are classified into
    positive and negative points. This can be done in two ways:

    - by using 'bool' columns which contain *True* for positives and *False*
      for negatives

    - by using 'float' or 'int' columns and specifying a cutoff value and the
      column's direction. This will generate the classification on the fly

      * if ``class_dir``/``score_dir=='-'``: values in the classification
        column that are less than or equal to *class_cutoff*/*score_cutoff*
        will be counted as positives
      * if ``class_dir``/``score_dir=='+'``: values in the classification
        column that are larger than or equal to *class_cutoff*/*score_cutoff*
        will be counted as positives

    The two possibilities can be used together, i.e. 'bool' type for one
    column and 'float'/'int' type and cutoff/direction for the other column.
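    With the counts of true positives (tp), false positives (fp), true
    negatives (tn) and false negatives (fn) determined as described above,
    the coefficient is

    .. math::

      \\mathrm{MCC} = \\frac{tp \\cdot tn - fp \\cdot fn}
                            {\\sqrt{(tp+fn)(tp+fp)(tn+fn)(tn+fp)}}

    If any of the four factors under the square root is zero, the MCC is not
    defined; a warning is logged in that case.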
    """
    ALLOWED_DIR = ['+','-']

    score_idx = self.GetColIndex(score_col)
    score_type = self.col_types[score_idx]
    if score_type!='int' and score_type!='float' and score_type!='bool':
      raise TypeError("Score column must be numeric or bool type")

    class_idx = self.GetColIndex(class_col)
    class_type = self.col_types[class_idx]
    if class_type!='int' and class_type!='float' and class_type!='bool':
      raise TypeError("Classifier column must be numeric or bool type")

    if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
      raise ValueError("Direction must be one of %s"%str(ALLOWED_DIR))

    tp = 0
    fp = 0
    fn = 0
    tn = 0

    for i,row in enumerate(self.rows):
      class_val = row[class_idx]
      score_val = row[score_idx]
      if class_val==None or score_val==None:
        continue
      if (class_type=='bool' and class_val==True) or \
         (class_type!='bool' and ((class_dir=='-' and class_val<=class_cutoff) or
                                  (class_dir=='+' and class_val>=class_cutoff))):
        # row is a positive according to the classification column
        if (score_type=='bool' and score_val==True) or \
           (score_type!='bool' and ((score_dir=='-' and score_val<=score_cutoff) or
                                    (score_dir=='+' and score_val>=score_cutoff))):
          tp += 1
        else:
          fn += 1
      else:
        # row is a negative according to the classification column
        if (score_type=='bool' and score_val==False) or \
           (score_type!='bool' and ((score_dir=='-' and score_val>score_cutoff) or
                                    (score_dir=='+' and score_val<score_cutoff))):
          tn += 1
        else:
          fp += 1

    mcc = None
    msg = None
    if (tp+fn)==0:
      msg = 'factor (tp + fn) is zero'
    elif (tp+fp)==0:
      msg = 'factor (tp + fp) is zero'
    elif (tn+fn)==0:
      msg = 'factor (tn + fn) is zero'
    elif (tn+fp)==0:
      msg = 'factor (tn + fp) is zero'

    if msg:
      LogWarning("Could not compute MCC: MCC is not defined since %s"%msg)
    else:
      mcc = ((tp*tn)-(fp*fn)) / math.sqrt((tp+fn)*(tp+fp)*(tn+fn)*(tn+fp))
    return mcc
  def IsEmpty(self, col_name=None, ignore_nan=True):
    """
    Checks if a table is empty.

    If no column name is specified, the whole table is checked for being
    empty, whereas if a column name is specified, only this column is checked.

    By default, all NAN (or None) values are ignored, and thus a table
    containing only NAN values is considered as empty. By specifying the
    option ``ignore_nan=False``, NAN values are counted as 'normal' values.
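    For example (illustrative):

    .. code-block:: python

      tab = Table(['a', 'b'], 'ff')
      tab.IsEmpty()                      # True: no rows at all
      tab.AddRow({'a': None, 'b': None})
      tab.IsEmpty()                      # True: only None/NAN values
      tab.IsEmpty(ignore_nan=False)      # False: None values count as values
      tab.IsEmpty('a')                   # True: column 'a' holds no real value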
    """
    # check that the specified column exists
    if col_name!=None and col_name not in self.col_names:
      raise ValueError('Table has no column named "%s"' % col_name)

    # if a column is specified, it is empty if it holds no counted value
    if col_name!=None:
      if self.Count(col_name, ignore_nan=ignore_nan)==0:
        return True
      return False

    # otherwise check the whole table row by row
    for row in self.rows:
      for cell in row:
        if ignore_nan:
          if cell!=None:
            return False
        else:
          return False
    return True
  def Extend(self, tab, overwrite=None):
    """
    Append each row of *tab* to the current table. The data is appended based
    on the column names, thus the order of the table columns is *not*
    relevant, only the header names.

    If there is a column in *tab* that is not present in the current table,
    it is added to the current table and filled with *None* for all the rows
    already present in the current table.

    If the type of any column in *tab* is not the same as in the current
    table, a *TypeError* is raised.

    If *overwrite* is not None and set to an existing column name, the
    specified column in the table is searched for the first occurrence of a
    value matching the value of the column with the same name in the new row.
    If a matching value is found, that row is overwritten with the new row.
    If no matching row is found, a new row is appended to the table.
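    A small illustration with two hypothetical tables:

    .. code-block:: python

      tab1 = Table(['a', 'b'], 'if', a=[1, 2], b=[1.0, 2.0])
      tab2 = Table(['b', 'c'], 'fs', b=[3.0], c=['x'])

      tab1.Extend(tab2)
      # tab1 now has the columns a, b and c; c is None for the two original
      # rows and a is None for the row taken from tab2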
    """
    # add columns of *tab* that are not yet present in the current table
    for name, typ in zip(tab.col_names, tab.col_types):
      if name not in self.col_names:
        self.AddCol(name, typ)

    # make sure that shared columns have the same type in both tables
    for name in self.col_names:
      if name in tab.col_names:
        curr_type = self.col_types[self.GetColIndex(name)]
        new_type = tab.col_types[tab.GetColIndex(name)]
        if curr_type!=new_type:
          raise TypeError('cannot extend table, column %s in new '%name +\
                          'table different type (%s) than in '%new_type +\
                          'current table (%s)'%curr_type)

    num_rows = len(tab.rows)
    for i in range(0, num_rows):
      row = tab.rows[i]
      data = dict(list(zip(tab.col_names, row)))
      self.AddRow(data, overwrite)
def Merge(table1, table2, by, only_matching=False):
  """
  Returns a new table containing the data from both tables. The rows are
  combined based on the common values in the column(s) *by*. *by* may also be
  a list of column names; in that case, merging is based on multiple columns.
  Columns that occur in only one of the two tables are filled with *None* for
  rows that have no counterpart in the other table; if *only_matching* is
  True, only rows whose key occurs in both tables are kept. An example is
  given below.
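  For example (hypothetical tables, merged by the shared column x):

  .. code-block:: python

    tab1 = Table(['x', 'y'], 'if', x=[1, 2], y=[10.0, 20.0])
    tab2 = Table(['x', 'u'], 'if', x=[1, 3], u=[100.0, 300.0])

    merged = Merge(tab1, tab2, by='x')
    # merged has the columns x, y and u; the x=1 row combines both inputs,
    # the x=2 row has u=None, the x=3 row has y=None. With
    # only_matching=True only the x=1 row would be kept.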
  """
  def _key(row, indices):
    return tuple([row[i] for i in indices])

  def _keep(indices, cn, ct, ni):
    ncn, nct, nni = ([], [], [])
    for i in range(len(cn)):
      if i not in indices:
        ncn.append(cn[i])
        nct.append(ct[i])
        nni.append(ni[i])
    return ncn, nct, nni

  col_names = list(table2.col_names)
  col_types = list(table2.col_types)
  new_index = [i for i in range(len(col_names))]

  # determine the key columns in the second table and drop them from the
  # columns that will be appended to the merged table
  if isinstance(by, str):
    common2_indices = [col_names.index(by)]
  else:
    common2_indices = [col_names.index(b) for b in by]
  col_names, col_types, new_index = _keep(common2_indices, col_names,
                                          col_types, new_index)

  # rename columns of the second table that clash with names in the first
  for i, name in enumerate(col_names):
    try_name = name
    counter = 0
    while try_name in table1.col_names:
      counter += 1
      try_name = '%s_%d' % (name, counter)
    col_names[i] = try_name

  # index the rows of both tables by their key
  common1 = {}
  common2 = {}
  if isinstance(by, str):
    common1_indices = [table1.col_names.index(by)]
  else:
    common1_indices = [table1.col_names.index(b) for b in by]
  for row in table1.rows:
    key = _key(row, common1_indices)
    if key in common1:
      raise ValueError('duplicate key "%s" in first table' % (str(key)))
    common1[key] = row
  for row in table2.rows:
    key = _key(row, common2_indices)
    if key in common2:
      raise ValueError('duplicate key "%s" in second table' % (str(key)))
    common2[key] = row

  new_tab = Table(table1.col_names+col_names, table1.col_types+col_types)

  # rows of the first table, extended with the matching data from the second
  for k, v in common1.items():
    row = v+[None for i in range(len(table2.col_names)-len(common2_indices))]
    matched = False
    if k in common2:
      matched = True
      row2 = common2[k]
      for i, index in enumerate(new_index):
        row[len(table1.col_names)+i] = row2[index]
    if only_matching and not matched:
      continue
    new_tab.AddRow(row)

  if only_matching:
    return new_tab

  # rows that occur only in the second table
  for k, v in common2.items():
    if not k in common1:
      v2 = [v[i] for i in new_index]
      row = [None for i in range(len(table1.col_names))]+v2
      for common1_index, common2_index in zip(common1_indices, common2_indices):
        row[common1_index] = v[common2_index]
      new_tab.AddRow(row)
  return new_tab
def __init__(self, op, lhs, rhs)
def __init__(self, table, col)
def __getitem__(self, index)
def __setitem__(self, index, value)
def Percentiles(self, col, nths)
def _SaveOST(self, stream)
def ComputeEnrichment(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0)
def Sort(self, by, order='+')
def GetColIndex(self, col)
def _ParseColTypes(types, exp_num=None)
def SpearmanCorrel(self, col1, col2)
def GetNumpyMatrixAsArray(self, *args)
def Extend(self, tab, overwrite=None)
def PlotBar(self, cols=None, rows=None, xlabels=None, set_xlabels=True, xlabels_rotation='horizontal', y_title=None, title=None, colors=None, width=0.8, bottom=0, legend=False, legend_names=None, show=False, save=False)
def __init__(self, col_names=[], col_types=None, **kwargs)
def PlotHexbin(self, x, y, title=None, x_title=None, y_title=None, x_range=None, y_range=None, binning='log', colormap='jet', show_scalebar=False, scalebar_label=None, clear=True, save=False, show=False)
def GaussianSmooth(self, col, std=1.0, na_value=0.0, padding='reflect', c=0.0)
def __getattr__(self, col_name)
def _SaveHTML(self, stream_or_filename)
def __setitem__(self, k, value)
def PlotROC(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0, style='-', title=None, x_title=None, y_title=None, clear=True, save=None)
def Filter(self, *args, **kwargs)
def AddCol(self, col_name, col_type, data=None)
def Count(self, col, ignore_nan=True)
def Load(stream_or_filename, format='auto', sep=',')
def AddRow(self, data, overwrite=None)
def ToString(self, float_format='%.3f', int_format='%d', rows=None)
def _SaveContext(self, stream_or_filename)
def PlotLogROC(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0, style='-', title=None, x_title=None, y_title=None, clear=True, save=None)
def Correl(self, col1, col2)
def PlotEnrichment(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0, style='-', title=None, x_title=None, y_title=None, clear=True, save=None)
def ComputeEnrichmentAUC(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0)
def _AddRowsFromDict(self, d, overwrite=None)
def Plot(self, x, y=None, z=None, style='.', x_title=None, y_title=None, z_title=None, x_range=None, y_range=None, z_range=None, color=None, plot_if=None, legend=None, num_z_levels=10, z_contour=True, z_interpol='nn', diag_line=False, labels=None, max_num_labels=None, title=None, clear=True, save=False, **kwargs)
def Save(self, stream_or_filename, format='ost', sep=',')
def GetUnique(self, col, ignore_nan=True)
def ComputeLogROCAUC(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0)
def _SavePickle(self, stream)
def IsEmpty(self, col_name=None, ignore_nan=True)
def RenameCol(self, old_name, new_name)
def ComputeROC(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0)
def _Coerce(self, value, ty)
def GetNumpyMatrix(self, *args)
def _SaveCSV(self, stream, sep)
def ComputeROCAUC(self, score_col, class_col, score_dir='-', class_dir='-', class_cutoff=2.0)
def GetOptimalPrefactors(self, ref_col, *args, **kwargs)
def PairedTTest(self, col_a, col_b)
def PlotHistogram(self, col, x_range=None, num_bins=10, normed=False, histtype='stepfilled', align='mid', x_title=None, y_title=None, title=None, clear=True, save=False, color=None, y_range=None)
def SearchColNames(self, regex)
def ComputeMCC(self, score_col, class_col, score_dir='-', class_dir='-', score_cutoff=2.0, class_cutoff=2.0)
def RowMean(self, mean_col_name, cols)
def __setitem__(self, col_name, val)
def __getattr__(self, col_name)
def __setattr__(self, col_name, val)
def __init__(self, row_data, tab)
def __getitem__(self, col_name)
def GuessColumnType(iterator)
def Merge(table1, table2, by, only_matching=False)