"""
Data table storage.
Does not implement any indexing or iterator interface (it's delegated to `IDataTable`).
Should not be accessed directly by users of `DataTable`.
"""
from builtins import range
from ..utils.py3 import textstring
from . import indexing
from . import column
from .datatable_utils import get_shape, as_array
import numpy as np
import pandas as pd
[docs]class IDataTableStorage(object):
def __init__(self):
object.__init__(self)
## Shape ##
@property # property for compatibility with np.shape
def shape(self):
raise NotImplementedError("IDataTableStorage.shape")
@property # property for compatibility with np.ndim
def ndim(self):
return 2
## Casting to NumPy array ##
[docs] def as_array(self, force_copy=False):
"""
Turn the storage into a numpy array.
If ``force_copy==True``, ensure that the result is a copy of the data.
"""
if self.shape[1]==0:
return np.zeros((0,0))
if force_copy:
return self.get_item(slice(None)).copy()
else:
return self.get_item(slice(None))
def __array__(self): # property for compatibility with np.ufuncs
return self.as_array()
[docs] def as_pandas(self, force_copy=False):
"""
Turn the storage into a pandas DataFrame.
If ``force_copy==True``, ensure that the result is a copy of the data.
"""
return pd.DataFrame(self.as_array(force_copy=False),columns=self.get_column_names(),copy=force_copy)
## Indexing ##
# numpy-like; accept 1D or 2D numpy style index
[docs] def get_item(self, idx): # ; return np.array, unless single element is accessed
"""Return the data at the index `idx` (1D or 2D) as a numpy array."""
raise NotImplementedError("IDataTableStorage.get_item")
[docs] def set_item(self, idx, val): # accept 1D or 2D index numpy-style
"""Return the data at the index `idx` (1D or 2D) to `val`."""
raise NotImplementedError("IDataTableStorage.set_item")
# column-wise; accept 1D index suitable for columns
[docs] def get_columns(self, idx): # return column object or list of column objects
"""Return a column or a list of columns at the index `idx` (1D)."""
raise NotImplementedError("IDataTableStorage.get_columns")
[docs] def get_single_column(self, idx): # same as get_columns, but only accept single number as an index
"""
Return a single column at the index `idx` (1D).
Same as :meth:`get_columns`, but only accepts single column index.
"""
return self.get_columns(idx)
[docs] def set_columns(self, idx, val): # accepts column object (or list of column objects) or iterable (or 2D iterable)
"""Set a column or a list of columns at the index `idx` (1D) to `val`."""
raise NotImplementedError("IDataTableStorage.set_columns")
[docs] def add_columns(self, idx, val, names, transposed, force_copy=False): # accepts column object (or list of column objects) or iterable (or 2D iterable);
"""
Add new columns at index `idx` (1D).
Columns data is given by `val` and their names are given by `names` (a string for a single column, or a list of strings for multiple columns).
If ``transposed==True``, `val` is assumed to be arranged column-wise (list of columns).
If ``transposed==False``, `val` is assumed to be arranged row-wise (list of rows).
If ``transposed=="auto"``, it is assumed to be ``True`` if `val` is a 2D numpy array, and ``False`` otherwise.
If ``force_copy==True``, make sure that `val` data is copied.
"""
#if transposed="auto" and argument is 2D numpy array, effectively transposes it to enforce standard notation (column is second idx)
raise NotImplementedError("IDataTableStorage.add_columns")
[docs] def del_columns(self, idx):
"""Delete a column or a list of columns at the index `idx` (1D)"""
raise NotImplementedError("IDataTableStorage.del_columns")
# row-wise; accept 1D index suitable for rows
[docs] def get_rows(self, idx): # return each row as tuple (i.e., return tuple or list of tuples) (for numpy arrays use _get_item)
"""
Return a row or a list of rows at the index `idx` (1D).
Each row is represented as a tuple.
"""
raise NotImplementedError("IDataTableStorage.get_rows")
[docs] def get_single_row(self, idx): # same as get_rows, but only accept single number as an index
"""
Return a single row at the index `idx` (1D) as a tuple.
Same as :meth:`get_rows`, but only accepts single column index.
"""
return self.get_rows(idx)
[docs] def get_single_row_item(self, idx): # same as get_item, but only accept single number as an (row) index
"""
Return a single row at the index `idx` (1D) as a numpy array.
Same as :meth:`get_item`, but only accepts single column index.
"""
return self.get_item(idx)
[docs] def set_rows(self, idx, val): # accepts iterable (or 2D iterable)
"""
Set a row or a list of rows at the index `idx` (1D) to `val`.
"""
self.set_item((idx,slice(None)),val)
[docs] def add_rows(self, idx, val): # accepts iterable (or 2D iterable)
"""
Add new rows at index `idx` (1D).
"""
raise NotImplementedError("IDataTableStorage.add_rows")
[docs] def del_rows(self, idx):
"""Delete a row or a list of rows at the index `idx` (1D)"""
raise NotImplementedError("IDataTableStorage.del_rows")
# table-wise
[docs] def get_subtable(self, idx):
"""Return the data at the index `idx` (1D or 2D) as an `IDataTableStorage` object of the same type."""
raise NotImplementedError("IDataTableStorage.get_subtable")
[docs] def expand(self, length):
"""
Expand the table by `length`. Usually fill with zeros, unless the column values can be auto-predicted.
"""
raise NotImplementedError("IDataTableStorage.expand")
## Columns indexing ##
[docs] def get_column_names(self, idx=None):
"""Return the list of column names at index `idx` (by default, all of the names)."""
raise NotImplementedError("IDataTableStorage.get_column_names")
[docs] def get_column_indices(self, idx=None):
"""Return the list of column numerical indices corresponding to the index `idx`."""
raise NotImplementedError("IDataTableStorage.get_column_indices")
[docs] def set_column_names(self, new_names):
"""Set new column names."""
raise NotImplementedError("IDataTableStorage.change_column_name")
[docs] def swap_columns(self, idx1, idx2):
"""Swap two columns at indices `idx1` and `idx2`."""
raise NotImplementedError("IDataTableStorage.swap_columns")
## Copying ##
[docs] def copy(self):
raise NotImplementedError("IDataTableStorage.copy")
## Repr ##
def __str__(self):
return self.as_array().__str__()
def __repr__(self):
s=str(self).replace("\n ","\n"+" "*6)
return "{0}(columns={1},\ndata={2})".format(type(self).__name__,str(self.get_column_names()),s)
class ColumnDataTableStorage(IDataTableStorage):
    """
    Table storage which stores the data as a list of columns (defined in :mod:`.datatable.column`).

    More flexible compared to the :class:`ArrayDataTableStorage`, but potentially slower.

    Args:
        columns: table data; can be a numpy array, a list of columns, or a 2D list
        names(list): list of column names; by default, the column names are autogenerated: ``"col00"``, ``"col01"``, etc.
        transposed: if ``True``, the `columns` arguments is assumed to be column-wise (list of columns)
            if ``False``, the `columns` arguments is assumed to be row-wise (list of rows)
            if ``"auto"``, assumed to be ``False`` for numpy arrays and ``True`` otherwise
        force_copy (bool): if ``True``, make sure that the supplied data is copied
    """
    def __init__(self, columns=None, names=None, transposed="auto", force_copy=True):
        IDataTableStorage.__init__(self)
        self._columns=[]
        self._column_names=[]
        self._column_uid=0 # counter used for autogenerated column names
        if columns is not None:
            self.add_columns(0,columns,names,transposed=transposed,force_copy=force_copy)

    ## Shape ##
    @property
    def shape(self):
        """Shape of the table as a tuple ``(rows, columns)``; ``(0,0)`` if there are no columns."""
        ncols=len(self._columns)
        if ncols==0:
            return (0,0)
        return (len(self._columns[0]),ncols)
    def _added_shape_valid(self, shape, direction="column"):
        """
        Check if adding data doesn't violate shape rectangleness.

        `shape` is the ``(rows, columns)`` shape of the added data.
        `direction` can be ``"column"`` or ``"row"``.
        """
        if self.shape[1]==0: # empty table accepts any shape
            return True
        if direction=="column":
            return self.shape[0]==shape[0]
        return self.shape[1]==shape[1]
    def as_pandas(self, force_copy=False):
        """
        Turn the storage into a pandas DataFrame.

        If ``force_copy==True``, ensure that the result is a copy of the data.
        """
        columns=[]
        for c in self._columns:
            if isinstance(c,column.ListDataColumn):
                c=c._column # list columns are passed as their underlying storage
            else:
                c=c.as_array(force_copy=False)
            columns.append(c)
        names=self.get_column_names()
        return pd.DataFrame(dict(zip(names,columns)),columns=names,copy=force_copy)

    ## Columns indexing ##
    def get_column_names(self, idx=None):
        """Return the list of column names at index `idx` (by default, all of the names)."""
        if idx is None:
            return self._column_names
        ndim,idx=indexing.to_list_idx_noslice(idx,self._column_names).tup()
        if ndim==0:
            return self._column_names[idx]
        return [self._column_names[i] for i in idx]
    def get_column_indices(self, idx=None):
        """Return the list of column numerical indices corresponding to the index `idx`."""
        if idx is None:
            return list(range(len(self._columns)))
        return indexing.to_list_idx_noslice(idx,self._column_names).idx
    def set_column_names(self, new_names):
        """
        Set new column names.

        Raises :exc:`ValueError` if the number of names is different from the number of columns.
        """
        if len(new_names)!=len(self._columns):
            raise ValueError("wrong number of column names: expected {0}, got {1}".format(len(self._columns),len(new_names)))
        self._check_name_clashes(new_names,adding=False)
        # store an own list: the names are later modified in place (swap_columns, add_columns)
        self._column_names=list(new_names)
    def swap_columns(self, idx1, idx2):
        """Swap two columns at indices `idx1` and `idx2`."""
        ndim1,idx1=indexing.to_list_idx_noslice(idx1,self.get_column_names()).tup()
        ndim2,idx2=indexing.to_list_idx_noslice(idx2,self.get_column_names()).tup()
        if ndim1!=0 or ndim2!=0:
            raise ValueError("Can only swap one pair at a time")
        self._column_names[idx1],self._column_names[idx2]=self._column_names[idx2],self._column_names[idx1]
        self._columns[idx1],self._columns[idx2]=self._columns[idx2],self._columns[idx1]
    def _check_name_clashes(self, names, adding=True):
        """
        Raise :exc:`KeyError` if `names` (a single name or a list of names) contain duplicates.

        If ``adding==True``, also check for clashes with the names of the already present columns.
        """
        if isinstance(names,textstring):
            names=[names]
        for i,n in enumerate(names): # check collisions in the supplied array
            if n in names[i+1:]:
                raise KeyError("duplicate column name: {0}".format(n))
        if adding: # check collisions with the current columns
            for n in names:
                try:
                    indexing.string_list_idx(n,self._column_names,only_exact=True)
                except KeyError: # name is not found, so it is free
                    continue
                raise KeyError("duplicate column name: {0}".format(n))
    def _gen_unique_name(self):
        """Generate a name of the form ``"colNN"`` which doesn't clash with the present column names."""
        while True:
            name="col{:02}".format(self._column_uid)
            self._column_uid=self._column_uid+1
            try:
                self._check_name_clashes(name,adding=True)
                return name
            except KeyError: # name is taken; try the next number
                pass

    ## Copying ##
    def copy(self):
        """Return a copy of the storage (both the columns and the names list are copied)."""
        return ColumnDataTableStorage([c.copy() for c in self._columns],list(self._column_names))

    ## Column modification ##
    def _set_item_column(self, c_idx, r_idx, val):
        # the column method returns the column to store (which replaces the current one)
        self._columns[c_idx]=self._columns[c_idx]._set_item(r_idx,val)
    def _add_item_column(self, c_idx, r_idx, val):
        self._columns[c_idx]=self._columns[c_idx]._add_item(r_idx,val)

    ## Indexing ##
    ## numpy-like return type ##
    def get_item(self, idx):
        """
        Return the data at the index `idx` (1D or 2D) as a numpy array.

        Raises :exc:`IndexError` if the table has no columns.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        r_idx,c_idx=indexing.to_double_index(idx,self.get_column_names())
        c_ndim,c_idx=c_idx.tup()
        if c_ndim==0:
            return self._columns[c_idx][r_idx.idx]
        if r_idx.ndim==0: # single row, several columns: 1D result
            return np.array([self._columns[c][r_idx] for c in c_idx])
        return np.column_stack([self._columns[c][r_idx] for c in c_idx])
    def set_item(self, idx, val):
        """
        Set the data at the index `idx` (1D or 2D) to `val`.

        `val` can be a scalar (assigned to all the indexed elements), a 1D sequence
        (a column part for a single column index, or one value per column for several columns),
        or a 2D list, tuple or array arranged row-wise.
        Raises :exc:`IndexError` if the table has no columns, and :exc:`ValueError` if the shape of `val` doesn't agree with the index.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        r_idx,c_idx=indexing.to_double_index(idx,self.get_column_names())
        c_ndim,c_idx=c_idx.tup()
        v_shape=get_shape(val)
        v_ndim=len(v_shape)
        if v_ndim==0:
            if c_ndim==0:
                self._set_item_column(c_idx,r_idx,val)
            else:
                for c in c_idx:
                    self._set_item_column(c,r_idx,val)
        elif v_ndim==1:
            if c_ndim==0:
                if r_idx.ndim==0:
                    raise ValueError("can't assign array to element")
                self._set_item_column(c_idx,r_idx,val)
            else:
                if v_shape[0]!=len(c_idx):
                    raise ValueError("wrong dimension of assigned value")
                for i,c in enumerate(c_idx):
                    self._set_item_column(c,r_idx,val[i])
        elif v_ndim==2:
            if c_ndim*r_idx.ndim!=1: # 2D value requires both indices to be 1D
                raise ValueError("can't assign array to element")
            if isinstance(val,(list,tuple)): # nested sequences can't be sliced column-wise
                for i,c in enumerate(c_idx):
                    self._set_item_column(c,r_idx,[row[i] for row in val])
            else:
                for i,c in enumerate(c_idx):
                    self._set_item_column(c,r_idx,val[:,i])
        else:
            raise ValueError("can't assign multidimensional arrays with d>2")

    ## column-wise ##
    def get_columns(self, idx): # return column object or list of column objects
        """Return a column or a list of columns at the index `idx` (1D)."""
        ndim,idx=indexing.to_list_idx_noslice(idx,self.get_column_names()).tup()
        if ndim==0:
            return self._columns[idx]
        return [self._columns[i] for i in idx]
    def get_single_column(self, idx):
        """
        Return a single column at the index `idx` (1D).

        Same as :meth:`get_columns`, but only accepts single column index (used directly as the list index).
        """
        return self._columns[idx]
    def set_columns(self, idx, val, force_copy=False): # accepts column object (or list of column objects) or iterable (or 2D iterable)
        """
        Set a column or a list of columns at the index `idx` (1D) to `val`.

        A scalar `val` is assigned to all elements of the indexed columns.
        A 1D `val` replaces every indexed column (the first one gets the column built from `val`, the rest get its copies).
        A 2D `val` is taken column-wise: ``val[i]`` replaces the ``i``-th indexed column.
        Raises :exc:`ValueError` if the length of the new columns doesn't agree with the table.
        """
        c_ndim,idx=indexing.to_list_idx_noslice(idx,self.get_column_names()).tup()
        v_shape=get_shape(val)
        v_ndim=len(v_shape)
        if v_ndim==0:
            if c_ndim==0:
                self._columns[idx][:]=val
            else:
                for c in idx:
                    self._columns[c][:]=val
        elif v_ndim==1:
            if not self._added_shape_valid((v_shape[0],1)):
                raise ValueError("wrong length for the new columns")
            if c_ndim==0:
                self._columns[idx]=column.as_column(val,False,force_copy=force_copy)
            else:
                val=column.as_column(val,False,force_copy=force_copy)
                if len(idx)>0:
                    self._columns[idx[0]]=val
                for c in idx[1:]: # the remaining columns get independent copies
                    self._columns[c]=val.copy()
        elif v_ndim==2:
            if not self._added_shape_valid(v_shape[::-1]):
                raise ValueError("wrong length for the new columns")
            if c_ndim==0:
                raise ValueError("can't assign array to element")
            for i,c in enumerate(idx):
                self._columns[c]=column.as_column(val[i],False,force_copy=force_copy)
        else:
            raise ValueError("can't assign multidimensional arrays with d>2")
    def add_columns(self, idx, val, names, transposed="auto", force_copy=False): # accepts column object (or list of column objects) or iterable (or 2D iterable)
        """
        Add new columns at index `idx` (1D).

        Columns data is given by `val` and their names are given by `names` (a string for a single column, or a list of strings for multiple columns);
        if `names` is ``None``, the names are autogenerated.
        If ``transposed==True``, `val` is assumed to be arranged column-wise (list of columns).
        If ``transposed==False``, `val` is assumed to be arranged row-wise (list of rows).
        If ``transposed=="auto"``, a 2D `val` is assumed to be column-wise if it is a list or a tuple, and row-wise otherwise (e.g., for a numpy array).
        A 1D `val` is treated as a single row if ``transposed==True``, or if ``transposed=="auto"``, the table is empty,
        and the number of names is the same as the length of `val`; otherwise, it is treated as a column (duplicated for every name).
        If ``force_copy==True``, make sure that `val` data is copied.
        Raises :exc:`ValueError` for several insert locations, wrong shape of `val` or wrong number of names, and :exc:`KeyError` for clashing names.
        """
        c_ndim,idx=indexing.to_list_idx_noslice(idx,self.get_column_names()).tup()
        if c_ndim==1:
            raise ValueError("can only insert items in a single location")
        if isinstance(names,textstring) or names is None:
            names=[names]
        v_shape=get_shape(val)
        v_ndim=len(v_shape)
        # in every branch val becomes a list of columns, and v_shape becomes (columns, rows)
        if v_ndim==0:
            if len(self._columns)==0:
                raise ValueError("can't add number to an empty table")
            try:
                val+0 # check if the value is numeric
                val=[column.LinearDataColumn(self.shape[0],val,0) for _ in names] # duplicate for several new columns
            except TypeError: # non-numeric val; create list column instead
                val=[column.ListDataColumn([val]*self.shape[0]) for _ in names]
            v_shape=(len(val),self.shape[0])
        elif v_ndim==1:
            if (transposed=="auto" and len(self._columns)==0 and len(names)==len(val)) or (transposed==True): # row is supplied
                val=[column.as_column([v],False,force_copy=force_copy) for v in val]
                v_shape=(v_shape[0],1)
            else:
                val=[column.as_column(val,False,force_copy=force_copy) for _ in names] # duplicate for several new columns
                v_shape=(len(names),v_shape[0])
        elif v_ndim==2:
            if transposed=="auto":
                transposed=isinstance(val,(list,tuple))
            if transposed:
                val=[column.as_column(c,False,force_copy=force_copy) for c in val]
            else:
                v_shape=v_shape[::-1]
                if isinstance(val,np.ndarray):
                    val=[column.as_column(val[:,c],False,force_copy=force_copy) for c in range(v_shape[0])]
                else:
                    val=[column.as_column([val[r][c] for r in range(v_shape[1])],False,force_copy=force_copy) for c in range(v_shape[0])]
        else:
            raise ValueError("can't assign multidimensional arrays with d>2")
        if not self._added_shape_valid(v_shape[::-1]):
            raise ValueError("invalid shape of added columns")
        if names==[None]:
            names=[self._gen_unique_name() for _ in range(len(val))]
        if len(names)!=len(val):
            raise ValueError("invalid column names number: expected {0}, got {1}".format(len(val),len(names)))
        self._check_name_clashes(names,adding=True)
        self._columns[idx:idx]=val
        self._column_names[idx:idx]=names
    def del_columns(self, idx):
        """Delete a column or a list of columns at the index `idx` (1D)."""
        ndim,idx=indexing.to_list_idx_noslice(idx,self.get_column_names()).tup()
        if ndim==0:
            idx=[idx]
        removed=set(idx)
        kept=[c for c in range(len(self._columns)) if c not in removed]
        self._columns=[self._columns[c] for c in kept]
        self._column_names=[self._column_names[c] for c in kept]

    ## row-wise ##
    def get_rows(self, idx): # return each row as tuple (i.e., return tuple or list of tuples) (for numpy arrays use get_item)
        """
        Return a row or a list of rows at the index `idx` (1D).

        Each row is represented as a tuple.
        Raises :exc:`IndexError` if the table has no columns.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        r_idx=indexing.to_numpy_idx(idx)
        if r_idx.ndim==0:
            return tuple([c[r_idx] for c in self._columns])
        cols=[c[r_idx] for c in self._columns]
        return list(zip(*cols)) # materialize: zip is a one-shot iterator on Python 3
    def get_single_row(self, idx): # same as get_rows, but only accept single number as an index
        """
        Return a single row at the index `idx` (1D) as a tuple.

        Same as :meth:`get_rows`, but only accepts single row index.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        return tuple([c._get_single_item(idx) for c in self._columns])
    def get_single_row_item(self, idx): # same as get_item, but only accept single number as an (row) index
        """
        Return a single row at the index `idx` (1D) as a numpy array.

        Same as :meth:`get_item`, but only accepts single row index.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        return as_array([c._get_single_item(idx) for c in self._columns])
    def add_rows(self, idx, val): # accepts iterable (or 2D iterable)
        """
        Add new rows at index `idx` (1D).

        `val` can be a scalar (a single row filled with this value), a 1D sequence (a single row), or a 2D list, tuple or array arranged row-wise.
        Raises :exc:`IndexError` if the table has no columns, and :exc:`ValueError` for several insert locations or wrong shape of `val`.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        r_idx=indexing.to_numpy_idx(idx)
        if r_idx.ndim==1:
            raise ValueError("can only insert items in a single location")
        v_shape=get_shape(val)
        v_ndim=len(v_shape)
        if v_ndim==0:
            ncols=len(self._columns)
            val=[[val]*ncols]
            v_shape=(1,ncols)
        elif v_ndim==1:
            val=[val]
            v_shape=(1,v_shape[0])
        elif v_ndim!=2:
            raise ValueError("can't assign multidimensional arrays with d>2")
        if not self._added_shape_valid(v_shape,"row"):
            raise ValueError("invalid shape of added rows")
        if isinstance(val,(list,tuple)): # nested sequences can't be sliced column-wise
            for c in range(len(self._columns)):
                self._add_item_column(c,r_idx,[row[c] for row in val])
        else:
            for c in range(len(self._columns)):
                self._add_item_column(c,r_idx,val[:,c])
    def del_rows(self, idx):
        """
        Delete a row or a list of rows at the index `idx` (1D).

        Raises :exc:`IndexError` if the table has no columns.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        r_idx=indexing.to_numpy_idx(idx)
        self._columns=[c._del_item(r_idx) for c in self._columns]

    # table-wise
    def get_subtable(self, idx, force_copy=False):
        """
        Return the data at the index `idx` (1D or 2D) as an `IDataTableStorage` object of the same type.

        `force_copy` is passed to the ``subcolumn`` method of the selected columns.
        Raises :exc:`IndexError` if the table has no columns.
        """
        if not self._columns:
            raise IndexError("no columns in the table")
        r_idx,c_idx=indexing.to_double_index(idx,self.get_column_names())
        c_ndim,c_idx=c_idx.tup()
        if c_ndim==0:
            c_idx=[c_idx]
        new_column_names=[self._column_names[c] for c in c_idx]
        new_columns=[self._columns[c].subcolumn(r_idx,force_copy=force_copy) for c in c_idx]
        # NOTE(review): the constructor is called with its default force_copy=True, so the data is presumably copied here regardless of `force_copy` - confirm whether this is intended
        return ColumnDataTableStorage(new_columns,new_column_names)
    def expand(self, length):
        """
        Expand the table by `length`. Usually fill with zeros, unless the column values can be auto-predicted.
        """
        self._columns=[c.expand(length) for c in self._columns]

    ## Repr ##
    def __str__(self):
        try:
            return self.as_array().__str__()
        except ValueError: # complicated shape
            return str([list(self.get_rows(r)) for r in range(self.shape[0])])
class ArrayDataTableStorage(IDataTableStorage):
    """
    Table storage which stores the data as a 2D numpy array.

    Faster, but less flexible than :class:`ColumnDataTableStorage`.
    Indexing is only numpy-style or column-wise.
    All columns have the same datatype and are stored in the same array.
    All columns and rows are returned as numpy arrays.

    Args:
        columns: table data; can be a numpy array, a list of columns, or a 2D list
        names(list): list of column names; by default, the column names are autogenerated: ``"col00"``, ``"col01"``, etc.
        transposed: if ``True``, the `columns` arguments is assumed to be column-wise (list of columns)
            if ``False``, the `columns` arguments is assumed to be row-wise (list of rows)
            if ``"auto"``, assumed to be ``False`` for numpy arrays and ``True`` otherwise
        force_copy (bool): if ``True``, make sure that the supplied data is copied
    """
    def __init__(self, columns=None, names=None, transposed="auto", force_copy=True):
        IDataTableStorage.__init__(self)
        self._data=None # 2D array (rows, columns); None until the first data is added
        self._column_names=[]
        self._column_uid=0 # counter used for autogenerated column names
        if columns is not None:
            self.add_columns(0,columns,names,transposed=transposed,force_copy=force_copy)

    ## Shape ##
    @property
    def shape(self):
        """Shape of the table as a tuple ``(rows, columns)``; ``(0,0)`` if no data has been added yet."""
        if self._data is None:
            return (0,0)
        return self._data.shape
    def _added_shape_valid(self, shape, direction="column"):
        """
        Check if adding data doesn't violate shape rectangleness.

        `shape` is the ``(rows, columns)`` shape of the added data.
        `direction` can be ``"column"`` or ``"row"``.
        """
        if self._data is None: # table without data accepts any shape
            return True
        if direction=="column":
            return self.shape[0]==shape[0]
        return self.shape[1]==shape[1]

    ## Columns indexing ##
    def get_column_names(self, idx=None):
        """Return the list of column names at index `idx` (by default, all of the names)."""
        if idx is None:
            return self._column_names
        ndim,idx=indexing.to_list_idx_noslice(idx,self._column_names).tup()
        if ndim==0:
            return self._column_names[idx]
        return [self._column_names[i] for i in idx]
    def get_column_indices(self, idx=None):
        """Return the list of column numerical indices corresponding to the index `idx`."""
        if idx is None:
            return list(range(len(self._column_names)))
        return indexing.to_list_idx_noslice(idx,self._column_names).idx
    def set_column_names(self, new_names):
        """
        Set new column names.

        Raises :exc:`ValueError` if the number of names is different from the number of columns.
        """
        if len(new_names)!=self.shape[1]:
            raise ValueError("wrong number of column names")
        # the old names are replaced, so only clashes within the new names matter
        self._check_name_clashes(new_names,adding=False)
        # store an own list: the names are later modified in place (swap_columns, add_columns)
        self._column_names=list(new_names)
    def swap_columns(self, idx1, idx2):
        """Swap two columns at indices `idx1` and `idx2`."""
        ndim1,idx1=indexing.to_list_idx_noslice(idx1,self.get_column_names()).tup()
        ndim2,idx2=indexing.to_list_idx_noslice(idx2,self.get_column_names()).tup()
        if ndim1!=0 or ndim2!=0:
            raise ValueError("Can only swap one pair at a time")
        self._column_names[idx1],self._column_names[idx2]=self._column_names[idx2],self._column_names[idx1]
        # copies are needed, since the column slices are views into the same array
        self._data[:,idx1],self._data[:,idx2]=self._data[:,idx2].copy(),self._data[:,idx1].copy()
    def _check_name_clashes(self, names, adding=True):
        """
        Raise :exc:`KeyError` if `names` (a single name or a list of names) contain duplicates.

        If ``adding==True``, also check for clashes with the names of the already present columns.
        """
        if isinstance(names,textstring):
            names=[names]
        for i,n in enumerate(names): # check collisions in the supplied array
            if n in names[i+1:]:
                raise KeyError("duplicate column name: {0}".format(n))
        if adding: # check collisions with the current columns
            for n in names:
                try:
                    indexing.string_list_idx(n,self._column_names,only_exact=True)
                except KeyError: # name is not found, so it is free
                    continue
                raise KeyError("duplicate column name: {0}".format(n))
    def _gen_unique_name(self):
        """Generate a name of the form ``"colNN"`` which doesn't clash with the present column names."""
        while True:
            name="col{:02}".format(self._column_uid)
            self._column_uid=self._column_uid+1
            try:
                self._check_name_clashes(name)
                return name
            except KeyError: # name is taken; try the next number
                pass

    ## Copying ##
    def copy(self):
        """Return a copy of the storage (both the data array and the names list are copied)."""
        if self._data is None: # nothing to copy in a table without data
            return ArrayDataTableStorage()
        # the array is already copied here, so the constructor doesn't need to copy it again
        return ArrayDataTableStorage(self._data.copy(),list(self._column_names),force_copy=False)

    ## Type expanding ##
    def _expand_type(self, val):
        """
        Expand type of the stored array to accommodate value (int < float < complex).

        `val` is a numpy array; if its dtype compares greater than the dtype of the stored data, the data is converted to this dtype.
        Raises :exc:`ValueError` if this dtype is not int, float or complex.
        """
        if val.dtype>self._data.dtype:
            if val.dtype not in ["int","float","complex"]:
                raise ValueError("don't support numpy arrays other than int, float or complex")
            self._data=self._data.astype(val.dtype)

    ## Indexing ##
    def _to_column_index(self, idx):
        """
        Translate a name-based column index into a numerical one.

        Handles a single name, a non-empty list starting with a name, or a slice with a name as a start or a stop.
        Return ``None`` for any other index.
        """
        if isinstance(idx,textstring) or (isinstance(idx,list) and len(idx)>0 and isinstance(idx[0],textstring)):
            return indexing.string_list_idx(idx,self.get_column_names())
        if isinstance(idx,slice) and (isinstance(idx.start,textstring) or isinstance(idx.stop,textstring)):
            start_stop=indexing.string_list_idx([idx.start,idx.stop],self.get_column_names())
            return slice(start_stop[0],start_stop[1],idx.step)
        return None
    ## numpy like ##
    def get_item(self, idx):
        """
        Return the data at the index `idx` (1D or 2D) as a numpy array.

        A name-based index (see :meth:`_to_column_index`) selects columns; any other index is applied to the data array directly.
        """
        column_idx=self._to_column_index(idx)
        if column_idx is None:
            return self._data[idx]
        return self._data[:,column_idx]
    def set_item(self, idx, val):
        """
        Set the data at the index `idx` (1D or 2D) to `val`.

        A name-based index (see :meth:`_to_column_index`) selects columns; any other index is applied to the data array directly.
        """
        val=as_array(val)
        self._expand_type(val)
        column_idx=self._to_column_index(idx)
        if column_idx is None:
            self._data[idx]=val
        else:
            self._data[:,column_idx]=val

    ## column-wise ##
    def get_columns(self, idx):
        """Return a column or a list of columns at the index `idx` (1D), each as an ``ArrayDataColumn``."""
        column_idx=self._to_column_index(idx)
        if column_idx is not None:
            idx=column_idx
        res=self._data[:,idx]
        if res.ndim==1:
            return column.ArrayDataColumn(res)
        return [column.ArrayDataColumn(res[:,i]) for i in range(res.shape[1])]
    def get_single_column(self, idx):
        """
        Return a single column at the index `idx` (1D) as a numpy array.

        Same as :meth:`get_columns`, but only accepts single column index (used directly as the array index).
        """
        return self._data[:,idx]
    def set_columns(self, idx, val, force_copy=False):
        """
        Set a column or a list of columns at the index `idx` (1D) to `val`.

        `force_copy` is unused (the values are always copied into the data array).
        """
        val=as_array(val)
        self._expand_type(val)
        column_idx=self._to_column_index(idx)
        if column_idx is None:
            self._data[:,idx]=val
        else:
            self._data[:,column_idx]=val
    def add_columns(self, idx, val, names, transposed="auto", force_copy=False): # accepts column object (or list of column objects) or iterable (or 2D iterable)
        """
        Add new columns at index `idx` (1D).

        Columns data is given by `val` and their names are given by `names` (a string for a single column, or a list of strings for multiple columns);
        if `names` is ``None``, the names are autogenerated.
        If ``transposed==True``, `val` is assumed to be arranged column-wise (list of columns).
        If ``transposed==False``, `val` is assumed to be arranged row-wise (list of rows).
        If ``transposed=="auto"``, a 2D `val` is assumed to be column-wise if it is a list or a tuple, and row-wise otherwise (e.g., for a numpy array).
        A scalar or a 1D `val` defines a single column, which is duplicated for every name.
        If ``force_copy==True``, make sure that `val` data is copied.
        Raises :exc:`ValueError` for several insert locations, wrong shape of `val` or wrong number of names, and :exc:`KeyError` for clashing names.
        """
        if (not np.isscalar(idx)) or isinstance(idx,slice):
            raise ValueError("can only insert items in a single location")
        column_idx=self._to_column_index(idx)
        if column_idx is not None:
            idx=column_idx
        if isinstance(names,textstring) or names is None:
            names=[names]
        v_ndim=np.ndim(val)
        # in every branch val becomes a 2D array with the shape (rows, new columns)
        if v_ndim==0:
            if self.shape[1]==0:
                raise ValueError("can't add number to an empty table")
            val=np.zeros((self.shape[0],len(names)))+val
        elif v_ndim==1:
            val=as_array(val)[:,np.newaxis]
            if len(names)!=1: # duplicate for several new columns
                val=np.repeat(val,len(names),axis=1)
        elif v_ndim==2:
            if transposed=="auto":
                transposed=isinstance(val,(list,tuple))
            if transposed:
                val=as_array(val).transpose()
            else:
                val=as_array(val)
        else:
            raise ValueError("can't assign multidimensional arrays with d>2")
        if not self._added_shape_valid(val.shape):
            raise ValueError("invalid shape of added columns")
        if names==[None]:
            names=[self._gen_unique_name() for _ in range(val.shape[1])]
        if len(names)!=val.shape[1]:
            raise ValueError("invalid column names number")
        self._check_name_clashes(names,adding=True)
        if self._data is None:
            self._data=val.copy() if force_copy else val
        else:
            # concatenation always builds a new array, so no explicit copy is needed
            self._data=np.concatenate((self._data[:,:idx],val,self._data[:,idx:]),axis=1)
        self._column_names[idx:idx]=names
    def del_columns(self, idx):
        """Delete a column or a list of columns at the index `idx` (1D)."""
        column_idx=self._to_column_index(idx)
        if column_idx is not None:
            idx=column_idx
        ncols=self.shape[1]
        if indexing.is_slice(idx):
            idx=indexing.to_range(idx,ncols)
        if np.isscalar(idx):
            idx=[idx]
        self._data=np.delete(self._data,idx,axis=1)
        # np.delete accepts negative indices, so they are normalized to remove the matching names
        removed=set((i+ncols if i<0 else i) for i in idx)
        self._column_names=[c for i,c in enumerate(self._column_names) if i not in removed]

    ## row-wise ##
    get_rows=get_item # rows are returned as numpy arrays, same as in get_item
    def get_single_row(self, idx): # same as get_rows, but only accept single number as an index
        """
        Return a single row at the index `idx` (1D) as a numpy array.

        Same as :meth:`get_rows`, but only accepts single row index.
        """
        return self._data[idx]
    def get_single_row_item(self, idx): # same as get_item, but only accept single number as an (row) index
        """
        Return a single row at the index `idx` (1D) as a numpy array.

        Same as :meth:`get_item`, but only accepts single row index.
        """
        return self._data[idx]
    def add_rows(self, idx, val): # accepts iterable (or 2D iterable)
        """
        Add new rows at index `idx` (1D).

        `val` can be a scalar (a single row filled with this value), a 1D sequence (a single row), or a 2D sequence arranged row-wise.
        If the table has no data yet, `val` becomes the table data and the column names are autogenerated.
        Raises :exc:`ValueError` for several insert locations or wrong shape of `val`.
        """
        if (not np.isscalar(idx)) or isinstance(idx,slice):
            raise ValueError("can only insert items in a single location")
        v_ndim=np.ndim(val)
        if v_ndim==0:
            if self.shape[1]==0:
                raise ValueError("can't add number to an empty table")
            val=np.zeros((1,self.shape[1]))+val
        elif v_ndim==1:
            val=np.expand_dims(as_array(val),0)
        elif v_ndim==2:
            val=as_array(val)
        else:
            raise ValueError("can't assign multidimensional arrays with d>2")
        if not self._added_shape_valid(val.shape,"row"):
            raise ValueError("invalid shape of added rows")
        if self._data is None:
            self._data=val
            # keep the number of names consistent with the number of columns
            self._column_names=[self._gen_unique_name() for _ in range(val.shape[1])]
        else:
            self._data=np.concatenate((self._data[:idx,:],val,self._data[idx:,:]),axis=0)
    def del_rows(self, idx):
        """Delete a row or a list of rows at the index `idx` (1D)."""
        self._data=np.delete(self._data,idx,axis=0)

    # table-wise
    def get_subtable(self, idx, force_copy=True):
        """
        Return the data at the index `idx` (1D or 2D) as an `IDataTableStorage` object of the same type.

        `force_copy` is unused (the selected data is always assembled into a new array).
        """
        r_idx,c_idx=indexing.to_double_index(idx,self.get_column_names())
        c_ndim,c_idx=c_idx.tup()
        r_idx=r_idx.idx
        if c_ndim==0:
            c_idx=[c_idx]
        new_column_names=[self._column_names[c] for c in c_idx]
        new_columns=np.column_stack([self._data[:,c][r_idx] for c in c_idx])
        # column_stack builds a new array, so the constructor doesn't need to copy it again
        return ArrayDataTableStorage(new_columns,new_column_names,force_copy=False)
    def expand(self, length):
        """
        Expand the table by `length` rows filled with zeros (does nothing if the table has no columns).
        """
        if self.shape[1]!=0:
            self.add_rows(self.shape[0],np.zeros((length,self.shape[1])))