"""
Single column classes. Used in `ColumnDataTableStorage`.
"""
import numpy as np
from ..utils import numclass, strdump#@UnresolvedImport
from ..utils import iterator as iterator_utils #@UnresolvedImport
from . import indexing
from .datatable_utils import as_array
# Relative module paths of the local utilities this module relies on (the same two modules imported from ``..utils`` above).
# NOTE(review): presumably consumed by the package's dependency/reload tooling -- confirm against its users.
_depends_local=["..utils.numclass","..utils.strdump"]
##### Data column #####
class IDataColumn(numclass.NumClass):
    """
    Abstract interface of a single data column.

    Subclasses provide the storage primitives (:meth:`_get_item`, :meth:`_set_item`, :meth:`_del_item`,
    :meth:`_add_item`, :meth:`copy` and :attr:`shape`); this class builds the container protocol
    (indexing, length, iteration), numpy interoperability and the arithmetic hooks on top of them.
    """
    ## Indexing ##
    def _get_item(self, idx):
        """
        Return value(s) at `idx` as a number or a numpy array (copying isn't required).
        """
        raise NotImplementedError("IDataColumn._get_item")
    def _get_single_item(self, idx):
        """
        Return value at `idx` as a number; `idx` is a single index.
        """
        return self._get_item(idx)
    def _set_item(self, idx, val):
        """
        Assign value(s) at `idx` to `val`.

        `val` can be a single value, a numpy array or another column.
        Return a :class:`IDataColumn` object (usually it's self, but can be expanded if the current class doesn't support assigning values).
        """
        raise NotImplementedError("IDataColumn._set_item")
    def _del_item(self, idx):
        """
        Delete value(s) at `idx`.

        Return a :class:`IDataColumn` object (usually it's self, but can be expanded if the current class doesn't support deleting values).
        """
        # the message names this method (it used to name ``_set_item`` by mistake)
        raise NotImplementedError("IDataColumn._del_item")
    def _add_item(self, idx, val):
        """
        Insert `val` at position `idx`; only supports adding at a single specific location.

        Return a :class:`IDataColumn` object (self, or a replacement column if the current class can't hold the result).
        """
        raise NotImplementedError("IDataColumn._add_item")
    def expand(self, length):
        """
        Expand column by `length` rows. Usually fill with zeros, unless the column values can be auto-predicted.

        Return the expanded column (self).
        """
        self.insert(None,np.zeros(length))
        return self
    def subcolumn(self, idx, force_copy=False):
        """
        Return value(s) at `idx` as a column object.

        If `idx` covers the whole column, return self (or ``self.copy()`` if ``force_copy==True``);
        otherwise the selected values are wrapped into an :class:`ArrayDataColumn`.
        """
        covers_all=indexing.covers_all(idx,self.nrows())
        if force_copy:
            if covers_all:
                return self.copy()
            return ArrayDataColumn(as_array(self._get_item(idx),force_copy=force_copy))
        if covers_all:
            return self
        return ArrayDataColumn(self._get_item(idx))
    def __getitem__(self, idx):
        return self._get_item(idx)
    def __setitem__(self, idx, val):
        # in-place protocol: the column object must not be replaced by a different one
        if self._set_item(idx,val) is not self:
            raise ValueError("can't set items while preserving column type")
    def __delitem__(self, idx):
        if self._del_item(idx) is not self:
            raise ValueError("can't delete items while preserving column type")
    def insert(self, idx, val):
        """
        Add a value `val` to the column at a position given by `idx`.

        ``idx=None`` means inserting at the end of the column.
        Raise :exc:`ValueError` if the insertion can't be done without replacing the column object.
        """
        if idx is None:
            idx=self.nrows()
        if self._add_item(idx,val) is not self:
            raise ValueError("can't insert items while preserving column type")
    def append(self, val):
        """
        Append a value to the column
        """
        self.insert(None,val)
    ## Casting to NumPy array ##
    def as_array(self, force_copy=False):
        """
        Turn the column into a numpy array.

        If ``force_copy==True``, the result is copied after indexing.
        """
        if force_copy:
            return self[:].copy()
        else:
            return self[:]
    ## Numpy ufunc support ##
    def __array__(self, dtype=None, copy=None): # property for compatibility with np.ufuncs
        # `dtype` and `copy` are optional, so both the old zero-argument call and the newer
        # NumPy calling convention (which may pass them) are accepted.
        arr=self.as_array(force_copy=bool(copy))
        return arr if dtype is None else np.asarray(arr,dtype=dtype)
    def __array_wrap__(self, arr, context=None, return_scalar=False):
        # `return_scalar` is accepted for newer NumPy versions; non-1D results are returned as is
        return ArrayDataColumn(arr) if np.ndim(arr)==1 else arr
    ## Copying ##
    def copy(self):
        """
        Return a copy of the column (must be implemented by subclasses).
        """
        raise NotImplementedError("IDataColumn.copy")
    ## Shape ##
    @property
    def shape(self):
        """Column shape as a tuple (must be implemented by subclasses)."""
        raise NotImplementedError("IDataColumn.shape")
    def nrows(self):
        """
        Return the number of rows (first element of :attr:`shape`).
        """
        return self.shape[0]
    @property
    def ndim(self):
        """Number of dimensions; always 1."""
        return 1
    def __len__(self):
        return self.nrows()
    ## Iterators ##
    def __iter__(self):
        return iterator_utils.AccessIterator(self)
    ## Repr ##
    def __str__(self):
        return str(self.as_array())
    def __repr__(self):
        return "{0}({1})".format(type(self).__name__,str(self))
    ## Arithmetics ##
    _numops_impl=numclass.NumClass._numops_all
    def _perform_numop(self, x, op_func, _):
        # generic fallback: apply the operation to the array representation and wrap the result
        return ArrayDataColumn(op_func(self.as_array(),x))
    def _perform_numop_comp(self, x, op_func, _): # often used for indexing, so want to return plain numpy array
        return op_func(self.as_array(),x)
    ## External functions adding ##
    @classmethod
    def add_array_function(cls, func, alias=None, wrap_into_column=False, as_property=False, doc=None):
        """
        Turns a function into a method, which is automatically applied to the array representation.

        Arguments:
            func (callable): a function which takes the column converted into a numpy array as a first argument, and then the rest of the supplied arguments
            alias (str): the method name; by default, it's ``func.__name__``
            wrap_into_column (bool): if ``True``, the returned result is wrapped into :class:`ArrayDataColumn`
            as_property (bool): if ``True``, the function is added as a property getter instead
            doc (str): the method docstring; by default, it's ``func.__doc__``
        """
        if alias is None:
            alias=func.__name__
        if wrap_into_column:
            def self_func(self, *args, **vargs):
                return ArrayDataColumn(func(self.as_array(force_copy=False),*args,**vargs))
        else:
            def self_func(self, *args, **vargs):
                return func(self.as_array(force_copy=False),*args,**vargs)
        # make the generated method report the name it is installed under
        self_func.__name__=alias
        if doc is None:
            try:
                self_func.__doc__=func.__doc__
            except AttributeError:
                pass
        else:
            self_func.__doc__=doc
        if as_property:
            setattr(cls,alias,property(self_func))
        else:
            setattr(cls,alias,self_func)
# Numpy functions exposed as IDataColumn methods; their results are returned as is (not wrapped into a column).
for _np_func,_method_name,_method_doc in [
        (np.argsort,None,"Same as :func:`numpy.argsort`."),
        (np.argmin,None,"Same as :func:`numpy.argmin`."),
        (np.argmax,None,"Same as :func:`numpy.argmax`."),
        (np.min,"min","Same as :func:`numpy.amin`."),
        (np.max,"max","Same as :func:`numpy.amax`."),
        (np.mean,None,"Same as :func:`numpy.mean`."),
        (np.std,None,"Same as :func:`numpy.std`."),
        (np.sum,None,"Same as :func:`numpy.sum`."),
        (np.nonzero,None,"Same as :func:`numpy.nonzero`."),
        (np.unique,None,"Same as :func:`numpy.unique`."),
        ]:
    IDataColumn.add_array_function(_np_func,_method_name,doc=_method_doc)
# Complex-number helpers; their results are wrapped back into ArrayDataColumn (real and imag are properties).
for _np_func,_method_name,_is_property,_method_doc in [
        (np.real,None,True,"Same as :func:`numpy.real`."),
        (np.imag,None,True,"Same as :func:`numpy.imag`."),
        (np.conjugate,"conjugate",False,"Same as :obj:`numpy.conj`."),
        ]:
    IDataColumn.add_array_function(_np_func,_method_name,wrap_into_column=True,as_property=_is_property,doc=_method_doc)
# don't leave the loop variables in the module namespace
del _np_func,_method_name,_is_property,_method_doc
class WrapperDataColumn(IDataColumn):
    """
    Wraps potentially mutable column types and proxies all the requests to them.

    Used when the underlying column object can change in the runtime (e.g., to accommodate a new element type).
    Mutating operations store whatever column the wrapped object returns, so the wrapper itself always stays the same object.
    """
    def __init__(self, column):
        IDataColumn.__init__(self)
        self._column=column
    ## Indexing ##
    def _get_item(self, idx):
        return self._column._get_item(idx)
    def _get_single_item(self, idx):
        return self._column._get_single_item(idx)
    def _set_item(self, idx, val):
        # the wrapped column may return a different column object; keep whatever it returns
        self._column=self._column._set_item(idx,val)
        return self
    def _del_item(self, idx):
        self._column=self._column._del_item(idx)
        return self
    def _add_item(self, idx, val):
        self._column=self._column._add_item(idx,val)
        return self
    def insert(self, idx, val):
        """
        Add a value `val` to the column at a position given by `idx`.

        ``idx=None`` means inserting at the end of the column (this is what the inherited ``append`` relies on).
        Return self.
        """
        if idx is None:
            idx=self.nrows()
        return self._add_item(idx,val)
    def expand(self, length):
        """
        Expand column. Usually fill with zeros, unless the column values can be auto-predicted.
        """
        self._column=self._column.expand(length)
        return self
    def subcolumn(self, idx, force_copy=False, wrap=True):
        """
        Return value(s) at `idx` as a column object.

        If ``wrap==True``, the result is a :class:`WrapperDataColumn` (self, if `idx` covers the whole column and no copy is requested);
        otherwise, return the subcolumn of the wrapped column directly.
        """
        if wrap:
            if not force_copy and indexing.covers_all(idx,self.nrows()):
                return self
            else:
                return WrapperDataColumn(self._column.subcolumn(idx,force_copy))
        else:
            # the result used to be computed and then dropped, so the method returned None
            return self._column.subcolumn(idx,force_copy)
    ## Casting to NumPy array ##
    def as_array(self, force_copy=False):
        """
        Turn the column into a numpy array
        """
        return self._column.as_array(force_copy=force_copy)
    ## Copying ##
    def copy(self):
        """
        Return a new wrapper around a copy of the wrapped column.
        """
        return WrapperDataColumn(self._column.copy())
    ## Shape ##
    @property
    def shape(self):
        return self._column.shape
    ## Arithmetics ##
    def _test_op_impl(self, op_sym):
        return self._column._test_op_impl(op_sym)
    def _perform_numop(self, x, op_func, op_sym):
        return WrapperDataColumn(self._column._perform_numop(x,op_func,op_sym))
    # the complex helpers below return the wrapped column's result without re-wrapping it
    @property
    def real(self):
        return self._column.real
    @property
    def imag(self):
        return self._column.imag
    def conjugate(self):
        """Return the complex-conjugate of the column"""
        return self._column.conjugate()
class IStoredDataColumn(IDataColumn):
    """
    Marker base class for columns which keep their data in storage.

    Adds nothing to :class:`IDataColumn`; it only distinguishes stored columns from the ones with generated data.
    """
class ArrayDataColumn(IStoredDataColumn):
    """
    Column backed by a 1D numpy array.

    The stored data type is widened automatically (int -> float -> complex) when assigned or inserted values require it.
    """
    def __init__(self, column):
        IStoredDataColumn.__init__(self)
        data=np.asarray(column)
        self._column=data
        if data.ndim!=1:
            raise ValueError("can't create column from {0}-dimensional object: {1}".format(data.ndim,column))
    ## Type expanding ##
    def _expand_type(self, val):
        """
        Widen the stored array type if `val` (a numpy array) needs it (``int`` < ``float`` < ``complex``).

        Raise :exc:`ValueError` if the wider type is not int, float or complex.
        """
        new_dtype=val.dtype
        if not new_dtype>self._column.dtype:
            return
        if new_dtype not in ["int","float","complex"]:
            raise ValueError("ArrayDataColumn doesn't support numpy arrays other than int, float or complex")
        self._column=self._column.astype(new_dtype)
    ## Indexing ##
    def _get_item(self, idx):
        np_idx=indexing.to_numpy_idx(idx).idx
        return self._column[np_idx]
    def _get_single_item(self, idx):
        return self._column[idx]
    def _set_item(self, idx, val):
        np_idx=indexing.to_numpy_idx(idx).idx
        new_vals=np.asarray(val)
        # widen the storage first, so that the assignment below doesn't truncate the values
        self._expand_type(new_vals)
        self._column[np_idx]=new_vals
        return self
    def _del_item(self, idx):
        list_idx=indexing.to_list_idx(idx).idx
        self._column=np.delete(self._column,list_idx)
        return self
    def _add_item(self, idx, val):
        ndim,pos=indexing.to_numpy_idx(idx).tup()
        if ndim!=0:
            raise ValueError("can only insert item in a single location")
        new_vals=np.asarray(val)
        self._expand_type(new_vals)
        # rebuild the array as head + new values + tail
        head=np.append(self._column[:pos],new_vals)
        self._column=np.append(head,self._column[pos:])
        return self
    ## Copying ##
    def copy(self):
        """
        Return a new column holding a copy of the stored array.
        """
        return ArrayDataColumn(self._column.copy())
    ## Shape ##
    @property
    def shape(self):
        nrows=len(self._column)
        return (nrows,)
    ## Arithmetics ##
    def _perform_numop_i(self, x, op_func, _):
        # the result of op_func is not used: the operation is expected to modify the stored array in place
        op_func(self._column,x)
        return self
def _dump_array_col(col, dumpf):
return dumpf(col._column)[1]
def _load_array_col(val, loadf):
    """
    Restore an array column: load `val` through `loadf` as a ``("np", val)`` pair and wrap the result into :class:`ArrayDataColumn`.
    """
    return ArrayDataColumn(loadf(("np",val)))
# register the dump/load pair for ArrayDataColumn under the "column.array" name
strdump.dumper.add_class(ArrayDataColumn,_dump_array_col,_load_array_col,"column.array",recursive=True)
class ListDataColumn(IStoredDataColumn):
    """
    Column backed by a Python list.

    A list passed to the constructor is stored as is (not copied); any other iterable is converted into a new list.
    """
    def __init__(self, column):
        IStoredDataColumn.__init__(self)
        self._column=column if isinstance(column,list) else list(column)
    ## Indexing ##
    def _get_sublist(self, idx):
        """Return the elements selected by `idx` (a slice or an iterable of indices) as a list."""
        if not indexing.is_slice(idx):
            return [self._column[i] for i in idx]
        return self._column[idx]
    def _get_item(self, idx):
        ndim,idx=indexing.to_list_idx(idx).tup()
        if ndim!=0:
            return as_array(self._get_sublist(idx))
        return self._column[idx]
    def _get_single_item(self, idx):
        return self._column[idx]
    def _set_item(self, idx, val):
        ndim,idx=indexing.to_list_idx(idx).tup()
        if np.isscalar(val):
            # scalar value: assign to one element, or put it into every selected element
            if ndim==0:
                self._column[idx]=val
                return self
            nrows=len(self._column)
            if indexing.covers_all(idx,nrows):
                self._column=[val]*nrows
            else:
                for row in indexing.to_range(idx,nrows):
                    self._column[row]=val
            return self
        if ndim!=1:
            raise ValueError("can't assign the value")
        # sequence value: element-wise assignment
        if indexing.is_slice(idx):
            self._column[idx]=val
        else:
            for pos,row in enumerate(idx):
                self._column[row]=val[pos]
        return self
    def _del_item(self, idx):
        ndim,idx=indexing.to_list_idx(idx).tup()
        if ndim==0 or indexing.is_slice(idx):
            del self._column[idx]
            return self
        # index list: mark the rows to drop, then keep the rest
        keep=np.ones(self.nrows(),dtype=bool)
        for row in idx:
            keep[row]=False
        self._column=[value for value,kept in zip(self._column,keep) if kept]
        return self
    def _add_item(self, idx, val):
        ndim,idx=indexing.to_list_idx(idx).tup()
        new_vals=[val] if np.isscalar(val) else val
        if ndim!=0:
            raise ValueError("can only insert item in a single location")
        # slice assignment inserts all the new values before position idx
        self._column[idx:idx]=new_vals
        return self
    def subcolumn(self, idx, force_copy=False):
        """
        Return value(s) at `idx` as a column object.

        Return self if `idx` covers the whole column and no copy is requested; otherwise, return a new :class:`ListDataColumn`.
        """
        ndim,idx=indexing.to_list_idx(idx).tup()
        if not force_copy and indexing.covers_all(idx,self.nrows()):
            return self
        if ndim==0:
            return ListDataColumn([self._column[idx]])
        return ListDataColumn(self._get_sublist(idx))
    ## Casting to NumPy array ##
    def as_array(self, force_copy=False):
        """
        Turn the column into a numpy array
        """
        return as_array(self._column,force_copy=force_copy)
    ## Copying ##
    def copy(self):
        """
        Return a new column with a (shallow) copy of the stored list.
        """
        return ListDataColumn(list(self._column))
    ## Shape ##
    @property
    def shape(self):
        nrows=len(self._column)
        return (nrows,)
    ## Arithmetics ##
    def _perform_numop_i(self, x, op_func, _):
        # apply the operation element by element and replace the stored list
        self._column=[op_func(elem,x) for elem in self._column]
        return self
    def _perform_numop(self, x, op_func, _):
        results=[op_func(elem,x) for elem in self._column]
        return ListDataColumn(results)
    ## Repr ##
    def __str__(self):
        return str(self._column)
    @property
    def real(self):
        return ListDataColumn([np.real(elem) for elem in self._column])
    @property
    def imag(self):
        return ListDataColumn([np.imag(elem) for elem in self._column])
    def conjugate(self):
        """Return the complex-conjugate of the column"""
        return ListDataColumn([np.conjugate(elem) for elem in self._column])
def _dump_list_col(col, dumpf):
return dumpf(col._column)
def _load_list_col(val, loadf):
    """
    Restore a list column: load `val` through `loadf` and wrap the result into :class:`ListDataColumn`.
    """
    loaded=loadf(val)
    return ListDataColumn(loaded)
# register the dump/load pair for ListDataColumn under the "column.list" name
strdump.dumper.add_class(ListDataColumn,_dump_list_col,_load_list_col,"column.list",recursive=True)
class LinearDataColumn(IDataColumn):
    """
    A linear data column.

    Represents a linear data (essentially, a range object). Doesn't store the full data, just the length, start and step.
    Automatically increments upon expansion.
    Operations which can't be represented by a linear column (assigning or inserting values, deleting from the middle)
    return an :class:`ArrayDataColumn` with the resulting data instead of self.
    """
    def __init__(self, length, start=0, step=1):
        IDataColumn.__init__(self)
        self.start=start
        self.length=length
        self.step=step
    def _calc_item(self, idx):
        """
        Calculate value(s) at the index (or numpy array of indices) `idx`.

        Raise :exc:`IndexError` if any index is negative or not less than the column length.
        """
        if np.any(idx>=self.length) or np.any(idx<0):
            raise IndexError("index {} is out of bounds".format(idx))
        return idx*self.step+self.start
    ## Indexing ##
    def _get_item(self, idx):
        ndim,idx=indexing.to_list_idx(idx).tup()
        if indexing.is_slice(idx):
            idx=np.arange(*idx.indices(self.length))
        if ndim==0:
            return self._calc_item(idx)
        else:
            return self._calc_item(np.asarray(idx))
    def _get_single_item(self, idx):
        return self._calc_item(idx)
    def _set_item(self, idx, val):
        # arbitrary values can't be represented linearly: fall back to an array column
        array_column=ArrayDataColumn(self.as_array())
        array_column._set_item(idx,val)
        return array_column
    def _del_item(self, idx):
        """
        Delete value(s) at `idx`.

        Deleting a contiguous run at the beginning or at the end keeps the column linear (return self);
        anything else returns an :class:`ArrayDataColumn` with the remaining values.
        """
        d_idx=indexing.to_list_idx(idx)
        idx=d_idx.idx
        if indexing.is_slice(idx):
            start,stop,step=idx.indices(self.length)
            if step==-1:
                # the reversed run start,start-1,...,stop+1 covers the same rows as the forward range [stop+1,start+1)
                start,stop,step=stop+1,start+1,1
            if step==1:
                if start==0:
                    # cutting the head: shift the start value by the number of removed rows
                    self.start=self.start+self.step*stop
                    self.length=self.length-(stop-start)
                    return self
                elif stop==self.length:
                    # cutting the tail: only the length changes
                    self.length=self.length-(stop-start)
                    return self
        array_column=ArrayDataColumn(self.as_array())
        array_column._del_item(d_idx)
        return array_column
    def _add_item(self, idx, val):
        array_column=ArrayDataColumn(self.as_array())
        array_column._add_item(idx,val)
        return array_column
    def expand(self, length):
        """
        Expand column by `length` rows; the new values continue the linear sequence.
        """
        self.length=self.length+length
        return self
    def subcolumn(self, idx, force_copy=False):
        """
        Return value(s) at `idx` as a column object.

        A single index or a slice gives a new :class:`LinearDataColumn` (an empty one for an empty slice);
        other indices are handled by :meth:`IDataColumn.subcolumn`.
        """
        ndim,idx=indexing.to_list_idx(idx).tup()
        if force_copy:
            return IDataColumn.subcolumn(self,idx,True)
        if indexing.covers_all(idx,self.nrows()):
            return self
        if ndim==0:
            return LinearDataColumn(1,self._calc_item(idx))
        if indexing.is_slice(idx):
            indices=idx.indices(self.length)
            # len(range(...)) is never negative, so empty slices give an empty column
            new_len=len(range(*indices))
            # calculated directly (no bounds check): for an empty slice the first index may equal the column length
            new_start=indices[0]*self.step+self.start
            return LinearDataColumn(new_len,new_start,self.step*indices[2])
        else:
            return IDataColumn.subcolumn(self,idx)
    ## Casting to NumPy array ##
    def as_array(self, force_copy=False):
        """
        Turn the column into a numpy array (a new array is calculated on every call, so `force_copy` has no effect).
        """
        return self._calc_item(np.arange(self.length))
    ## Copying ##
    def copy(self):
        """
        Return a new linear column with the same length, start and step.
        """
        return LinearDataColumn(self.length,self.start,self.step)
    ## Shape ##
    @property
    def shape(self):
        return (self.length,)
    ## Arithmetics ##
    _numops_perf_i={"i+","i-","i*","i/"}
    def _perform_numop_i(self, x, op_func, op_sym):
        # these operations with a scalar keep the column linear: only start and step need recalculating
        if op_sym in self._numops_perf_i and np.isscalar(x):
            if op_sym=="i/" and not np.iscomplexobj(x): # override integer division
                x=float(x)
            new_start=op_func(self.start,x)
            new_step=op_func(self.start+self.step,x)-op_func(self.start,x)
            self.start,self.step=new_start,new_step
            return self
        return IDataColumn._perform_numop(self,x,op_func,op_sym)
    _numops_perf_lru={"l+","l-","l*","l/","r+","r-","r*","u+","u-"}
    def _perform_numop(self, x, op_func, op_sym):
        if op_sym in self._numops_perf_lru and np.isscalar(x):
            # iscomplexobj checks the type, not the value: iscomplex(1+0j) is False, and float(1+0j) would then raise TypeError
            if op_sym=="l/" and not np.iscomplexobj(x): # override integer division
                x=float(x)
            new_start=op_func(self.start,x)
            new_step=op_func(self.start+self.step,x)-op_func(self.start,x)
            return LinearDataColumn(self.length,new_start,new_step)
        return IDataColumn._perform_numop(self,x,op_func,op_sym)
    @property
    def real(self):
        return LinearDataColumn(self.length,np.real(self.start),np.real(self.step))
    @property
    def imag(self):
        return LinearDataColumn(self.length,np.imag(self.start),np.imag(self.step))
    def conjugate(self):
        """Return the complex-conjugate of the column"""
        return LinearDataColumn(self.length,np.conjugate(self.start),np.conjugate(self.step))
### strdump definitions ###
def _dump_linear_col(col):
return col.length,col.start,col.step
def _load_linear_col(val):
    """
    Restore a linear column from a ``(length, start, step)`` triple.
    """
    col_length,col_start,col_step=val
    return LinearDataColumn(col_length,col_start,col_step)
# register the dump/load pair for LinearDataColumn under the "column.linear" name
strdump.dumper.add_class(LinearDataColumn,_dump_linear_col,_load_linear_col,"column.linear")
def as_linear_column(column):
    """
    Represent `column` as a :class:`LinearDataColumn`.

    Empty and single-element columns are always linear; longer ones must have equal differences between all consecutive elements.
    Raise :exc:`ValueError` if the column is not linear.
    """
    nrows=len(column)
    if nrows==0:
        return LinearDataColumn(0)
    if nrows==1:
        return LinearDataColumn(1,column[0])
    values=np.asarray(column)
    steps=values[1:]-values[:-1]
    if not (steps==steps[0]).all():
        raise ValueError("column can't be represented as a linear data")
    return LinearDataColumn(len(values),values[0],steps[0])
def as_column(column, force_numpy=True, try_linear=False, force_copy=False):
    """
    Turn an object into a column.

    If `column` is already an :class:`IDataColumn`, return it (or its copy, if ``force_copy==True``).
    If `column` is a list and ``force_numpy==False``, wrap it into :class:`ListDataColumn`;
    otherwise (including lists with the default ``force_numpy==True``), turn it into a numpy array and return :class:`ArrayDataColumn`.
    If ``try_linear==True``, try to represent it as a linear data first.
    If ``force_copy==True``, create the copy of the data.
    """
    if isinstance(column,IDataColumn):
        return column.copy() if force_copy else column
    if try_linear:
        try:
            return as_linear_column(column)
        except ValueError:
            # not a linear data; fall through to the stored column types
            pass
    if isinstance(column,list) and not force_numpy:
        return ListDataColumn(list(column) if force_copy else column)
    # np.asarray returns an existing array as is, so np.array is needed to guarantee a copy
    data=np.array(column) if force_copy else np.asarray(column)
    return ArrayDataColumn(data)
def crange(*args):
    """
    [start,] stop[, step]

    Analogue of `range` which creates a linear data column.
    The column is empty if the range contains no points (e.g., ``stop<start`` with a positive step).
    Raise :exc:`ValueError` if the number of arguments is not between 1 and 3.
    """
    start=0
    step=1
    if len(args)==1:
        stop=args[0]
    elif len(args)==2:
        start,stop=args
    elif len(args)==3:
        start,stop,step=args
    else:
        raise ValueError("crange accepts between 1 and 3 arguments")
    # builtin int instead of the np.int alias (removed from NumPy); the length is clamped so that it is never negative
    length=max(int(np.ceil((stop-start)/float(step))),0)
    return LinearDataColumn(length,start,step)
def zeros(length):
    """
    Return a column of `length` zeros.

    The column is a :class:`LinearDataColumn` with zero start and zero step, so no data is stored.
    """
    zero_column=LinearDataColumn(length,0,0)
    return zero_column