# Source code for pyinform.dist

# Copyright 2016-2019 Douglas G. Moore. All rights reserved.
# Use of this source code is governed by a MIT
# license that can be found in the LICENSE file.
import numpy as np

from ctypes import c_bool, c_double, c_uint, c_ulong, c_void_p, POINTER
from pyinform import _inform


class Dist:
    """
    Dist is a class designed to represent empirical probability
    distributions, i.e. histograms, for cleanly logging observations of time
    series data.

    The premise behind this class is that it allows **PyInform** to define
    the standard entropy measures on distributions. This reduces functions
    such as :py:func:`pyinform.activeinfo.active_info` to building
    distributions and then applying standard entropy measures.

    Each instance owns a handle (``self._dist``) to a distribution allocated
    by the C library; the handle is released when the instance is finalized.
    """

    def __init__(self, n):
        """
        Construct a distribution.

        If the parameter *n* is an integer, the distribution is constructed
        with a zeroed support of size *n*. If *n* is a list or
        ``numpy.ndarray``, the sequence is treated as the underlying support.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(5)
            >>> d = Dist([0,0,1,2])

        :param n: the support for the distribution
        :type n: int, list or ``numpy.ndarray``
        :raises ValueError: if support is empty or multidimensional
        :raises MemoryError: if memory allocation fails within the C call
        """
        # Set the handle first so that finalization is safe even when the
        # validation below raises before anything has been allocated.
        self._dist = None

        if isinstance(n, (list, np.ndarray)):
            xs = np.ascontiguousarray(n, dtype=np.uint32)
            if xs.ndim != 1:
                raise ValueError("support is multi-dimenstional")
            elif xs.size == 0:
                raise ValueError("support is empty")
            data = xs.ctypes.data_as(POINTER(c_uint))
            self._dist = _dist_create(data, xs.size)
        else:
            if n <= 0:
                raise ValueError("support is zero")
            self._dist = _dist_alloc(c_ulong(n))

        # A NULL pointer comes back from ctypes as None.
        if not self._dist:
            raise MemoryError()

    def __dealloc__(self):
        """
        Deallocate the memory underlying the distribution.

        Calling this more than once is harmless: the handle is cleared before
        it is released, so a later call (or finalization) finds nothing left
        to free.
        """
        dist = getattr(self, "_dist", None)
        # ``_dist_free`` may already be torn down during interpreter exit.
        if dist and _dist_free is not None:
            self._dist = None
            _dist_free(dist)

    def __del__(self):
        """
        Release the underlying C distribution when the object is finalized.

        ``__dealloc__`` is not a hook that Python invokes on its own for a
        regular class, so finalization is routed through ``__del__``.
        """
        self.__dealloc__()

    def __len__(self):
        """
        Determine the size of the support of the distribution.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> len(Dist(5))
            5
            >>> len(Dist([0,1,5]))
            3

        See also :py:meth:`.counts`.

        :return: the size of the support
        :rtype: int
        """
        return int(_dist_size(self._dist))

    def resize(self, n):
        """
        Resize the support of the distribution in place.

        If the distribution...

        - **shrinks** - the last ``len(self) - n`` elements are lost, the
          rest are preserved
        - **grows** - the last ``n - len(self)`` elements are zeroed
        - **is unchanged** - well, that sorta says it all, doesn't it?

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(5)
            >>> d.resize(3)
            >>> len(d)
            3
            >>> d.resize(8)
            >>> len(d)
            8

        .. doctest:: Dist

            >>> d = Dist([1,2,3,4])
            >>> d.resize(2)
            >>> list(d)
            [1, 2]
            >>> d.resize(4)
            >>> list(d)
            [1, 2, 0, 0]

        :param int n: the desired size of the support
        :raises ValueError: if the requested size is zero
        :raises MemoryError: if memory allocation fails in the C call
        """
        if n <= 0:
            raise ValueError("support is zero")

        # Check the result before storing it so that a failed call does not
        # discard the only handle to the existing distribution.
        # NOTE(review): assumes the C call leaves the original distribution
        # intact on failure (realloc-style) - confirm against the C API.
        resized = _dist_realloc(self._dist, c_ulong(n))
        if not resized:
            raise MemoryError()
        self._dist = resized

    def copy(self):
        """
        Perform a deep copy of the distribution.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist([1,2,3])
            >>> e = d
            >>> e[0] = 3
            >>> list(e)
            [3, 2, 3]
            >>> list(d)
            [3, 2, 3]

        .. doctest:: Dist

            >>> f = d.copy()
            >>> f[0] = 1
            >>> list(f)
            [1, 2, 3]
            >>> list(d)
            [3, 2, 3]

        :returns: the copied distribution
        :rtype: :py:class:`pyinform.dist.Dist`
        """
        # Allocate a destination of the same size, then copy into it.
        d = Dist(len(self))
        _dist_copy(self._dist, d._dist)
        return d

    def counts(self):
        """
        Return the number of observations made thus far.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(5)
            >>> d.counts()
            0

        .. doctest:: Dist

            >>> d = Dist([1,0,3,2])
            >>> d.counts()
            6

        See also :py:meth:`.__len__`.

        :return: the number of observations
        :rtype: int
        """
        return _dist_counts(self._dist)

    def valid(self):
        """
        Determine if the distribution is a valid probability distribution,
        i.e. if the support is not empty and at least one observation has
        been made.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(5)
            >>> d.valid()
            False

        .. doctest:: Dist

            >>> d = Dist([0,0,0,1])
            >>> d.valid()
            True

        See also :py:meth:`.__len__` and :py:meth:`.counts`.

        :return: a boolean signifying that the distribution is valid
        :rtype: bool
        """
        return _dist_is_valid(self._dist)

    def __getitem__(self, event):
        """
        Get the number of observations made of *event*.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(2)
            >>> (d[0], d[1])
            (0, 0)

        .. doctest:: Dist

            >>> d = Dist([0,1])
            >>> (d[0], d[1])
            (0, 1)

        See also :py:meth:`.__setitem__`, :py:meth:`.tick` and
        :py:meth:`.probability`.

        :param int event: the observed event
        :return: the number of observations of *event*
        :rtype: int
        :raises IndexError: if ``event < 0 or len(self) <= event``
        """
        # Raising IndexError past the end is also what lets ``list(d)`` and
        # ``enumerate(d)`` terminate, as the class defines no ``__iter__``.
        if event < 0 or event >= len(self):
            raise IndexError()
        return _dist_get(self._dist, c_ulong(event))

    def __setitem__(self, event, value):
        """
        Set the number of observations of *event* to *value*.

        If *value* is negative, then the observation count is set to zero.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(2)
            >>> for i, _ in enumerate(d):
            ...     d[i] = i*i
            ...
            >>> list(d)
            [0, 1]

        .. doctest:: Dist

            >>> d = Dist([0,1,2,3])
            >>> for i, n in enumerate(d):
            ...     d[i] = 2 * n
            ...
            >>> list(d)
            [0, 2, 4, 6]

        See also :py:meth:`.__getitem__` and :py:meth:`.tick`.

        :param int event: the observed event
        :param int value: the number of observations
        :raises IndexError: if ``event < 0 or len(self) <= event``
        """
        if event < 0 or event >= len(self):
            raise IndexError()
        # Clamp negatives to zero before handing the count to the C call.
        value = max(0, value)
        return _dist_set(self._dist, c_ulong(event), c_uint(value))

    def tick(self, event):
        """
        Make a single observation of *event*, and return the total number
        of observations of said *event*.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist(5)
            >>> for i, _ in enumerate(d):
            ...     assert(d.tick(i) == 1)
            ...
            >>> list(d)
            [1, 1, 1, 1, 1]

        .. doctest:: Dist

            >>> d = Dist([0,1,2,3])
            >>> for i, _ in enumerate(d):
            ...     assert(d.tick(i) == i + 1)
            ...
            >>> list(d)
            [1, 2, 3, 4]

        See also :py:meth:`.__getitem__` and :py:meth:`.__setitem__`.

        :param int event: the observed event
        :return: the total number of observations of *event*
        :rtype: int
        :raises IndexError: if ``event < 0 or len(self) <= event``
        """
        if event < 0 or event >= len(self):
            raise IndexError()
        return _dist_tick(self._dist, c_ulong(event))

    def probability(self, event):
        """
        Compute the empirical probability of an *event*.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist([1,1,1,1])
            >>> for i, _ in enumerate(d):
            ...     assert(d.probability(i) == 1./4)
            ...

        See also :py:meth:`.__getitem__` and :py:meth:`.dump`.

        :param int event: the observed event
        :return: the empirical probability of *event*
        :rtype: float
        :raises ValueError: if ``not self.valid()``
        :raises IndexError: if ``event < 0 or len(self) <= event``
        """
        if not self.valid():
            raise ValueError("invalid distribution")
        elif event < 0 or event >= len(self):
            raise IndexError()
        return _dist_prob(self._dist, c_ulong(event))

    def dump(self):
        """
        Compute the empirical probability of each observable event and
        return the result as an array.

        .. rubric:: Examples:

        .. doctest:: Dist

            >>> d = Dist([1,2,2,1])
            >>> d.dump()
            array([0.16666667, 0.33333333, 0.33333333, 0.16666667])

        See also :py:meth:`.probability`.

        :return: the empirical probabilities of all observable events
        :rtype: ``numpy.ndarray``
        :raises ValueError: if ``not self.valid()``
        :raises RuntimeError: if the dump fails in the C call
        """
        if not self.valid():
            raise ValueError("invalid distribution")

        n = len(self)
        # The C call writes the probabilities straight into this buffer.
        probs = np.empty(n, dtype=np.float64)
        data = probs.ctypes.data_as(POINTER(c_double))
        m = _dist_dump(self._dist, data, c_ulong(n))
        # Anything other than the full support size is treated as failure.
        if m != n:
            raise RuntimeError("cannot dump the distribution")
        return probs
def _bind(symbol, argtypes, restype):
    """Look up *symbol* in the inform library and declare its C signature."""
    function = getattr(_inform, symbol)
    function.argtypes = argtypes
    function.restype = restype
    return function


# NOTE(review): sizes and event indices are declared as c_ulong throughout;
# if the C API takes size_t this is narrower on LLP64 platforms (64-bit
# Windows) - confirm before changing, since Dist wraps values in c_ulong too.

# Construction, resizing, copying and destruction. Distributions are passed
# around as opaque pointers (c_void_p).
_dist_alloc = _bind("inform_dist_alloc", [c_ulong], c_void_p)
_dist_realloc = _bind("inform_dist_realloc", [c_void_p, c_ulong], c_void_p)
_dist_copy = _bind("inform_dist_copy", [c_void_p, c_void_p], c_void_p)
_dist_create = _bind("inform_dist_create", [POINTER(c_uint), c_ulong], c_void_p)
_dist_free = _bind("inform_dist_free", [c_void_p], None)

# Whole-distribution queries.
_dist_size = _bind("inform_dist_size", [c_void_p], c_ulong)
_dist_counts = _bind("inform_dist_counts", [c_void_p], c_uint)
_dist_is_valid = _bind("inform_dist_is_valid", [c_void_p], c_bool)

# Per-event accessors and mutators.
_dist_get = _bind("inform_dist_get", [c_void_p, c_ulong], c_uint)
_dist_set = _bind("inform_dist_set", [c_void_p, c_ulong, c_uint], c_uint)
_dist_tick = _bind("inform_dist_tick", [c_void_p, c_ulong], c_uint)

# Probabilities.
_dist_prob = _bind("inform_dist_prob", [c_void_p, c_ulong], c_double)
_dist_dump = _bind(
    "inform_dist_dump", [c_void_p, POINTER(c_double), c_ulong], c_ulong
)