ntv-numpy.ntv_numpy.data_array

@author: Philippe@loco-labs.io

The data_array module is part of the ntv-numpy.ntv_numpy package (specification document).

It contains the classes Darray (abstract), Dfull, Dcomplete for the representation of unidimensional arrays.

For more information, see the user guide or the github repository.

  1# -*- coding: utf-8 -*-
  2"""
  3@author: Philippe@loco-labs.io
  4
  5The `data_array` module is part of the `ntv-numpy.ntv_numpy` package ([specification document](
  6https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
  7
  8It contains the classes `Darray` (abstract), `Dfull`, `Dcomplete` for the
  9representation of unidimensional arrays.
 10
 11For more information, see the
 12[user guide](https://loco-philippe.github.io/ntv-numpy/docs/user_guide.html)
 13 or the [github repository](https://github.com/loco-philippe/ntv-numpy).
 14"""
 15from abc import ABC, abstractmethod
 16import json
 17import numpy as np
 18import pandas as pd
 19from json_ntv import Ntv, NtvConnector
 20
 21
 22class Darray(ABC):
 23    ''' The Darray class is an abstract class used by `Dfull`and `Dcomplete` classes.
 24
 25    *Attributes :*
 26    - **data** :  np.ndarray - data after coding
 27    - **ref**:  int or string - reference to another Darray data
 28    - **coding**: np.ndarray of int - mapping between data and the values
 29
 30    *dynamic values (@property)*
 31    - `values`
 32
 33    *methods*
 34    - `read_json` (staticmethod)
 35    - `to_json`
 36    '''
 37
 38    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
 39        '''Darray constructor.
 40
 41        *Parameters*
 42
 43        - **data**: list, Darray or np.ndarray - data to represent (after coding)
 44        - **ref** : String or integer (default None) - name or index of another Darray
 45        - **coding**: List of integer (default None) - mapping between data and the list of values
 46        - **dtype**: string (default None) - numpy.dtype to apply
 47        '''
 48        if isinstance(data, Darray):
 49            self.data = data.data
 50            self.ref = data.ref
 51            self.coding = data.coding
 52            return
 53        data = data if isinstance(data, (list, np.ndarray)) else [data]
 54        if (len(data) > 0 and isinstance(data[0], (list, np.ndarray))) or unidim:
 55            dtype = data.dtype if isinstance(data, np.ndarray) else 'object'
 56            self.data = np.fromiter(data, dtype=dtype)
 57        else:
 58            self.data = np.array(data, dtype=dtype).reshape(-1)
 59        self.ref = ref
 60        self.coding = np.array(coding)
 61
 62    def __repr__(self):
 63        '''return classname and number of value'''
 64        return self.__class__.__name__ + '[' + str(len(self)) + ']'
 65
 66    def __str__(self):
 67        '''return json string format'''
 68        return json.dumps(self.to_json())
 69
 70    def __eq__(self, other):
 71        ''' equal if values are equal'''
 72        return np.array_equal(self.values, other.values, equal_nan=False)
 73
 74    def __len__(self):
 75        ''' len of values'''
 76        return self._len_val
 77
 78    def __contains__(self, item):
 79        ''' item of values'''
 80        return item in self.values
 81
 82    def __getitem__(self, ind):
 83        ''' return value item'''
 84        if isinstance(ind, tuple):
 85            return [self.values[i] for i in ind]
 86            # return [copy(self.values[i]) for i in ind]
 87        return self.values[ind]
 88        # return copy(self.values[ind])
 89
 90    def __copy__(self):
 91        ''' Copy all the data '''
 92        return self.__class__(self)
 93
 94    @staticmethod
 95    def read_json(val, dtype=None, unidim=False):
 96        ''' return a Darray entity from a list of data.
 97
 98        *Parameters*
 99
100        - **val**: list of data
101        - **dtype** : string (default None) - numpy.dtype to apply
102        '''
103        val = val if isinstance(val, list) else [val]
104        if not val or not isinstance(val[0], list):
105            return Dfull(val, dtype=dtype, unidim=unidim)
106        match val:
107            case [data, ref, list(coding)] if (isinstance(ref, (int, str)) and
108                                               isinstance(coding[0], int) and
109                                               max(coding) < len(data)):
110                return None
111            case [data, ref] if (isinstance(data, list) and
112                                 isinstance(ref, (int, str))):
113                return None
114            case [data, list(coef)] if len(coef) == 1:
115                return None
116            case [data, list(coding)] if (isinstance(coding[0], int) and
117                                          max(coding) < len(data)):
118                return Dcomplete(data, None, coding, dtype=dtype, unidim=unidim)
119            case _:
120                return Dfull(val, dtype=dtype, unidim=unidim)
121
122    @abstractmethod
123    def to_json(self):
124        ''' return a JsonValue'''
125
126    @property
127    @abstractmethod
128    def values(self):
129        ''' return the list of values'''
130
131    @property
132    @abstractmethod
133    def _len_val(self):
134        '''return the length of the entity'''
135
136
class Dfull(Darray):
    ''' One dimensional array with a full representation: the values are the
    data themselves (no coding is applied).

    *dynamic values (@property)*
    - `values`

    *methods*
    - `read_json` (staticmethod)
    - `to_json`
    '''

    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
        '''Dfull constructor.

        *Parameters*

        - **data**: list, Darray or np.ndarray - data to represent (after coding)
        - **ref** : unused (not passed to the `Darray` constructor)
        - **coding**: unused (not passed to the `Darray` constructor)
        - **dtype**: string (default None) - numpy.dtype to apply
        - **unidim**: boolean (default False) - passed to the `Darray` constructor
        '''
        super().__init__(data, dtype=dtype, unidim=unidim)

    def to_json(self):
        ''' return the JsonValue of the Dfull entity (JSON list of the data).'''
        json_value = Dutil.list_json(self.data)
        return json_value

    @property
    def values(self):
        ''' return the values, i.e. the `data` array itself'''
        return self.data

    @property
    def _len_val(self):
        '''return the number of data (0 for a zero-dimensional `data` array)'''
        if self.data.ndim == 0:
            return 0
        return len(self.data)
174
class Dcomplete(Darray):
    ''' Representation of a one dimensional Array with complete representation:
    `data` holds the distinct values and `coding` gives, for each value, the
    index of its data.

    *dynamic values (@property)*
    - `values`

    *methods*
    - `read_json` (staticmethod)
    - `to_json`
    '''

    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
        '''Dcomplete constructor.

        *Parameters*

        - **data**: list, Darray or np.ndarray - data to represent (after coding).
        If coding is None, data is the list of values and the distinct data
        and the coding are computed from it. If data is a Dcomplete, its data
        and coding are reused without any computation.
        - **ref** : unused
        - **coding**: List of integer (default None) - mapping between data and the list of values
        - **dtype**: string (default None) - numpy.dtype to apply
        - **unidim**: boolean (default False) - passed to the `Darray` constructor
        '''
        if isinstance(data, Dcomplete):
            # copy of an existing entity: data and coding are already consistent
            super().__init__(data)
            return
        if coding is None:
            if isinstance(data, Darray):
                # another kind of Darray only provides values to be coded
                data = data.values
            try:
                data, coding = np.unique(data, return_inverse=True)
            except (TypeError, ValueError):
                # np.unique cannot be applied directly to the values: the
                # distinct values are searched on their Ntv conversion and
                # the original objects are kept as data
                ntv_values = np.frompyfunc(Ntv.from_obj, 1, 1)(data)
                _, idx, coding = np.unique(ntv_values, return_index=True,
                                           return_inverse=True)
                if isinstance(data, np.ndarray):
                    data = data[idx]
                else:
                    # a list cannot be indexed with an array of indexes
                    data = [data[i] for i in idx]
        super().__init__(data, coding=coding, dtype=dtype, unidim=unidim)

    def to_json(self):
        ''' return a JsonValue of the Dcomplete entity (list with the JSON
        data and the coding).'''
        return [Dutil.list_json(self.data), self.coding.tolist()]

    @property
    def values(self):
        ''' return the list of values (data selected by the coding)'''
        return self.data[self.coding]

    @property
    def _len_val(self):
        '''return the length of the Dcomplete entity (length of the coding,
        0 for a zero-dimensional coding)'''
        return len(self.coding) if self.coding.ndim > 0 else 0
219
class Dutil:
    '''np.ndarray utilities.

    *static methods*
    - `equals`
    - `list_json`
    '''

    @staticmethod
    def equals(nself, nother):
        '''return True if both parameters are np.ndarray with the same dtype,
        the same shape and equal elements.

        Elements which are np.ndarray, pd.Series or pd.DataFrame are compared
        one by one (with this function for np.ndarray, with the `equals`
        function of the 'SeriesConnec' or 'DataFrameConnec' connector for the
        pandas objects). The kind of comparison is chosen from the first element.

        *Parameters*

        - **nself**: np.ndarray - first array to compare
        - **nother**: np.ndarray - second array to compare
        '''
        if not (isinstance(nself, np.ndarray) and isinstance(nother, np.ndarray)):
            return False
        if nself.dtype != nother.dtype or nself.shape != nother.shape:
            return False
        if nself.ndim == 0:
            # a zero-dimensional array holds one value which has to be compared
            return bool(np.array_equal(nself, nother))
        if len(nself) == 0:
            return True
        first = nself[0]
        # the connectors are resolved only when a pandas object has to be compared
        if isinstance(first, np.ndarray):
            compare = Dutil.equals
        elif isinstance(first, pd.Series):
            compare = NtvConnector.connector().get('SeriesConnec').equals
        elif isinstance(first, pd.DataFrame):
            compare = NtvConnector.connector().get('DataFrameConnec').equals
        else:
            return np.array_equal(nself, nother)
        return all(compare(nps, npo) for nps, npo in zip(nself, nother))

    @staticmethod
    def list_json(nda):
        '''return a JSON representation of a unidimensional np.ndarray
        (elements which are np.ndarray are converted recursively)'''
        if len(nda) == 0:
            return []
        if isinstance(nda[0], np.ndarray):
            return [Dutil.list_json(arr) for arr in nda]
        return nda.tolist()
class Darray(abc.ABC):
 23class Darray(ABC):
 24    ''' The Darray class is an abstract class used by `Dfull`and `Dcomplete` classes.
 25
 26    *Attributes :*
 27    - **data** :  np.ndarray - data after coding
 28    - **ref**:  int or string - reference to another Darray data
 29    - **coding**: np.ndarray of int - mapping between data and the values
 30
 31    *dynamic values (@property)*
 32    - `values`
 33
 34    *methods*
 35    - `read_json` (staticmethod)
 36    - `to_json`
 37    '''
 38
 39    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
 40        '''Darray constructor.
 41
 42        *Parameters*
 43
 44        - **data**: list, Darray or np.ndarray - data to represent (after coding)
 45        - **ref** : String or integer (default None) - name or index of another Darray
 46        - **coding**: List of integer (default None) - mapping between data and the list of values
 47        - **dtype**: string (default None) - numpy.dtype to apply
 48        '''
 49        if isinstance(data, Darray):
 50            self.data = data.data
 51            self.ref = data.ref
 52            self.coding = data.coding
 53            return
 54        data = data if isinstance(data, (list, np.ndarray)) else [data]
 55        if (len(data) > 0 and isinstance(data[0], (list, np.ndarray))) or unidim:
 56            dtype = data.dtype if isinstance(data, np.ndarray) else 'object'
 57            self.data = np.fromiter(data, dtype=dtype)
 58        else:
 59            self.data = np.array(data, dtype=dtype).reshape(-1)
 60        self.ref = ref
 61        self.coding = np.array(coding)
 62
 63    def __repr__(self):
 64        '''return classname and number of value'''
 65        return self.__class__.__name__ + '[' + str(len(self)) + ']'
 66
 67    def __str__(self):
 68        '''return json string format'''
 69        return json.dumps(self.to_json())
 70
 71    def __eq__(self, other):
 72        ''' equal if values are equal'''
 73        return np.array_equal(self.values, other.values, equal_nan=False)
 74
 75    def __len__(self):
 76        ''' len of values'''
 77        return self._len_val
 78
 79    def __contains__(self, item):
 80        ''' item of values'''
 81        return item in self.values
 82
 83    def __getitem__(self, ind):
 84        ''' return value item'''
 85        if isinstance(ind, tuple):
 86            return [self.values[i] for i in ind]
 87            # return [copy(self.values[i]) for i in ind]
 88        return self.values[ind]
 89        # return copy(self.values[ind])
 90
 91    def __copy__(self):
 92        ''' Copy all the data '''
 93        return self.__class__(self)
 94
 95    @staticmethod
 96    def read_json(val, dtype=None, unidim=False):
 97        ''' return a Darray entity from a list of data.
 98
 99        *Parameters*
100
101        - **val**: list of data
102        - **dtype** : string (default None) - numpy.dtype to apply
103        '''
104        val = val if isinstance(val, list) else [val]
105        if not val or not isinstance(val[0], list):
106            return Dfull(val, dtype=dtype, unidim=unidim)
107        match val:
108            case [data, ref, list(coding)] if (isinstance(ref, (int, str)) and
109                                               isinstance(coding[0], int) and
110                                               max(coding) < len(data)):
111                return None
112            case [data, ref] if (isinstance(data, list) and
113                                 isinstance(ref, (int, str))):
114                return None
115            case [data, list(coef)] if len(coef) == 1:
116                return None
117            case [data, list(coding)] if (isinstance(coding[0], int) and
118                                          max(coding) < len(data)):
119                return Dcomplete(data, None, coding, dtype=dtype, unidim=unidim)
120            case _:
121                return Dfull(val, dtype=dtype, unidim=unidim)
122
123    @abstractmethod
124    def to_json(self):
125        ''' return a JsonValue'''
126
127    @property
128    @abstractmethod
129    def values(self):
130        ''' return the list of values'''
131
132    @property
133    @abstractmethod
134    def _len_val(self):
135        '''return the length of the entity'''

The Darray class is an abstract class used by the Dfull and Dcomplete classes.

Attributes :

  • data : np.ndarray - data after coding
  • ref: int or string - reference to another Darray data
  • coding: np.ndarray of int - mapping between data and the values

dynamic values (@property)

methods

Darray(data, ref=None, coding=None, dtype=None, unidim=False)
39    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
40        '''Darray constructor.
41
42        *Parameters*
43
44        - **data**: list, Darray or np.ndarray - data to represent (after coding)
45        - **ref** : String or integer (default None) - name or index of another Darray
46        - **coding**: List of integer (default None) - mapping between data and the list of values
47        - **dtype**: string (default None) - numpy.dtype to apply
48        '''
49        if isinstance(data, Darray):
50            self.data = data.data
51            self.ref = data.ref
52            self.coding = data.coding
53            return
54        data = data if isinstance(data, (list, np.ndarray)) else [data]
55        if (len(data) > 0 and isinstance(data[0], (list, np.ndarray))) or unidim:
56            dtype = data.dtype if isinstance(data, np.ndarray) else 'object'
57            self.data = np.fromiter(data, dtype=dtype)
58        else:
59            self.data = np.array(data, dtype=dtype).reshape(-1)
60        self.ref = ref
61        self.coding = np.array(coding)

Darray constructor.

Parameters

  • data: list, Darray or np.ndarray - data to represent (after coding)
  • ref : String or integer (default None) - name or index of another Darray
  • coding: List of integer (default None) - mapping between data and the list of values
  • dtype: string (default None) - numpy.dtype to apply
ref
coding
@staticmethod
def read_json(val, dtype=None, unidim=False):
 95    @staticmethod
 96    def read_json(val, dtype=None, unidim=False):
 97        ''' return a Darray entity from a list of data.
 98
 99        *Parameters*
100
101        - **val**: list of data
102        - **dtype** : string (default None) - numpy.dtype to apply
103        '''
104        val = val if isinstance(val, list) else [val]
105        if not val or not isinstance(val[0], list):
106            return Dfull(val, dtype=dtype, unidim=unidim)
107        match val:
108            case [data, ref, list(coding)] if (isinstance(ref, (int, str)) and
109                                               isinstance(coding[0], int) and
110                                               max(coding) < len(data)):
111                return None
112            case [data, ref] if (isinstance(data, list) and
113                                 isinstance(ref, (int, str))):
114                return None
115            case [data, list(coef)] if len(coef) == 1:
116                return None
117            case [data, list(coding)] if (isinstance(coding[0], int) and
118                                          max(coding) < len(data)):
119                return Dcomplete(data, None, coding, dtype=dtype, unidim=unidim)
120            case _:
121                return Dfull(val, dtype=dtype, unidim=unidim)

return a Darray entity from a list of data.

Parameters

  • val: list of data
  • dtype : string (default None) - numpy.dtype to apply
@abstractmethod
def to_json(self):
123    @abstractmethod
124    def to_json(self):
125        ''' return a JsonValue'''

return a JsonValue

values
127    @property
128    @abstractmethod
129    def values(self):
130        ''' return the list of values'''

return the list of values

class Dfull(Darray):
138class Dfull(Darray):
139    ''' Representation of a one dimensional Array with full representation
140
141    *dynamic values (@property)*
142    - `values`
143
144    *methods*
145    - `read_json` (staticmethod)
146    - `to_json`
147    '''
148
149    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
150        '''Dfull constructor.
151
152        *Parameters*
153
154        - **data**: list, Darray or np.ndarray - data to represent (after coding)
155        - **ref** : unused
156        - **coding**: unused
157        - **dtype**: string (default None) - numpy.dtype to apply
158        '''
159        super().__init__(data, dtype=dtype, unidim=unidim)
160
161    def to_json(self):
162        ''' return a JsonValue of the Dfull entity.'''
163        return Dutil.list_json(self.data)
164
165    @property
166    def values(self):
167        ''' return the list of values'''
168        return self.data
169
170    @property
171    def _len_val(self):
172        '''return the length of the Dfull entity'''
173        return len(self.data) if self.data.ndim > 0 else 0

Representation of a one dimensional Array with full representation

dynamic values (@property)

methods

Dfull(data, ref=None, coding=None, dtype=None, unidim=False)
149    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
150        '''Dfull constructor.
151
152        *Parameters*
153
154        - **data**: list, Darray or np.ndarray - data to represent (after coding)
155        - **ref** : unused
156        - **coding**: unused
157        - **dtype**: string (default None) - numpy.dtype to apply
158        '''
159        super().__init__(data, dtype=dtype, unidim=unidim)

Dfull constructor.

Parameters

  • data: list, Darray or np.ndarray - data to represent (after coding)
  • ref : unused
  • coding: unused
  • dtype: string (default None) - numpy.dtype to apply
def to_json(self):
161    def to_json(self):
162        ''' return a JsonValue of the Dfull entity.'''
163        return Dutil.list_json(self.data)

return a JsonValue of the Dfull entity.

values
165    @property
166    def values(self):
167        ''' return the list of values'''
168        return self.data

return the list of values

Inherited Members
Darray
ref
coding
read_json
class Dcomplete(Darray):
176class Dcomplete(Darray):
177    ''' Representation of a one dimensional Array with full representation
178
179    *dynamic values (@property)*
180    - `values`
181
182    *methods*
183    - `read_json` (staticmethod)
184    - `to_json`
185    '''
186
187    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
188        '''Dcomplete constructor.
189
190        *Parameters*
191
192        - **data**: list, Darray or np.ndarray - data to represent (after coding)
193        - **ref** : unused
194        - **coding**: List of integer (default None) - mapping between data and the list of values
195        - **dtype**: string (default None) - numpy.dtype to apply
196        '''
197        if coding is None:
198            try:
199                data, coding = np.unique(data, return_inverse=True)
200            except (TypeError, ValueError):
201                dat, idx, coding = np.unique(np.frompyfunc(Ntv.from_obj, 1, 1)(data),
202                                             return_index=True, return_inverse=True)
203                data = data[idx]
204        super().__init__(data, coding=coding, dtype=dtype, unidim=unidim)
205
206    def to_json(self):
207        ''' return a JsonValue of the Dcomplete entity.'''
208        return [Dutil.list_json(self.data), self.coding.tolist()]
209
210    @property
211    def values(self):
212        ''' return the list of values'''
213        return self.data[self.coding]
214
215    @property
216    def _len_val(self):
217        '''return the length of the Dcomplete entity'''
218        return len(self.coding) if self.coding.ndim > 0 else 0

Representation of a one-dimensional array with complete representation (the distinct data and a coding that maps each value to its data).

dynamic values (@property)

methods

Dcomplete(data, ref=None, coding=None, dtype=None, unidim=False)
187    def __init__(self, data, ref=None, coding=None, dtype=None, unidim=False):
188        '''Dcomplete constructor.
189
190        *Parameters*
191
192        - **data**: list, Darray or np.ndarray - data to represent (after coding)
193        - **ref** : unused
194        - **coding**: List of integer (default None) - mapping between data and the list of values
195        - **dtype**: string (default None) - numpy.dtype to apply
196        '''
197        if coding is None:
198            try:
199                data, coding = np.unique(data, return_inverse=True)
200            except (TypeError, ValueError):
201                dat, idx, coding = np.unique(np.frompyfunc(Ntv.from_obj, 1, 1)(data),
202                                             return_index=True, return_inverse=True)
203                data = data[idx]
204        super().__init__(data, coding=coding, dtype=dtype, unidim=unidim)

Dcomplete constructor.

Parameters

  • data: list, Darray or np.ndarray - data to represent (after coding)
  • ref : unused
  • coding: List of integer (default None) - mapping between data and the list of values
  • dtype: string (default None) - numpy.dtype to apply
def to_json(self):
206    def to_json(self):
207        ''' return a JsonValue of the Dcomplete entity.'''
208        return [Dutil.list_json(self.data), self.coding.tolist()]

return a JsonValue of the Dcomplete entity.

values
210    @property
211    def values(self):
212        ''' return the list of values'''
213        return self.data[self.coding]

return the list of values

Inherited Members
Darray
ref
coding
read_json
class Dutil:
221class Dutil:
222    '''np.ndarray utilities.
223
224    *static methods*
225    - `convert`
226    - `is_json`
227    - `ntv_val`
228    - `add_ext`
229    - `split_type`
230    - `ntv_type`
231    - `nda_ntv_type`
232    - `dtype`
233    - `json_ntv`
234    - `split_name`
235    - `split_json_name`
236    '''
237
238    @staticmethod
239    def equals(nself, nother):
240        '''return True if all elements are equals and dtype are equal'''
241        if not (isinstance(nself, np.ndarray) and isinstance(nother, np.ndarray)):
242            return False
243        if nself.dtype != nother.dtype or nself.shape != nother.shape:
244            return False
245        if len(nself.shape) == 0:
246            return True
247        if len(nself) != len(nother):
248            return False
249        if len(nself) == 0:
250            return True
251        if isinstance(nself[0], (np.ndarray, pd.Series, pd.DataFrame)):
252            SeriesConnec = NtvConnector.connector().get('SeriesConnec')
253            DataFrameConnec = NtvConnector.connector().get('DataFrameConnec')
254            equal = {np.ndarray: Dutil.equals,
255                     pd.Series: SeriesConnec.equals,
256                     pd.DataFrame: DataFrameConnec.equals}
257            for nps, npo in zip(nself, nother):
258                if not equal[type(nself[0])](nps, npo):
259                    return False
260            return True
261        return np.array_equal(nself, nother)
262
263    @staticmethod
264    def list_json(nda):
265        '''return a JSON representation of a unidimensional np.ndarray'''
266        if len(nda) == 0:
267            return []
268        if isinstance(nda[0], np.ndarray):
269            return [Dutil.list_json(arr) for arr in nda]
270        return nda.tolist()

np.ndarray utilities.

static methods

  • convert
  • is_json
  • ntv_val
  • add_ext
  • split_type
  • ntv_type
  • nda_ntv_type
  • dtype
  • json_ntv
  • split_name
  • split_json_name
@staticmethod
def equals(nself, nother):
238    @staticmethod
239    def equals(nself, nother):
240        '''return True if all elements are equals and dtype are equal'''
241        if not (isinstance(nself, np.ndarray) and isinstance(nother, np.ndarray)):
242            return False
243        if nself.dtype != nother.dtype or nself.shape != nother.shape:
244            return False
245        if len(nself.shape) == 0:
246            return True
247        if len(nself) != len(nother):
248            return False
249        if len(nself) == 0:
250            return True
251        if isinstance(nself[0], (np.ndarray, pd.Series, pd.DataFrame)):
252            SeriesConnec = NtvConnector.connector().get('SeriesConnec')
253            DataFrameConnec = NtvConnector.connector().get('DataFrameConnec')
254            equal = {np.ndarray: Dutil.equals,
255                     pd.Series: SeriesConnec.equals,
256                     pd.DataFrame: DataFrameConnec.equals}
257            for nps, npo in zip(nself, nother):
258                if not equal[type(nself[0])](nps, npo):
259                    return False
260            return True
261        return np.array_equal(nself, nother)

Return True if all elements are equal and the dtypes are equal.

@staticmethod
def list_json(nda):
263    @staticmethod
264    def list_json(nda):
265        '''return a JSON representation of a unidimensional np.ndarray'''
266        if len(nda) == 0:
267            return []
268        if isinstance(nda[0], np.ndarray):
269            return [Dutil.list_json(arr) for arr in nda]
270        return nda.tolist()

return a JSON representation of a unidimensional np.ndarray