ntv-numpy.ntv_numpy.xdataset

@author: Philippe@loco-labs.io

The xdataset module is part of the ntv-numpy.ntv_numpy package (specification document).

It contains the classes Xdataset, XdatasetInterface, XdatasetCategory for the multidimensional dataset.

For more information, see the user guide or the github repository.

  1# -*- coding: utf-8 -*-
  2"""
  3@author: Philippe@loco-labs.io
  4
  5The `xdataset` module is part of the `ntv-numpy.ntv_numpy` package ([specification document](
  6https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
  7
  8It contains the classes `Xdataset`, `XdatasetInterface`, `XdatasetCategory` for
  9the multidimensional dataset.
 10
 11For more information, see the
 12[user guide](https://loco-philippe.github.io/ntv-numpy/docs/user_guide.html)
 13 or the [github repository](https://github.com/loco-philippe/ntv-numpy).
 14"""
 15from abc import ABC, abstractmethod
 16import json
 17import pprint
 18from json_ntv import Ntv
 19from ntv_numpy.ndarray import Nutil
 20from ntv_numpy.xndarray import Xndarray
 21from ntv_numpy.xconnector import XarrayConnec, ScippConnec, AstropyNDDataConnec
 22from ntv_numpy.xconnector import PandasConnec
 23
 24
 25class XdatasetCategory(ABC):
 26    ''' category of Xndarray (dynamic tuple of full_name) - see Xdataset docstring'''
 27
 28    xnd: list = NotImplemented
 29    names: list = NotImplemented
 30
 31    @abstractmethod
 32    def dims(self, var, json_name=False):
 33        '''method defined in Xdataset class'''
 34
 35    @property
 36    def data_arrays(self):
 37        '''return a tuple of data_arrays Xndarray full_name'''
 38        return tuple(sorted(nda for nda in self.namedarrays
 39                            if nda not in self.dimensions + self.uniques))
 40
 41    @property
 42    def dimensions(self):
 43        '''return a tuple of dimensions Xndarray full_name'''
 44        dimable = []
 45        for var in self.variables:
 46            dimable += self.dims(var)
 47        return tuple(sorted(set(nda for nda in dimable if nda in self.namedarrays)))
 48
 49    @property
 50    def shape(self):
 51        '''return an array with the length of dimensions'''
 52        return [len(self[dim]) for dim in self.dimensions]
 53
 54    @property
 55    def coordinates(self):
 56        '''return a tuple of coordinates Xndarray full_name'''
 57        dims = set(self.dimensions)
 58        if not dims:
 59            return ()
 60        return tuple(sorted(set(xnda.name for xnda in self.xnd
 61                                if xnda.xtype == 'variable' and set(xnda.links) != dims)))
 62
 63    @property
 64    def data_vars(self):
 65        '''return a tuple of data_vars Xndarray full_name'''
 66        dims = set(self.dimensions)
 67        if not dims:
 68            return self.variables
 69        return tuple(sorted(xnda.name for xnda in self.xnd
 70                            if xnda.xtype == 'variable' and set(xnda.links) == dims))
 71
 72    @property
 73    def namedarrays(self):
 74        '''return a tuple of namedarray Xndarray full_name'''
 75        return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'namedarray'))
 76
 77    @property
 78    def variables(self):
 79        '''return a tuple of variables Xndarray full_name'''
 80        return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'variable'))
 81
 82    @property
 83    def undef_vars(self):
 84        '''return a tuple of variables Xndarray full_name with inconsistent shape'''
 85        return tuple(sorted(var for var in self.variables if self[var].shape !=
 86                            [len(self[dim]) for dim in self.dims(var)]))
 87
 88    @property
 89    def undef_links(self):
 90        '''return a tuple of variables Xndarray full_name with inconsistent links'''
 91        return tuple(sorted(link for var in self.variables for link in self[var].links
 92                            if link not in self.names))
 93
 94    @property
 95    def masks(self):
 96        '''return a tuple of additional Xndarray full_name with boolean ntv_type'''
 97        return tuple(sorted(xnda.full_name for xnda in self.xnd
 98                            if xnda.xtype == 'additional' and xnda.ntv_type == 'boolean'))
 99
100    @property
101    def data_add(self):
102        '''return a tuple of additional Xndarray full_name with not boolean ntv_type'''
103        return tuple(sorted(xnda.full_name for xnda in self.xnd
104                            if xnda.xtype == 'additional' and xnda.ntv_type != 'boolean'))
105
106    @property
107    def metadata(self):
108        '''return a tuple of metadata Xndarray full_name'''
109        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'meta'))
110
111    @property
112    def uniques(self):
113        '''return a tuple of unique Xndarray full_name'''
114        return tuple(full_name for full_name in self.namedarrays if len(self[full_name]) == 1)
115
116    @property
117    def additionals(self):
118        '''return a tuple of additionals Xndarray full_name'''
119        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'additional'))
120
121    def group(self, grp):
122        '''return a tuple of Xndarray full_name with the same name'''
123        if isinstance(grp, str):
124            return tuple(sorted(xnda.full_name for xnda in self.xnd
125                                if grp in (xnda.name, xnda.full_name)))
126        return tuple(sorted(nam for gr_nam in grp for nam in self.group(gr_nam)))
127
128    def add_group(self, add_name):
129        '''return a tuple of Xndarray full_name with the same add_name'''
130        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.add_name == add_name))
131
132
class XdatasetInterface(ABC):
    ''' Xdataset interface - see Xdataset docstring

    Import / export methods of a Xdataset (json, xarray, scipp, NDData,
    DataFrame). The exchanges with external libraries are delegated to the
    connector classes of `ntv_numpy.xconnector`.
    '''

    name: str = NotImplemented
    xnd: list = NotImplemented

    @staticmethod
    def read_json(jsn, **kwargs):
        ''' convert json data into a Xdataset.

        *Parameters*

        - **jsn** : json text (string) or json value
        - **convert** : boolean (default True) - If True, convert json data with
        non Numpy ntv_type into Xndarray with python type

        The keyword arguments are forwarded to `Xndarray.read_json` for each
        member of the json value.
        '''
        option = {'convert': True} | kwargs
        jso = json.loads(jsn) if isinstance(jsn, str) else jsn
        value, name = Ntv.decode_json(jso)[:2]

        xnd = [Xndarray.read_json({key: val}, **option)
               for key, val in value.items()]
        return Xdataset(xnd, name)

    def to_json(self, **kwargs):
        ''' convert a Xdataset into json-value.

        *Parameters*

        - **encoded** : Boolean (default False) - json value if False else json text
        - **header** : Boolean (default True) - including 'xdataset' type
        - **notype** : list of Boolean (default list of False) - including data
        type if False (one item per Xndarray)
        - **novalue** : Boolean (default False) - including value if False
        - **noshape** : Boolean (default True) - if True, without shape if dim < 1
        - **format** : list of string (default list of 'full') - representation
        format of the ndarray (one item per Xndarray)

        `notype` and `format` are replaced by their default value if they are
        not a list with one item per Xndarray.
        '''
        size = len(self)
        notype = kwargs.get('notype')
        if not (isinstance(notype, list) and len(notype) == size):
            notype = [False] * size
        formats = kwargs.get('format')
        if not (isinstance(formats, list) and len(formats) == size):
            formats = ['full'] * size
        noshape = kwargs.get('noshape', True)
        novalue = kwargs.get('novalue', False)
        dic_xnd = {}
        # the loop variable has its own name (the list is no longer rebound)
        for xna, notyp, fmt in zip(self.xnd, notype, formats):
            dic_xnd |= xna.to_json(notype=notyp, novalue=novalue,
                                   noshape=noshape, format=fmt, header=False)
        return Nutil.json_ntv(self.name, 'xdataset', dic_xnd,
                              header=kwargs.get('header', True),
                              encoded=kwargs.get('encoded', False))

    def to_xarray(self, **kwargs):
        '''return a xr.DataArray or a xr.Dataset from a Xdataset

        *Parameters*

        - **dataset** : Boolean (default True) - if False and a single data_var,
        return a xr.DataArray
        - **info** : Boolean (default True) - if True, add json representation
        of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs
        '''
        return XarrayConnec.xexport(self, **kwargs)

    @staticmethod
    def from_xarray(xar, **kwargs):
        '''return a Xdataset from a DataArray or a Dataset'''
        return XarrayConnec.ximport(xar, Xdataset, **kwargs)

    def to_scipp(self, **kwargs):
        '''return a sc.DataArray or a sc.Dataset from a Xdataset

        *Parameters*

        - **dataset** : Boolean (default True) - if False and a single data_var,
        return a DataArray
        - **info** : Boolean (default True) - if True return a DataGroup with
        metadata and data_arrays
        - **ntv_type** : Boolean (default True) - if True add ntv-type to the name
        '''
        return ScippConnec.xexport(self, **kwargs)

    @staticmethod
    def from_scipp(sci, **kwargs):
        '''return a Xdataset from a scipp object DataArray, Dataset or DataGroup'''
        return ScippConnec.ximport(sci, Xdataset, **kwargs)

    def to_nddata(self, **kwargs):
        '''return a NDData from a Xdataset'''
        return AstropyNDDataConnec.xexport(self, **kwargs)

    @staticmethod
    def from_nddata(ndd, **kwargs):
        '''return a Xdataset from a NDData'''
        return AstropyNDDataConnec.ximport(ndd, Xdataset, **kwargs)

    def to_dataframe(self, **kwargs):
        '''return a pd.DataFrame from a Xdataset

        *Parameters*

        - **ntv_type**: Boolean (default True) - if False use full_name else json_name
        - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
        - **dims**: list of string (default None) - order of dimensions full_name to apply
        '''
        return PandasConnec.xexport(self, **kwargs)

    @staticmethod
    def from_dataframe(dfr, **kwargs):
        '''return a Xdataset from a pd.DataFrame

        *Parameters*

        - **dims**: list of string (default None) - order of dimensions to apply
        '''
        return PandasConnec.ximport(dfr, Xdataset, **kwargs)
246
247
248class Xdataset(XdatasetCategory, XdatasetInterface):
249    ''' Representation of a multidimensional Dataset
250
251    *Attributes :*
252    - **name** :  String - name of the Xdataset
253    - **xnd**:   list of Xndarray
254
255    *dynamic values (@property)*
256    - `xtype`
257    - `validity`
258    - `dic_xnd`
259    - `partition`
260    - `length`
261    - `info`
262
263    *methods*
264    - `parent`
265    - `dims`
266    - `shape_dims`
267    - `to_canonical`
268    - `to_ndarray`
269    - `to_darray`
270
271    *XdatasetCategory (@property)*
272    - `names`
273    - `data_arrays`
274    - `dimensions`
275    - `coordinates`
276    - `data_vars`
277    - `namedarrays`
278    - `variables`
279    - `undef_vars`
280    - `undef_links`
281    - `masks`
282    - `data_add`
283    - `meta`
284    - `metadata`
285    - `uniques`
286    - `additionals`
287    - `group`
288    - `add_group`
289
290    *XdatasetInterface methods *
291    - `read_json` (static)
292    - `to_json`
293    - `from_xarray` (static)
294    - `to_xarray`
295    - `from_scipp` (static)
296    - `to_scipp`
297    - `from_nddata` (static)
298    - `to_nddata`
299    - `from_dataframe` (static)
300    - `to_dataframe`
301    '''
302
303    def __init__(self, xnd=None, name=None):
304        '''Xdataset constructor
305
306            *Parameters*
307
308            - **xnd** : Xdataset/Xndarray/list of Xndarray (default None),
309            - **name** : String (default None) - name of the Xdataset
310        '''
311        self.name = name
312        match xnd:
313            case list():
314                self.xnd = xnd
315            case xdat if isinstance(xdat, Xdataset):
316                self.name = xdat.name
317                self.xnd = xdat.xnd
318            case xnda if isinstance(xnda, Xndarray):
319                self.xnd = [xnda]
320            case _:
321                self.xnd = []
322
323    def __repr__(self):
324        '''return classname and number of value'''
325        return (self.__class__.__name__ + '[' + str(len(self)) + ']\n' +
326                pprint.pformat(self.to_json(novalue=True, header=False, noshape=False)))
327
328    def __str__(self):
329        '''return json string format'''
330        return json.dumps(self.to_json())
331
332    def __eq__(self, other):
333        '''equal if xnd are equal'''
334        for xnda in self.xnd:
335            if xnda not in other:
336                return False
337        for xnda in other.xnd:
338            if xnda not in self:
339                return False
340        return True
341
342    def __len__(self):
343        '''number of Xndarray'''
344        return len(self.xnd)
345
346    def __contains__(self, item):
347        ''' item of xnd'''
348        return item in self.xnd
349
350    def __getitem__(self, selec):
351        ''' return Xndarray or tuple of Xndarray with selec:
352            - string : name of a xndarray,
353            - integer : index of a xndarray,
354            - index selector : index interval
355            - tuple : names or index '''
356        if selec is None or selec == '' or selec in ([], ()):
357            return self
358        if isinstance(selec, (list, tuple)) and len(selec) == 1:
359            selec = selec[0]
360        if isinstance(selec, tuple):
361            return [self[i] for i in selec]
362        if isinstance(selec, str):
363            return self.dic_xnd[selec]
364        if isinstance(selec, list):
365            return self[selec[0]][selec[1:]]
366        return self.xnd[selec]
367
368    def __delitem__(self, ind):
369        '''remove a Xndarray (ind is index, name or tuple of names).'''
370        if isinstance(ind, int):
371            del self.xnd[ind]
372        elif isinstance(ind, str):
373            del self.xnd[self.names.index(ind)]
374        elif isinstance(ind, tuple):
375            ind_n = [self.names[i] if isinstance(i, int) else i for i in ind]
376            for i in ind_n:
377                del self[i]
378
379    def __copy__(self):
380        ''' Copy all the data '''
381        return self.__class__(self)
382
383    def parent(self, var):
384        '''return the Xndarray parent (where the full_name is equal to the name)'''
385        if var.name in self.names:
386            return self[var.name]
387        return var
388
389    def dims(self, var, json_name=False):
390        '''return the list of parent namedarrays of the links of a Xndarray
391
392        *parameters*
393
394        - **var**: string - full_name of the Xndarray
395        - **json_name**: boolean (defaut False) - if True return json_name else full_name
396        '''
397        if var not in self.names:
398            return None
399        if self[var].add_name and not self[var].links:
400            return self.dims(self[var].name, json_name)
401        if var in self.namedarrays:
402            return [self[var].json_name if json_name else var]
403        if var not in self.variables + self.additionals:
404            return None
405        list_dims = []
406        for link in self[var].links:
407            list_dims += self.dims(link, json_name) if self.dims(link,
408                                                                 json_name) else [link]
409        return list_dims
410
411    def shape_dims(self, var):
412        '''return a shape with the dimensions associated to the var full_name'''
413        return [len(self[dim]) for dim in self.dims(var)
414                ] if set(self.dims(var)) <= set(self.names) else None
415
416    @property
417    def validity(self):
418        '''return the validity state: 'inconsistent', 'undifined' or 'valid' '''
419        for xnda in self:
420            if xnda.mode in ['relative', 'inconsistent']:
421                return 'undefined'
422        if self.undef_links or self.undef_vars:
423            return 'inconsistent'
424        return 'valid'
425
426    @property
427    def xtype(self):
428        '''return the Xdataset type: 'meta', 'group', 'mono', 'multi' '''
429        if self.metadata and not (self.additionals or self.variables or
430                                  self.namedarrays):
431            return 'meta'
432        if self.validity != 'valid':
433            return 'group'
434        match len(self.data_vars):
435            case 0:
436                return 'group'
437            case 1:
438                return 'mono'
439            case _:
440                return 'multi'
441
442    @property
443    def dic_xnd(self):
444        '''return a dict of Xndarray where key is the full_name'''
445        return {xnda.full_name: xnda for xnda in self.xnd}
446
447    @property
448    def length(self):
449        '''return the max length of Xndarray'''
450        return max(len(xnda) for xnda in self.xnd)
451
452    @property
453    def names(self):
454        '''return a tuple with the Xndarray full_name'''
455        return tuple(xnda.full_name for xnda in self.xnd)
456
457    @property
458    def partition(self):
459        '''return a dict of Xndarray grouped with category'''
460        dic = {}
461        dic |= {'data_vars': list(self.data_vars)} if self.data_vars else {}
462        dic |= {'data_arrays': list(self.data_arrays)
463                } if self.data_arrays else {}
464        dic |= {'dimensions': list(self.dimensions)} if self.dimensions else {}
465        dic |= {'coordinates': list(self.coordinates)
466                } if self.coordinates else {}
467        dic |= {'additionals': list(self.additionals)
468                } if self.additionals else {}
469        dic |= {'metadata': list(self.metadata)} if self.metadata else {}
470        dic |= {'uniques': list(self.uniques)} if self.uniques else {}
471        return dic
472
473    @property
474    def info(self):
475        '''return a dict with Xdataset information '''
476        inf = {'name': self.name, 'xtype': self.xtype} | self.partition
477        inf['validity'] = self.validity
478        inf['length'] = len(self[self.data_vars[0]]) if self.data_vars else 0
479        inf['width'] = len(self)
480        data = {name: {key: val for key, val in self[name].info.items() if key != 'name'}
481                for name in self.names}
482        return {'structure': {key: val for key, val in inf.items() if val},
483                'data': {key: val for key, val in data.items() if val}}
484
485    @property
486    def tab_info(self):
487        '''return a dict with Xdataset information for tabular interface'''
488        info = self.info
489        data = info['data']
490        t_info = {}
491        if 'dimensions' in info['structure']:
492            t_info['dimensions'] = info['structure']['dimensions']
493        t_info['data'] = {name: {key: val for key, val in data[name].items()
494                                 if key in ['shape', 'xtype', 'meta', 'links']}
495                          for name in data}
496        return t_info
497
498    def to_canonical(self):
499        '''remove optional links of the included Xndarray'''
500        for name in self.names:
501            if self[name].links in ([self[name].name], [name]):
502                self[name].links = None
503        for add in self.additionals:
504            if self[add].links in [self[self[add].name].links,
505                                   [self[add].name]]:
506                self[add].links = None
507        for unic in self.uniques:
508            self[unic].links = None
509        return self
510
511    def to_ndarray(self, full_name):
512        '''convert a Xndarray from a Xdataset in a np.ndarray'''
513        if self.shape_dims(full_name) is None:
514            data = self[full_name].ndarray
515        else:
516            data = self[full_name].darray.reshape(self.shape_dims(full_name))
517        if data.dtype.name[:8] == 'datetime':
518            data = data.astype('datetime64[ns]')
519        return data
520
521    def to_darray(self, full_name):
522        '''convert a Xndarray from a Xdataset in a flattened np.ndarray'''
523        data = self[full_name].darray
524        if data.dtype.name[:8] == 'datetime':
525            data = data.astype('datetime64[ns]')
526        return data
class XdatasetCategory(abc.ABC):
 26class XdatasetCategory(ABC):
 27    ''' category of Xndarray (dynamic tuple of full_name) - see Xdataset docstring'''
 28
 29    xnd: list = NotImplemented
 30    names: list = NotImplemented
 31
 32    @abstractmethod
 33    def dims(self, var, json_name=False):
 34        '''method defined in Xdataset class'''
 35
 36    @property
 37    def data_arrays(self):
 38        '''return a tuple of data_arrays Xndarray full_name'''
 39        return tuple(sorted(nda for nda in self.namedarrays
 40                            if nda not in self.dimensions + self.uniques))
 41
 42    @property
 43    def dimensions(self):
 44        '''return a tuple of dimensions Xndarray full_name'''
 45        dimable = []
 46        for var in self.variables:
 47            dimable += self.dims(var)
 48        return tuple(sorted(set(nda for nda in dimable if nda in self.namedarrays)))
 49
 50    @property
 51    def shape(self):
 52        '''return an array with the length of dimensions'''
 53        return [len(self[dim]) for dim in self.dimensions]
 54
 55    @property
 56    def coordinates(self):
 57        '''return a tuple of coordinates Xndarray full_name'''
 58        dims = set(self.dimensions)
 59        if not dims:
 60            return ()
 61        return tuple(sorted(set(xnda.name for xnda in self.xnd
 62                                if xnda.xtype == 'variable' and set(xnda.links) != dims)))
 63
 64    @property
 65    def data_vars(self):
 66        '''return a tuple of data_vars Xndarray full_name'''
 67        dims = set(self.dimensions)
 68        if not dims:
 69            return self.variables
 70        return tuple(sorted(xnda.name for xnda in self.xnd
 71                            if xnda.xtype == 'variable' and set(xnda.links) == dims))
 72
 73    @property
 74    def namedarrays(self):
 75        '''return a tuple of namedarray Xndarray full_name'''
 76        return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'namedarray'))
 77
 78    @property
 79    def variables(self):
 80        '''return a tuple of variables Xndarray full_name'''
 81        return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'variable'))
 82
 83    @property
 84    def undef_vars(self):
 85        '''return a tuple of variables Xndarray full_name with inconsistent shape'''
 86        return tuple(sorted(var for var in self.variables if self[var].shape !=
 87                            [len(self[dim]) for dim in self.dims(var)]))
 88
 89    @property
 90    def undef_links(self):
 91        '''return a tuple of variables Xndarray full_name with inconsistent links'''
 92        return tuple(sorted(link for var in self.variables for link in self[var].links
 93                            if link not in self.names))
 94
 95    @property
 96    def masks(self):
 97        '''return a tuple of additional Xndarray full_name with boolean ntv_type'''
 98        return tuple(sorted(xnda.full_name for xnda in self.xnd
 99                            if xnda.xtype == 'additional' and xnda.ntv_type == 'boolean'))
100
101    @property
102    def data_add(self):
103        '''return a tuple of additional Xndarray full_name with not boolean ntv_type'''
104        return tuple(sorted(xnda.full_name for xnda in self.xnd
105                            if xnda.xtype == 'additional' and xnda.ntv_type != 'boolean'))
106
107    @property
108    def metadata(self):
109        '''return a tuple of metadata Xndarray full_name'''
110        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'meta'))
111
112    @property
113    def uniques(self):
114        '''return a tuple of unique Xndarray full_name'''
115        return tuple(full_name for full_name in self.namedarrays if len(self[full_name]) == 1)
116
117    @property
118    def additionals(self):
119        '''return a tuple of additionals Xndarray full_name'''
120        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'additional'))
121
122    def group(self, grp):
123        '''return a tuple of Xndarray full_name with the same name'''
124        if isinstance(grp, str):
125            return tuple(sorted(xnda.full_name for xnda in self.xnd
126                                if grp in (xnda.name, xnda.full_name)))
127        return tuple(sorted(nam for gr_nam in grp for nam in self.group(gr_nam)))
128
129    def add_group(self, add_name):
130        '''return a tuple of Xndarray full_name with the same add_name'''
131        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.add_name == add_name))

category of Xndarray (dynamic tuple of full_name) - see Xdataset docstring

xnd: list = NotImplemented
names: list = NotImplemented
@abstractmethod
def dims(self, var, json_name=False):
    @abstractmethod
    def dims(self, var, json_name=False):
        '''abstract method - the implementation is defined in Xdataset class'''

method defined in Xdataset class

data_arrays
36    @property
37    def data_arrays(self):
38        '''return a tuple of data_arrays Xndarray full_name'''
39        return tuple(sorted(nda for nda in self.namedarrays
40                            if nda not in self.dimensions + self.uniques))

return a tuple of data_arrays Xndarray full_name

dimensions
42    @property
43    def dimensions(self):
44        '''return a tuple of dimensions Xndarray full_name'''
45        dimable = []
46        for var in self.variables:
47            dimable += self.dims(var)
48        return tuple(sorted(set(nda for nda in dimable if nda in self.namedarrays)))

return a tuple of dimensions Xndarray full_name

shape
50    @property
51    def shape(self):
52        '''return an array with the length of dimensions'''
53        return [len(self[dim]) for dim in self.dimensions]

return an array with the length of dimensions

coordinates
55    @property
56    def coordinates(self):
57        '''return a tuple of coordinates Xndarray full_name'''
58        dims = set(self.dimensions)
59        if not dims:
60            return ()
61        return tuple(sorted(set(xnda.name for xnda in self.xnd
62                                if xnda.xtype == 'variable' and set(xnda.links) != dims)))

return a tuple of coordinates Xndarray full_name

data_vars
64    @property
65    def data_vars(self):
66        '''return a tuple of data_vars Xndarray full_name'''
67        dims = set(self.dimensions)
68        if not dims:
69            return self.variables
70        return tuple(sorted(xnda.name for xnda in self.xnd
71                            if xnda.xtype == 'variable' and set(xnda.links) == dims))

return a tuple of data_vars Xndarray full_name

namedarrays
73    @property
74    def namedarrays(self):
75        '''return a tuple of namedarray Xndarray full_name'''
76        return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'namedarray'))

return a tuple of namedarray Xndarray full_name

variables
78    @property
79    def variables(self):
80        '''return a tuple of variables Xndarray full_name'''
81        return tuple(sorted(xnda.name for xnda in self.xnd if xnda.xtype == 'variable'))

return a tuple of variables Xndarray full_name

undef_vars
83    @property
84    def undef_vars(self):
85        '''return a tuple of variables Xndarray full_name with inconsistent shape'''
86        return tuple(sorted(var for var in self.variables if self[var].shape !=
87                            [len(self[dim]) for dim in self.dims(var)]))

return a tuple of variables Xndarray full_name with inconsistent shape

masks
95    @property
96    def masks(self):
97        '''return a tuple of additional Xndarray full_name with boolean ntv_type'''
98        return tuple(sorted(xnda.full_name for xnda in self.xnd
99                            if xnda.xtype == 'additional' and xnda.ntv_type == 'boolean'))

return a tuple of additional Xndarray full_name with boolean ntv_type

data_add
101    @property
102    def data_add(self):
103        '''return a tuple of additional Xndarray full_name with not boolean ntv_type'''
104        return tuple(sorted(xnda.full_name for xnda in self.xnd
105                            if xnda.xtype == 'additional' and xnda.ntv_type != 'boolean'))

return a tuple of additional Xndarray full_name with not boolean ntv_type

metadata
107    @property
108    def metadata(self):
109        '''return a tuple of metadata Xndarray full_name'''
110        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'meta'))

return a tuple of metadata Xndarray full_name

uniques
112    @property
113    def uniques(self):
114        '''return a tuple of unique Xndarray full_name'''
115        return tuple(full_name for full_name in self.namedarrays if len(self[full_name]) == 1)

return a tuple of unique Xndarray full_name

additionals
117    @property
118    def additionals(self):
119        '''return a tuple of additionals Xndarray full_name'''
120        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.xtype == 'additional'))

return a tuple of additionals Xndarray full_name

def group(self, grp):
122    def group(self, grp):
123        '''return a tuple of Xndarray full_name with the same name'''
124        if isinstance(grp, str):
125            return tuple(sorted(xnda.full_name for xnda in self.xnd
126                                if grp in (xnda.name, xnda.full_name)))
127        return tuple(sorted(nam for gr_nam in grp for nam in self.group(gr_nam)))

return a tuple of Xndarray full_name with the same name

def add_group(self, add_name):
129    def add_group(self, add_name):
130        '''return a tuple of Xndarray full_name with the same add_name'''
131        return tuple(sorted(xnda.full_name for xnda in self.xnd if xnda.add_name == add_name))

return a tuple of Xndarray full_name with the same add_name

class XdatasetInterface(abc.ABC):
class XdatasetInterface(ABC):
    ''' Xdataset interface (json, xarray, scipp, NDData and DataFrame
    conversions) - see Xdataset docstring'''

    name: str = NotImplemented
    xnd: list = NotImplemented

    @staticmethod
    def read_json(jsn, **kwargs):
        ''' build a Xdataset from json data (json text or json value).

        *Parameters*

        - **convert** : boolean (default True) - If True, convert json data with
        non Numpy ntv_type into Xndarray with python type
        '''
        options = {'convert': True}
        options.update(kwargs)
        json_value = jsn
        if isinstance(jsn, str):
            json_value = json.loads(jsn)
        decoded = Ntv.decode_json(json_value)
        value, name = decoded[:2]
        arrays = []
        # each item of the json object is read as a single Xndarray
        for key, val in value.items():
            arrays.append(Xndarray.read_json({key: val}, **options))
        return Xdataset(arrays, name)

    def to_json(self, **kwargs):
        ''' convert a Xdataset into json-value.

        *Parameters*

        - **encoded** : Boolean (default False) - json value if False else json text
        - **header** : Boolean (default True) - including 'xdataset' type
        - **notype** : list of Boolean (default list of False) - including data type if False
        - **novalue** : Boolean (default False) - including value if False
        - **noshape** : Boolean (default True) - if True, without shape if dim < 1
        - **format** : list of string (default list of 'full') - representation
        format of the ndarray,

        The `notype` and `format` parameters are used only if they are lists
        with one item per Xndarray, otherwise the default list is applied.
        '''
        size = len(self)

        def per_array(key, default):
            '''return kwargs[key] if it is a list of one item per Xndarray
            else a list of default values'''
            candidate = kwargs.get(key)
            if isinstance(candidate, list) and len(candidate) == size:
                return candidate
            return [default] * size

        notypes = per_array('notype', False)
        formats = per_array('format', 'full')
        noshape = kwargs.get('noshape', True)
        novalue = kwargs.get('novalue', False)
        dic_xnd = {}
        for xna, notyp, fmt in zip(self.xnd, notypes, formats):
            dic_xnd.update(xna.to_json(notype=notyp, novalue=novalue,
                                       noshape=noshape, format=fmt, header=False))
        return Nutil.json_ntv(self.name, 'xdataset', dic_xnd,
                              header=kwargs.get('header', True),
                              encoded=kwargs.get('encoded', False))

    def to_xarray(self, **kwargs):
        '''return a xr.DataArray or a xr.Dataset from a Xdataset
        (delegated to `XarrayConnec.xexport`)

        *Parameters*

        - **dataset** : Boolean (default True) - if False and a single data_var,
        return a xr.DataArray
        - **info** : Boolean (default True) - if True, add json representation
        of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs
        '''
        connector = XarrayConnec
        return connector.xexport(self, **kwargs)

    @staticmethod
    def from_xarray(xar, **kwargs):
        '''return a Xdataset from a DataArray or a Dataset
        (delegated to `XarrayConnec.ximport`)'''
        connector = XarrayConnec
        return connector.ximport(xar, Xdataset, **kwargs)

    def to_scipp(self, **kwargs):
        '''return a sc.DataArray or a sc.Dataset from a Xdataset
        (delegated to `ScippConnec.xexport`)

        *Parameters*

        - **dataset** : Boolean (default True) - if False and a single data_var,
        return a DataArray
        - **info** : Boolean (default True) - if True return a DataGroup with
        metadata and data_arrays
        - **ntv_type** : Boolean (default True) - if True add ntv-type to the name
        '''
        connector = ScippConnec
        return connector.xexport(self, **kwargs)

    @staticmethod
    def from_scipp(sci, **kwargs):
        '''return a Xdataset from a scipp object DataArray, Dataset or DataGroup
        (delegated to `ScippConnec.ximport`)'''
        connector = ScippConnec
        return connector.ximport(sci, Xdataset, **kwargs)

    def to_nddata(self, **kwargs):
        '''return a NDData from a Xdataset
        (delegated to `AstropyNDDataConnec.xexport`)'''
        connector = AstropyNDDataConnec
        return connector.xexport(self, **kwargs)

    @staticmethod
    def from_nddata(ndd, **kwargs):
        '''return a Xdataset from a NDData
        (delegated to `AstropyNDDataConnec.ximport`)'''
        connector = AstropyNDDataConnec
        return connector.ximport(ndd, Xdataset, **kwargs)

    def to_dataframe(self, **kwargs):
        '''return a pd.DataFrame from a Xdataset
        (delegated to `PandasConnec.xexport`)

        *Parameters*

        - **ntv_type**: Boolean (default True) - if False use full_name else json_name
        - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
        - **dims**: list of string (default None) - order of dimensions full_name to apply
        '''
        connector = PandasConnec
        return connector.xexport(self, **kwargs)

    @staticmethod
    def from_dataframe(dfr, **kwargs):
        '''return a Xdataset from a pd.DataFrame
        (delegated to `PandasConnec.ximport`)

        *Parameters*

        - **dims**: list of string (default None) - order of dimensions to apply
        '''
        connector = PandasConnec
        return connector.ximport(dfr, Xdataset, **kwargs)

Xdataset interface - see Xdataset docstring

name: str = NotImplemented
xnd: list = NotImplemented
@staticmethod
def read_json(jsn, **kwargs):
140    @staticmethod
141    def read_json(jsn, **kwargs):
142        ''' convert json data into a Xdataset.
143
144        *Parameters*
145
146        - **convert** : boolean (default True) - If True, convert json data with
147        non Numpy ntv_type into Xndarray with python type
148        '''
149        option = {'convert': True} | kwargs
150        jso = json.loads(jsn) if isinstance(jsn, str) else jsn
151        value, name = Ntv.decode_json(jso)[:2]
152
153        xnd = [Xndarray.read_json({key: val}, **option)
154               for key, val in value.items()]
155        return Xdataset(xnd, name)

convert json data into a Xdataset.

Parameters

  • convert : boolean (default True) - If True, convert json data with non Numpy ntv_type into Xndarray with python type
def to_json(self, **kwargs):
157    def to_json(self, **kwargs):
158        ''' convert a Xdataset into json-value.
159
160        *Parameters*
161
162        - **encoded** : Boolean (default False) - json value if False else json text
163        - **header** : Boolean (default True) - including 'xdataset' type
164        - **notype** : list of Boolean (default list of None) - including data type if False
165        - **novalue** : Boolean (default False) - including value if False
166        - **noshape** : Boolean (default True) - if True, without shape if dim < 1
167        - **format** : list of string (default list of 'full') - representation
168        format of the ndarray,
169        '''
170        notype = kwargs['notype'] if ('notype' in kwargs and isinstance(kwargs['notype'], list) and
171                                      len(kwargs['notype']) == len(self)) else [False] * len(self)
172        forma = kwargs['format'] if ('format' in kwargs and isinstance(kwargs['format'], list) and
173                                     len(kwargs['format']) == len(self)) else ['full'] * len(self)
174        noshape = kwargs.get('noshape', True)
175        dic_xnd = {}
176        for xna, notyp, forma in zip(self.xnd, notype, forma):
177            dic_xnd |= xna.to_json(notype=notyp, novalue=kwargs.get('novalue', False),
178                                   noshape=noshape, format=forma, header=False)
179        return Nutil.json_ntv(self.name, 'xdataset', dic_xnd,
180                              header=kwargs.get('header', True),
181                              encoded=kwargs.get('encoded', False))

convert a Xdataset into json-value.

Parameters

  • encoded : Boolean (default False) - json value if False else json text
  • header : Boolean (default True) - including 'xdataset' type
  • notype : list of Boolean (default list of False) - including data type if False
  • novalue : Boolean (default False) - including value if False
  • noshape : Boolean (default True) - if True, without shape if dim < 1
  • format : list of string (default list of 'full') - representation format of the ndarray,
def to_xarray(self, **kwargs):
183    def to_xarray(self, **kwargs):
184        '''return a xr.DataArray or a xr.Dataset from a Xdataset
185
186        *Parameters*
187
188        - **dataset** : Boolean (default True) - if False and a single data_var,
189        return a xr.DataArray
190        - **info** : Boolean (default True) - if True, add json representation
191        of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs
192        '''
193        return XarrayConnec.xexport(self, **kwargs)

return a xr.DataArray or a xr.Dataset from a Xdataset

Parameters

  • dataset : Boolean (default True) - if False and a single data_var, return a xr.DataArray
  • info : Boolean (default True) - if True, add json representation of 'relative' Xndarrays and 'data_arrays' Xndarrays in attrs
@staticmethod
def from_xarray(xar, **kwargs):
195    @staticmethod
196    def from_xarray(xar, **kwargs):
197        '''return a Xdataset from a DataArray or a Dataset'''
198        return XarrayConnec.ximport(xar, Xdataset, **kwargs)

return a Xdataset from a DataArray or a Dataset

def to_scipp(self, **kwargs):
200    def to_scipp(self, **kwargs):
201        '''return a sc.DataArray or a sc.Dataset from a Xdataset
202
203        *Parameters*
204
205        - **dataset** : Boolean (default True) - if False and a single data_var,
206        return a DataArray
207        - **info** : Boolean (default True) - if True return a DataGroup with
208        metadata and data_arrays
209        - **ntv_type** : Boolean (default True) - if True add ntv-type to the name
210        '''
211        return ScippConnec.xexport(self, **kwargs)

return a sc.DataArray or a sc.Dataset from a Xdataset

Parameters

  • dataset : Boolean (default True) - if False and a single data_var, return a DataArray
  • info : Boolean (default True) - if True return a DataGroup with metadata and data_arrays
  • ntv_type : Boolean (default True) - if True add ntv-type to the name
@staticmethod
def from_scipp(sci, **kwargs):
213    @staticmethod
214    def from_scipp(sci, **kwargs):
215        '''return a Xdataset from a scipp object DataArray, Dataset or DataGroup'''
216        return ScippConnec.ximport(sci, Xdataset, **kwargs)

return a Xdataset from a scipp object DataArray, Dataset or DataGroup

def to_nddata(self, **kwargs):
218    def to_nddata(self, **kwargs):
219        '''return a NDData from a Xdataset'''
220        return AstropyNDDataConnec.xexport(self, **kwargs)

return a NDData from a Xdataset

@staticmethod
def from_nddata(ndd, **kwargs):
222    @staticmethod
223    def from_nddata(ndd, **kwargs):
224        '''return a Xdataset from a NDData'''
225        return AstropyNDDataConnec.ximport(ndd, Xdataset, **kwargs)

return a Xdataset from a NDData

def to_dataframe(self, **kwargs):
227    def to_dataframe(self, **kwargs):
228        '''return a pd.DataFrame from a Xdataset
229
230        *Parameters*
231
232        - **ntv_type**: Boolean (default True) - if False use full_name else json_name
233        - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
234        - **dims**: list of string (default None) - order of dimensions full_name to apply
235        '''
236        return PandasConnec.xexport(self, **kwargs)

return a pd.DataFrame from a Xdataset

Parameters

  • ntv_type: Boolean (default True) - if False use full_name else json_name
  • info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
  • dims: list of string (default None) - order of dimensions full_name to apply
@staticmethod
def from_dataframe(dfr, **kwargs):
238    @staticmethod
239    def from_dataframe(dfr, **kwargs):
240        '''return a Xdataset from a pd.DataFrame
241
242        *Parameters*
243
244        - dims: list of string (default None) - order of dimensions to apply
245        '''
246        return PandasConnec.ximport(dfr, Xdataset, **kwargs)

return a Xdataset from a pd.DataFrame

Parameters

  • dims: list of string (default None) - order of dimensions to apply
class Xdataset(XdatasetCategory, XdatasetInterface):
249class Xdataset(XdatasetCategory, XdatasetInterface):
250    ''' Representation of a multidimensional Dataset
251
252    *Attributes :*
253    - **name** :  String - name of the Xdataset
254    - **xnd**:   list of Xndarray
255
256    *dynamic values (@property)*
257    - `xtype`
258    - `validity`
259    - `dic_xnd`
260    - `partition`
261    - `length`
262    - `info`
263
264    *methods*
265    - `parent`
266    - `dims`
267    - `shape_dims`
268    - `to_canonical`
269    - `to_ndarray`
270    - `to_darray`
271
272    *XdatasetCategory (@property)*
273    - `names`
274    - `data_arrays`
275    - `dimensions`
276    - `coordinates`
277    - `data_vars`
278    - `namedarrays`
279    - `variables`
280    - `undef_vars`
281    - `undef_links`
282    - `masks`
283    - `data_add`
284    - `meta`
285    - `metadata`
286    - `uniques`
287    - `additionals`
288    - `group`
289    - `add_group`
290
291    *XdatasetInterface methods *
292    - `read_json` (static)
293    - `to_json`
294    - `from_xarray` (static)
295    - `to_xarray`
296    - `from_scipp` (static)
297    - `to_scipp`
298    - `from_nddata` (static)
299    - `to_nddata`
300    - `from_dataframe` (static)
301    - `to_dataframe`
302    '''
303
304    def __init__(self, xnd=None, name=None):
305        '''Xdataset constructor
306
307            *Parameters*
308
309            - **xnd** : Xdataset/Xndarray/list of Xndarray (default None),
310            - **name** : String (default None) - name of the Xdataset
311        '''
312        self.name = name
313        match xnd:
314            case list():
315                self.xnd = xnd
316            case xdat if isinstance(xdat, Xdataset):
317                self.name = xdat.name
318                self.xnd = xdat.xnd
319            case xnda if isinstance(xnda, Xndarray):
320                self.xnd = [xnda]
321            case _:
322                self.xnd = []
323
324    def __repr__(self):
325        '''return classname and number of value'''
326        return (self.__class__.__name__ + '[' + str(len(self)) + ']\n' +
327                pprint.pformat(self.to_json(novalue=True, header=False, noshape=False)))
328
329    def __str__(self):
330        '''return json string format'''
331        return json.dumps(self.to_json())
332
333    def __eq__(self, other):
334        '''equal if xnd are equal'''
335        for xnda in self.xnd:
336            if xnda not in other:
337                return False
338        for xnda in other.xnd:
339            if xnda not in self:
340                return False
341        return True
342
343    def __len__(self):
344        '''number of Xndarray'''
345        return len(self.xnd)
346
347    def __contains__(self, item):
348        ''' item of xnd'''
349        return item in self.xnd
350
351    def __getitem__(self, selec):
352        ''' return Xndarray or tuple of Xndarray with selec:
353            - string : name of a xndarray,
354            - integer : index of a xndarray,
355            - index selector : index interval
356            - tuple : names or index '''
357        if selec is None or selec == '' or selec in ([], ()):
358            return self
359        if isinstance(selec, (list, tuple)) and len(selec) == 1:
360            selec = selec[0]
361        if isinstance(selec, tuple):
362            return [self[i] for i in selec]
363        if isinstance(selec, str):
364            return self.dic_xnd[selec]
365        if isinstance(selec, list):
366            return self[selec[0]][selec[1:]]
367        return self.xnd[selec]
368
369    def __delitem__(self, ind):
370        '''remove a Xndarray (ind is index, name or tuple of names).'''
371        if isinstance(ind, int):
372            del self.xnd[ind]
373        elif isinstance(ind, str):
374            del self.xnd[self.names.index(ind)]
375        elif isinstance(ind, tuple):
376            ind_n = [self.names[i] if isinstance(i, int) else i for i in ind]
377            for i in ind_n:
378                del self[i]
379
380    def __copy__(self):
381        ''' Copy all the data '''
382        return self.__class__(self)
383
384    def parent(self, var):
385        '''return the Xndarray parent (where the full_name is equal to the name)'''
386        if var.name in self.names:
387            return self[var.name]
388        return var
389
390    def dims(self, var, json_name=False):
391        '''return the list of parent namedarrays of the links of a Xndarray
392
393        *parameters*
394
395        - **var**: string - full_name of the Xndarray
396        - **json_name**: boolean (defaut False) - if True return json_name else full_name
397        '''
398        if var not in self.names:
399            return None
400        if self[var].add_name and not self[var].links:
401            return self.dims(self[var].name, json_name)
402        if var in self.namedarrays:
403            return [self[var].json_name if json_name else var]
404        if var not in self.variables + self.additionals:
405            return None
406        list_dims = []
407        for link in self[var].links:
408            list_dims += self.dims(link, json_name) if self.dims(link,
409                                                                 json_name) else [link]
410        return list_dims
411
412    def shape_dims(self, var):
413        '''return a shape with the dimensions associated to the var full_name'''
414        return [len(self[dim]) for dim in self.dims(var)
415                ] if set(self.dims(var)) <= set(self.names) else None
416
417    @property
418    def validity(self):
419        '''return the validity state: 'inconsistent', 'undifined' or 'valid' '''
420        for xnda in self:
421            if xnda.mode in ['relative', 'inconsistent']:
422                return 'undefined'
423        if self.undef_links or self.undef_vars:
424            return 'inconsistent'
425        return 'valid'
426
427    @property
428    def xtype(self):
429        '''return the Xdataset type: 'meta', 'group', 'mono', 'multi' '''
430        if self.metadata and not (self.additionals or self.variables or
431                                  self.namedarrays):
432            return 'meta'
433        if self.validity != 'valid':
434            return 'group'
435        match len(self.data_vars):
436            case 0:
437                return 'group'
438            case 1:
439                return 'mono'
440            case _:
441                return 'multi'
442
443    @property
444    def dic_xnd(self):
445        '''return a dict of Xndarray where key is the full_name'''
446        return {xnda.full_name: xnda for xnda in self.xnd}
447
448    @property
449    def length(self):
450        '''return the max length of Xndarray'''
451        return max(len(xnda) for xnda in self.xnd)
452
453    @property
454    def names(self):
455        '''return a tuple with the Xndarray full_name'''
456        return tuple(xnda.full_name for xnda in self.xnd)
457
458    @property
459    def partition(self):
460        '''return a dict of Xndarray grouped with category'''
461        dic = {}
462        dic |= {'data_vars': list(self.data_vars)} if self.data_vars else {}
463        dic |= {'data_arrays': list(self.data_arrays)
464                } if self.data_arrays else {}
465        dic |= {'dimensions': list(self.dimensions)} if self.dimensions else {}
466        dic |= {'coordinates': list(self.coordinates)
467                } if self.coordinates else {}
468        dic |= {'additionals': list(self.additionals)
469                } if self.additionals else {}
470        dic |= {'metadata': list(self.metadata)} if self.metadata else {}
471        dic |= {'uniques': list(self.uniques)} if self.uniques else {}
472        return dic
473
474    @property
475    def info(self):
476        '''return a dict with Xdataset information '''
477        inf = {'name': self.name, 'xtype': self.xtype} | self.partition
478        inf['validity'] = self.validity
479        inf['length'] = len(self[self.data_vars[0]]) if self.data_vars else 0
480        inf['width'] = len(self)
481        data = {name: {key: val for key, val in self[name].info.items() if key != 'name'}
482                for name in self.names}
483        return {'structure': {key: val for key, val in inf.items() if val},
484                'data': {key: val for key, val in data.items() if val}}
485
486    @property
487    def tab_info(self):
488        '''return a dict with Xdataset information for tabular interface'''
489        info = self.info
490        data = info['data']
491        t_info = {}
492        if 'dimensions' in info['structure']:
493            t_info['dimensions'] = info['structure']['dimensions']
494        t_info['data'] = {name: {key: val for key, val in data[name].items()
495                                 if key in ['shape', 'xtype', 'meta', 'links']}
496                          for name in data}
497        return t_info
498
499    def to_canonical(self):
500        '''remove optional links of the included Xndarray'''
501        for name in self.names:
502            if self[name].links in ([self[name].name], [name]):
503                self[name].links = None
504        for add in self.additionals:
505            if self[add].links in [self[self[add].name].links,
506                                   [self[add].name]]:
507                self[add].links = None
508        for unic in self.uniques:
509            self[unic].links = None
510        return self
511
512    def to_ndarray(self, full_name):
513        '''convert a Xndarray from a Xdataset in a np.ndarray'''
514        if self.shape_dims(full_name) is None:
515            data = self[full_name].ndarray
516        else:
517            data = self[full_name].darray.reshape(self.shape_dims(full_name))
518        if data.dtype.name[:8] == 'datetime':
519            data = data.astype('datetime64[ns]')
520        return data
521
522    def to_darray(self, full_name):
523        '''convert a Xndarray from a Xdataset in a flattened np.ndarray'''
524        data = self[full_name].darray
525        if data.dtype.name[:8] == 'datetime':
526            data = data.astype('datetime64[ns]')
527        return data

Representation of a multidimensional Dataset

Attributes :

  • name : String - name of the Xdataset
  • xnd: list of Xndarray

dynamic values (@property)

methods

XdatasetCategory (@property)

XdatasetInterface methods

Xdataset(xnd=None, name=None)
304    def __init__(self, xnd=None, name=None):
305        '''Xdataset constructor
306
307            *Parameters*
308
309            - **xnd** : Xdataset/Xndarray/list of Xndarray (default None),
310            - **name** : String (default None) - name of the Xdataset
311        '''
312        self.name = name
313        match xnd:
314            case list():
315                self.xnd = xnd
316            case xdat if isinstance(xdat, Xdataset):
317                self.name = xdat.name
318                self.xnd = xdat.xnd
319            case xnda if isinstance(xnda, Xndarray):
320                self.xnd = [xnda]
321            case _:
322                self.xnd = []

Xdataset constructor

Parameters

  • xnd : Xdataset/Xndarray/list of Xndarray (default None),
  • name : String (default None) - name of the Xdataset
name = NotImplemented
def parent(self, var):
384    def parent(self, var):
385        '''return the Xndarray parent (where the full_name is equal to the name)'''
386        if var.name in self.names:
387            return self[var.name]
388        return var

return the Xndarray parent (where the full_name is equal to the name)

def dims(self, var, json_name=False):
390    def dims(self, var, json_name=False):
391        '''return the list of parent namedarrays of the links of a Xndarray
392
393        *parameters*
394
395        - **var**: string - full_name of the Xndarray
396        - **json_name**: boolean (defaut False) - if True return json_name else full_name
397        '''
398        if var not in self.names:
399            return None
400        if self[var].add_name and not self[var].links:
401            return self.dims(self[var].name, json_name)
402        if var in self.namedarrays:
403            return [self[var].json_name if json_name else var]
404        if var not in self.variables + self.additionals:
405            return None
406        list_dims = []
407        for link in self[var].links:
408            list_dims += self.dims(link, json_name) if self.dims(link,
409                                                                 json_name) else [link]
410        return list_dims

return the list of parent namedarrays of the links of a Xndarray

parameters

  • var: string - full_name of the Xndarray
  • json_name: boolean (default False) - if True return json_name else full_name
def shape_dims(self, var):
412    def shape_dims(self, var):
413        '''return a shape with the dimensions associated to the var full_name'''
414        return [len(self[dim]) for dim in self.dims(var)
415                ] if set(self.dims(var)) <= set(self.names) else None

return a shape with the dimensions associated to the var full_name

validity
417    @property
418    def validity(self):
419        '''return the validity state: 'inconsistent', 'undifined' or 'valid' '''
420        for xnda in self:
421            if xnda.mode in ['relative', 'inconsistent']:
422                return 'undefined'
423        if self.undef_links or self.undef_vars:
424            return 'inconsistent'
425        return 'valid'

return the validity state: 'undefined', 'inconsistent' or 'valid'

xtype
427    @property
428    def xtype(self):
429        '''return the Xdataset type: 'meta', 'group', 'mono', 'multi' '''
430        if self.metadata and not (self.additionals or self.variables or
431                                  self.namedarrays):
432            return 'meta'
433        if self.validity != 'valid':
434            return 'group'
435        match len(self.data_vars):
436            case 0:
437                return 'group'
438            case 1:
439                return 'mono'
440            case _:
441                return 'multi'

return the Xdataset type: 'meta', 'group', 'mono', 'multi'

dic_xnd
443    @property
444    def dic_xnd(self):
445        '''return a dict of Xndarray where key is the full_name'''
446        return {xnda.full_name: xnda for xnda in self.xnd}

return a dict of Xndarray where key is the full_name

length
448    @property
449    def length(self):
450        '''return the max length of Xndarray'''
451        return max(len(xnda) for xnda in self.xnd)

return the max length of Xndarray

names
453    @property
454    def names(self):
455        '''return a tuple with the Xndarray full_name'''
456        return tuple(xnda.full_name for xnda in self.xnd)

return a tuple with the Xndarray full_name

partition
458    @property
459    def partition(self):
460        '''return a dict of Xndarray grouped with category'''
461        dic = {}
462        dic |= {'data_vars': list(self.data_vars)} if self.data_vars else {}
463        dic |= {'data_arrays': list(self.data_arrays)
464                } if self.data_arrays else {}
465        dic |= {'dimensions': list(self.dimensions)} if self.dimensions else {}
466        dic |= {'coordinates': list(self.coordinates)
467                } if self.coordinates else {}
468        dic |= {'additionals': list(self.additionals)
469                } if self.additionals else {}
470        dic |= {'metadata': list(self.metadata)} if self.metadata else {}
471        dic |= {'uniques': list(self.uniques)} if self.uniques else {}
472        return dic

return a dict of Xndarray grouped with category

info
474    @property
475    def info(self):
476        '''return a dict with Xdataset information '''
477        inf = {'name': self.name, 'xtype': self.xtype} | self.partition
478        inf['validity'] = self.validity
479        inf['length'] = len(self[self.data_vars[0]]) if self.data_vars else 0
480        inf['width'] = len(self)
481        data = {name: {key: val for key, val in self[name].info.items() if key != 'name'}
482                for name in self.names}
483        return {'structure': {key: val for key, val in inf.items() if val},
484                'data': {key: val for key, val in data.items() if val}}

return a dict with Xdataset information

tab_info
486    @property
487    def tab_info(self):
488        '''return a dict with Xdataset information for tabular interface'''
489        info = self.info
490        data = info['data']
491        t_info = {}
492        if 'dimensions' in info['structure']:
493            t_info['dimensions'] = info['structure']['dimensions']
494        t_info['data'] = {name: {key: val for key, val in data[name].items()
495                                 if key in ['shape', 'xtype', 'meta', 'links']}
496                          for name in data}
497        return t_info

return a dict with Xdataset information for tabular interface

def to_canonical(self):
499    def to_canonical(self):
500        '''remove optional links of the included Xndarray'''
501        for name in self.names:
502            if self[name].links in ([self[name].name], [name]):
503                self[name].links = None
504        for add in self.additionals:
505            if self[add].links in [self[self[add].name].links,
506                                   [self[add].name]]:
507                self[add].links = None
508        for unic in self.uniques:
509            self[unic].links = None
510        return self

remove optional links of the included Xndarray

def to_ndarray(self, full_name):
512    def to_ndarray(self, full_name):
513        '''convert a Xndarray from a Xdataset in a np.ndarray'''
514        if self.shape_dims(full_name) is None:
515            data = self[full_name].ndarray
516        else:
517            data = self[full_name].darray.reshape(self.shape_dims(full_name))
518        if data.dtype.name[:8] == 'datetime':
519            data = data.astype('datetime64[ns]')
520        return data

convert a Xndarray from a Xdataset in a np.ndarray

def to_darray(self, full_name):
522    def to_darray(self, full_name):
523        '''convert a Xndarray from a Xdataset in a flattened np.ndarray'''
524        data = self[full_name].darray
525        if data.dtype.name[:8] == 'datetime':
526            data = data.astype('datetime64[ns]')
527        return data

convert a Xndarray from a Xdataset in a flattened np.ndarray