tab-dataset.tab_dataset.dataset_interface

The dataset_interface module is part of the tab-dataset package.

It contains the class DatasetInterface, which groups the export and display methods (json, csv, file, Ntv, xarray, dataframe, plot, view) of Dataset entities.

For more information, see the user guide or the github repository.

  1# -*- coding: utf-8 -*-
  2"""
  3The `dataset_interface` module is part of the `tab-dataset` package.
  4
  5It contains the classes `DatasetInterface` for Dataset entities.
  6
  7For more information, see the 
  8[user guide](https://loco-philippe.github.io/tab-dataset/docs/user_guide.html) 
  9or the [github repository](https://github.com/loco-philippe/tab-dataset).
 10"""
 11
 12import csv
 13import math
 14import json
 15import xarray
 16import numpy as np
 17import matplotlib.pyplot as plt
 18from tabulate import tabulate
 19
 20from json_ntv.ntv import NtvList, NtvJsonEncoder
 21from tab_dataset.cfield import Cutil
 22from tab_dataset.cdataset import DatasetError
 23
 24
 25class DatasetInterface:
 26    '''this class includes Dataset methods :
 27
 28    - `DatasetInterface.json`
 29    - `DatasetInterface.plot`
 30    - `DatasetInterface.to_ntv`
 31    - `DatasetInterface.to_csv`
 32    - `DatasetInterface.to_file`
 33    - `DatasetInterface.to_xarray`
 34    - `DatasetInterface.to_dataframe`
 35    - `DatasetInterface.view`
 36    - `DatasetInterface.vlist`
 37    - `DatasetInterface.voxel`
 38    '''
 39
 40    def json(self, **kwargs):
 41        '''
 42        Return json dict, json string or Cbor binary.
 43
 44        *Parameters (kwargs)*
 45
 46        - **encoded** : boolean (default False) - choice for return format
 47        (string/bytes if True, dict else)
 48        - **format**  : string (default 'json')- choice for return format (json, cbor)
 49        - **codif** : dict (default ES.codeb). Numerical value for string in CBOR encoder
 50        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
 51        if 'default' each index has keys, if 'optimize' keys are optimized,
 52        if 'dict' dict format is used, if 'nokeys' keys are absent
 53        - **name** : boolean (default False) - if False, default index name are not included
 54        - **geojson** : boolean (default False) - geojson for LocationValue if True
 55
 56        *Returns* : string or dict'''
 57        return self.to_obj(**kwargs)
 58
 59    def plot(self, varname=None, idxname=None, order=None, line=True, size=5,
 60             marker='o', maxlen=20):
 61        '''
 62        This function visualize data with line or colormesh.
 63
 64        *Parameters*
 65
 66        - **varname** : string (default none) - Name of the variable to use. If None,
 67        first lvarname is used.
 68        - **line** : Boolean (default True) - Choice line or colormesh.
 69        - **order** : list (defaut None) - order of the axes (x, y, hue or col)
 70        - **size** : int (defaut 5) - plot size
 71        - **marker** : Char (default 'o') - Symbol for each point.
 72        - **maxlen** : Integer (default 20) - maximum length for string
 73
 74        *Returns*
 75
 76        - **None**  '''
 77        if not self.consistent:
 78            return None
 79        if idxname:
 80            idxname = [name for name in idxname if len(
 81                self.nindex(name).codec) > 1]
 82        #xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=[util.cast],##
 83        xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=None,
 84                             dtype='str', npdtype='str', maxlen=maxlen, coord=True)
 85        if not order:
 86            order = [0, 1, 2]
 87
 88        if len(xar.dims) == 1:
 89            xar.plot.line(x=xar.dims[0]+'_row', size=size, marker=marker)
 90        elif len(xar.dims) == 2 and line:
 91            xar.plot.line(x=xar.dims[order[0]] + '_row',
 92                          xticks=list(xar.coords[xar.dims[0]+'_row'].values),
 93                          hue=xar.dims[order[1]], size=size, marker=marker)
 94        elif len(xar.dims) == 2 and not line:
 95            xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row',
 96                     xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values),
 97                     yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values),
 98                     size=size)
 99        elif len(xar.dims) == 3 and line:
100            xar.plot.line(x=xar.dims[order[0]] + '_row', col=xar.dims[order[1]],
101                          xticks=list(
102                xar.coords[xar.dims[order[0]]+'_row'].values),
103                hue=xar.dims[order[2]], col_wrap=2, size=size, marker=marker)
104        elif len(xar.dims) == 3 and not line:
105            xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row',
106                     xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values),
107                     yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values),
108                     col=xar.dims[order[2]], col_wrap=2, size=size)
109        plt.show()
110        return {xar.dims[i]: list(xar.coords[xar.dims[i]].values) for i in range(len(xar.dims))}
111
112    def to_csv(self, filename, optcsv={'quoting': csv.QUOTE_NONNUMERIC}, **kwargs):
113        '''
114        Generate csv file to display data.
115
116        *Parameters*
117
118        - **filename** : string - file name (with path)
119        - **optcsv** : parameter for csv.writer
120
121        *Parameters (kwargs)*
122
123        - **name=listcode** : element (default None) - eg location='ns'
124            - listcode : string with Code for each index (j: json, n: name, s: simple).
125            - name : name of the index
126        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
127        - **header** : Boolean (default : True) - If True, first line with names
128        - **optcsv** : parameter for csv.writer
129        - **ifunc** : function (default None) - function to apply to indexes
130        - **other kwargs** : parameter for ifunc
131
132        *Returns* : size of csv file '''
133        size = 0
134        if not optcsv:
135            optcsv = {}
136        tab = self._to_tab(**kwargs)
137        with open(filename, 'w', newline='', encoding="utf-8") as csvfile:
138            writer = csv.writer(csvfile, **optcsv)
139            for lign in tab:
140                size += writer.writerow(lign)
141        return size
142
143    def to_dataframe(self, info=False, idx=None, fillvalue='?', fillextern=True,
144                     lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs):
145        '''
146        Complete the Object and generate a Pandas DataFrame with the dimension define by idx.
147
148        *Parameters*
149
150        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
151        - **idx** : list (default none) - list of idx to be completed. If [],
152        self.primary is used.
153        - **fillvalue** : object (default '?') - value used for the new extval
154        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
155        - **lisfunc** : function (default none) - list of function to apply to indexes before export
156        - **name** : string (default None) - DataArray name. If None, variable name
157        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
158        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
159        - **kwargs** : parameter for lisfunc
160
161        *Returns* : pandas.DataFrame '''
162        if self.consistent:
163            return self.to_xarray(info=info, idx=idx, fillvalue=fillvalue,
164                                  fillextern=fillextern, lisfunc=lisfunc, name=name,
165                                  numeric=numeric, npdtype=npdtype, **kwargs
166                                  ).to_dataframe(name=name)
167        return None
168
169    def to_file(self, filename, **kwargs):
170        '''Generate file to display data.
171
172         *Parameters (kwargs)*
173
174        - **filename** : string - file name (with path)
175        - **kwargs** : see 'to_ntv' parameters
176
177        *Returns* : Integer - file lenght (bytes)  '''
178        option = {'format': 'cbor', 'modecodec': 'optimize'} | kwargs | {
179            'encoded': True}
180        data = self.to_ntv(modecodec=option['modecodec']).to_obj(**option)
181        if option['format'] == 'cbor':
182            size = len(data)
183            with open(filename, 'wb') as file:
184                file.write(data)
185        else:
186            size = len(bytes(data, 'UTF-8'))
187            with open(filename, 'w', newline='', encoding="utf-8") as file:
188                file.write(data)
189        return size
190
    def to_ntv(self, modecodec='optimize', def_type='json', name=False):
        '''Return a NtvList built from the Ntv representation of each field.

        If modecodec is not 'optimize', every field is converted with the
        requested modecodec. With 'optimize', the representation of each field
        is chosen from its analysis category (unique, coupled, derived, other)
        and from its keys.

        *Parameters*

        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
        if 'default' each index has keys, if 'optimize' keys are optimized,
        if 'dict' dict format is used, if 'nokeys' keys are absent
        - **def_type** : string (default 'json') - default ntv_type for NtvList or NtvSet
        - **name** : boolean (default False) - if False, default index names
        ('i' followed by the rank of the field) are not included


        *Returns* : Ntv object (NtvList created with `self.name`)'''
        # one boolean per field : True if the field name has to be included
        # (always if `name` is truthy, else only for non default names)
        idxname = [name or iname != 'i' + str(i)
                   for i, iname in enumerate(self.lname)]
        if modecodec != 'optimize':
            lis = [index.to_ntv(modecodec=modecodec, name=iname)
                   for index, iname in zip(self.lindex, idxname)]
        else:
            lis = []
            anafields = self.anafields
            for idx, iname, anafld in zip(self.lindex, idxname, anafields):
                # NOTE(review): coef presumably describes keys that follow a
                # regular pattern - confirm with Cutil.encode_coef
                coef = Cutil.encode_coef(idx.keys)
                # rank of the parent field (-1 is tested below as 'no parent')
                parent = anafld.p_derived.view('index')
                if anafld.category == 'unique':
                    lis.append(idx.to_ntv(name=iname))
                elif anafld.category == 'coupled':
                    # the keys of the parent are reused before the export
                    idx_coup = idx.setkeys(
                        self.lindex[parent].keys, inplace=False)
                    lis.append(idx_coup.to_ntv(parent=parent, name=iname))
                elif coef:
                    lis.append(idx.to_ntv(keys=[coef], name=iname))
                elif parent == -1:  # cat='variable' or 'secondary'
                    # keys equal to 0..len-1 : exported with a full codec
                    if idx.keys == list(range(len(self))):
                        lis.append(idx.to_ntv(modecodec='full', name=iname))
                    else:
                        lis.append(idx.to_ntv(modecodec='default', name=iname))
                else:  # derived
                    # parent codec as long as the Dataset : own keys are exported
                    if len(self.lindex[parent].codec) == len(self):
                        lis.append(idx.to_ntv(modecodec='default', name=iname))
                    else:  # keys are expressed relative to the parent field
                        keys = idx.derkeys(self.lindex[parent])
                        lis.append(idx.to_ntv(
                            keys=keys, parent=parent, name=iname))
        return NtvList(lis, self.name, ntv_type=def_type)
236
237    def to_xarray(self, info=False, idxname=None, varname=None, fillvalue='?',
238                  fillextern=True, lisfunc=None, name=None, numeric=False,
239                  npdtype=None, attrs=None, coord=False, **kwargs):
240        '''
241        Complete the Object and generate a Xarray DataArray with the dimension define by idx.
242        Only the first variable is incuded.
243
244        *Parameters*
245
246        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
247        - **idxname** : list (default none) - list of choosen primary fields. If None,
248        self.primary is used.
249        - **varname** : string (default none) - Name of the variable to use. If None,
250        first lvarname is used.
251        - **fillvalue** : object (default '?') - value used for the new extval
252        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
253        - **lisfunc** : function (default none) - list of function to apply to indexes before export
254        - **name** : string (default None) - DataArray name. If None, variable name
255        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
256        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
257        - **attrs** : dict (default None) - attributes for the DataArray
258        - **coord** : boolean (default False) - if True, add derivated coords
259        - **kwargs** : parameter for lisfunc
260
261        *Returns* : DataArray '''
262        option = {'dtype': None} | kwargs
263        if not self.consistent:
264            raise DatasetError("Dataset not consistent")
265        if idxname is None or idxname == []:
266            idxname = self.primaryname
267        ilf = self.full(idxname=idxname, varname=varname, fillvalue=fillvalue,
268                        fillextern=fillextern, inplace=False)
269        ilf.setcanonorder()
270        if not varname and len(ilf.lvarname) != 0:
271            varname = ilf.lvarname[0]
272        if not varname in ilf.lname:
273            ivar = -1
274        else:
275            ivar = ilf.lname.index(varname)
276        if isinstance(lisfunc, list) and len(lisfunc) == 1:
277            lisfunc = lisfunc * ilf.lenindex
278        elif isinstance(lisfunc, list) and len(lisfunc) != ilf.lenindex:
279            lisfunc = [None] * ilf.lenindex
280        elif not isinstance(lisfunc, list):
281            funcvar = lisfunc
282            lisfunc = [None] * ilf.lenindex
283            if ivar != -1:
284                lisfunc[ivar] = funcvar
285        lisfuncname = dict(zip(ilf.lname, lisfunc))
286        coords = ilf._xcoord(idxname, ivar, lisfuncname, coord, **option)
287        dims = idxname
288        if numeric:
289            #lisfunc[ivar] = util.cast
290            fillvalue = math.nan
291            npdtype = 'float'
292            option['dtype'] = 'float'
293        if ivar == -1:
294            data = self.field(list(range(len(ilf)))).to_numpy(npdtype='int')\
295                .reshape([len(ilf.nindex(name).codec) for name in idxname])
296        else:
297            data = ilf.lindex[ivar]\
298                .to_numpy(func=lisfunc[ivar], npdtype=npdtype, **option)\
299                .reshape([len(ilf.nindex(name).codec) for name in idxname])
300        if not name and ivar == -1:
301            name = ilf.name
302        elif not name:
303            name = ilf.lname[ivar]
304        if not isinstance(attrs, dict):
305            attrs = {}
306        for nam in ilf.lunicname:
307            attrs[nam] = ilf.nindex(nam).codec[0]
308        if info:
309            attrs |= ilf.indexinfos()
310        #print(data, coords, dims, attrs, name)
311        return xarray.DataArray(data, coords, dims, attrs=attrs, name=name)
312
313    def voxel(self, idxname=None, varname=None):
314        '''
315        Plot not null values in a cube with voxels and return indexes values.
316
317        *Parameters*
318
319        - **idxname** : list (default none) - list of idx to be completed. If None,
320        self.primary is used.
321        - **varname** : string (default none) - Name of the variable to use. If None,
322        first lvarname is used.
323
324        *Returns* : **dict of indexes values**
325        '''
326        if not self.consistent:
327            return None
328        if idxname is None or idxname == []:
329            idxname = self.primaryname
330        if varname is None and self.lvarname:
331            varname = self.lvarname[0]
332        if len(idxname) > 3:
333            raise DatasetError('number of idx > 3')
334        if len(idxname) == 2:
335            self.addindex(self.field('null', ' ', keys=[0]*len(self)))
336            idxname += [' ']
337        elif len(idxname) == 1:
338            self.addindex(self.field('null', ' ', keys=[0]*len(self)))
339            self.addindex(self.field('null', '  ', keys=[0]*len(self)))
340            idxname += [' ', '  ']
341        xar = self.to_xarray(idxname=idxname, varname=varname, fillvalue='?',
342                             fillextern=False, lisfunc=Cutil.is_not_equal, tovalue='?')
343        axe = plt.figure().add_subplot(projection='3d')
344        axe.voxels(xar, edgecolor='k')
345        axe.set_xticks(np.arange(self.idxlen[self.idxname.index(xar.dims[0])]))
346        axe.set_yticks(np.arange(self.idxlen[self.idxname.index(xar.dims[1])]))
347        axe.set_zticks(np.arange(self.idxlen[self.idxname.index(xar.dims[2])]))
348        axe.set(xlabel=xar.dims[0][:8],
349                ylabel=xar.dims[1][:8],
350                zlabel=xar.dims[2][:8])
351        plt.show()
352        self.delindex([' ', '  '])
353        return {xar.dims[i]: list(xar.coords[xar.dims[i]].values)
354                for i in range(len(xar.dims))}
355
356    def view(self, **kwargs):
357        '''
358        Generate tabular list to display data.
359
360        *Parameters (kwargs)*
361
362        - **name=listcode** : element (default None) - eg location='ns'
363            - listcode : string with Code for each index (j: json, n: name, s: simple).
364            - name : name of the index
365        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
366        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
367        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
368        - **header** : Boolean (default : True) - First line with names
369        - **width** : Integer (default None) - Number of characters displayed for each
370        attribute (all if None)
371        - **ifunc** : function (default None) - function to apply to indexes
372        - **tabulate params** : default 'tablefmt': 'simple', 'numalign': 'left',
373        'stralign': 'left', 'floatfmt': '.3f' - See tabulate module
374        - **other kwargs** : parameter for ifunc
375
376        *Returns* : list or html table (tabulate format) '''
377        opttab = {'defcode': 'j', 'all': True, 'lenres': 0, 'header': True}
378        optview = {'tablefmt': 'simple', 'numalign': 'decimal',
379                   'stralign': 'left', 'floatfmt': '.2f'}
380        option = opttab | optview | kwargs
381        tab = self._to_tab(**option)
382        width = ({'width': None} | kwargs)['width']
383        if width:
384            #tab = [[(lambda x: x[:width] if isinstance(x, str) else x)(val)
385            tab = [[val[:width] if isinstance(val, str) else val
386                    for val in lig] for lig in tab]
387        return tabulate(tab, headers='firstrow', **{k: option[k] for k in optview})
388
389    def vlist(self, *args, func=None, index=-1, **kwargs):
390        '''
391        Apply a function to an index and return the result.
392
393        *Parameters*
394
395        - **func** : function (default none) - function to apply to extval or extidx
396        - **args, kwargs** : parameters for the function
397        - **index** : integer - index to update (index=-1 for first variable)
398
399        *Returns* : list of func result'''
400        if index == -1 and self.lvar:
401            return self.lvar[0].vlist(func, *args, **kwargs)
402        if index == -1 and self.lenindex == 1:
403            index = 0
404        return self.lindex[index].vlist(func, *args, **kwargs)
405
406    # %%internal
407
408    def _to_tab(self, **kwargs):
409        ''' data preparation (dict of dict) for view or csv export.
410        Representation is included if :
411            - code is definie in the name element of the field
412            - or code is defined in 'defcode' element and 'all' element is True
413
414        *Parameters (kwargs)*
415
416        - **name=listcode** : element (default None) - eg location='ns'
417            - listcode : string with Code for each index (j: json, n: name, s: simple, f: ifunc).
418            - name : name of the index
419        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
420        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
421        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
422        - **ifunc** : function (default None) - function to apply to indexes
423        - **other kwargs** : parameter for ifunc'''
424
425        option = {'defcode': 'j', 'all': True, 'lenres': 0, 'ifunc': None,
426                  'header': True} | kwargs
427        tab = []
428        reslist = []
429        diccode = {'j': '', 'n': 'name-', 's': 'smpl-', 'f': 'func-'}
430        if option['header']:
431            for name in self.lname:
432                opt = name if name in option else 'defcode'
433                if opt != 'defcode' or option['all']:
434                    for char, code in diccode.items():
435                        if char in option[opt]:
436                            reslist.append(code + name)
437            tab.append(reslist)
438        lenres = option['lenres']
439        if lenres == 0:
440            lenres = len(self)
441        for i in range(min(lenres, len(self))):
442            reslist = []
443            for name in self.lname:
444                opt = name if name in option else 'defcode'
445                if opt != 'defcode' or option['all']:
446                    for char, code in diccode.items():
447                        if char in option[opt]:
448                            val = self.nindex(name).values[i]
449                            if char == 'j':
450                                #reslist.append(util.cast(val, dtype='json'))
451                                reslist.append(json.dumps(
452                                    self.field.s_to_e(val), cls=NtvJsonEncoder))
453                            elif char == 'n':
454                                reslist.append(self.field.i_to_name(val))
455                            elif char == 's':
456                                reslist.append(json.dumps(
457                                    self.field.s_to_e(val), cls=NtvJsonEncoder))
458                            elif char == 'f':
459                                reslist.append(Cutil.funclist(
460                                    val, option['ifunc'], **kwargs))
461            tab.append(reslist)
462        return tab
463
    def _xcoord(self, axename, ivar, lisfuncname=None, coord=False, **kwargs):
        ''' Coords generation for Xarray.

        *Parameters*

        - **axename** : list - names of the fields used as dimensions
        - **ivar** : integer - rank of the variable field (excluded from the coords)
        - **lisfuncname** : dict (default None) - function to apply for each field
        name. It is used only if it has one entry per field.
        - **coord** : boolean (default False) - if True, '_row' and '_str' coords
        are added for each dimension
        - **kwargs** : parameters for `to_numpy`

        *Returns* : dict of coords (key : field name). The value is an array for a
        dimension field, else a tuple (name of the related dimension, array).

        Note : the keys of the fields which are not dimensions are replaced
        (setkeys) by the keys of their related dimension field.'''
        dic_part = self.field_partition(axename)
        coords = {}
        ana = self.analysis
        for i in range(self.lenindex):
            iname = self.lname[i]
            # fields in the 'variable' or 'unique' partition and the exported
            # variable are not coords
            if i in dic_part['variable'] or i in dic_part['unique'] or i == ivar:
                continue
            if isinstance(lisfuncname, dict) and len(lisfuncname) == self.lenindex:
                funci = lisfuncname[iname]
            else:
                funci = None
            if iname in axename:
                # dimension field : the codec values are the coord
                coords[iname] = self.lindex[i].to_numpy(
                    func=funci, codec=True, **kwargs)
                if coord:
                    # additional coords : row number and string value
                    coords[iname+'_row'] = (iname,
                                            np.arange(len(coords[iname])))
                    coords[iname+'_str'] = (iname,
                                            self.lindex[i].to_numpy(func=str, codec=True))
            else:
                # first dimension field with a 'derived' or 'coupled' relation
                # NOTE(review): IndexError if no dimension field matches - confirm
                # that the partition guarantees at least one
                f_prim = [self.nindex(name) for name in axename if
                  ana.get_relation(i, name).typecoupl in ['derived', 'coupled']][0]
                # the field is modified in place : its keys become those of f_prim
                self.lindex[i].setkeys(f_prim.keys)  # !!!
                coords[iname] = (f_prim.name, self.lindex[i].to_numpy(
                                            func=funci, codec=True, **kwargs))
        return coords
class DatasetInterface:
 26class DatasetInterface:
 27    '''this class includes Dataset methods :
 28
 29    - `DatasetInterface.json`
 30    - `DatasetInterface.plot`
 31    - `DatasetInterface.to_ntv`
 32    - `DatasetInterface.to_csv`
 33    - `DatasetInterface.to_file`
 34    - `DatasetInterface.to_xarray`
 35    - `DatasetInterface.to_dataframe`
 36    - `DatasetInterface.view`
 37    - `DatasetInterface.vlist`
 38    - `DatasetInterface.voxel`
 39    '''
 40
 41    def json(self, **kwargs):
 42        '''
 43        Return json dict, json string or Cbor binary.
 44
 45        *Parameters (kwargs)*
 46
 47        - **encoded** : boolean (default False) - choice for return format
 48        (string/bytes if True, dict else)
 49        - **format**  : string (default 'json')- choice for return format (json, cbor)
 50        - **codif** : dict (default ES.codeb). Numerical value for string in CBOR encoder
 51        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
 52        if 'default' each index has keys, if 'optimize' keys are optimized,
 53        if 'dict' dict format is used, if 'nokeys' keys are absent
 54        - **name** : boolean (default False) - if False, default index name are not included
 55        - **geojson** : boolean (default False) - geojson for LocationValue if True
 56
 57        *Returns* : string or dict'''
 58        return self.to_obj(**kwargs)
 59
 60    def plot(self, varname=None, idxname=None, order=None, line=True, size=5,
 61             marker='o', maxlen=20):
 62        '''
 63        This function visualize data with line or colormesh.
 64
 65        *Parameters*
 66
 67        - **varname** : string (default none) - Name of the variable to use. If None,
 68        first lvarname is used.
 69        - **line** : Boolean (default True) - Choice line or colormesh.
 70        - **order** : list (defaut None) - order of the axes (x, y, hue or col)
 71        - **size** : int (defaut 5) - plot size
 72        - **marker** : Char (default 'o') - Symbol for each point.
 73        - **maxlen** : Integer (default 20) - maximum length for string
 74
 75        *Returns*
 76
 77        - **None**  '''
 78        if not self.consistent:
 79            return None
 80        if idxname:
 81            idxname = [name for name in idxname if len(
 82                self.nindex(name).codec) > 1]
 83        #xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=[util.cast],##
 84        xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=None,
 85                             dtype='str', npdtype='str', maxlen=maxlen, coord=True)
 86        if not order:
 87            order = [0, 1, 2]
 88
 89        if len(xar.dims) == 1:
 90            xar.plot.line(x=xar.dims[0]+'_row', size=size, marker=marker)
 91        elif len(xar.dims) == 2 and line:
 92            xar.plot.line(x=xar.dims[order[0]] + '_row',
 93                          xticks=list(xar.coords[xar.dims[0]+'_row'].values),
 94                          hue=xar.dims[order[1]], size=size, marker=marker)
 95        elif len(xar.dims) == 2 and not line:
 96            xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row',
 97                     xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values),
 98                     yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values),
 99                     size=size)
100        elif len(xar.dims) == 3 and line:
101            xar.plot.line(x=xar.dims[order[0]] + '_row', col=xar.dims[order[1]],
102                          xticks=list(
103                xar.coords[xar.dims[order[0]]+'_row'].values),
104                hue=xar.dims[order[2]], col_wrap=2, size=size, marker=marker)
105        elif len(xar.dims) == 3 and not line:
106            xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row',
107                     xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values),
108                     yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values),
109                     col=xar.dims[order[2]], col_wrap=2, size=size)
110        plt.show()
111        return {xar.dims[i]: list(xar.coords[xar.dims[i]].values) for i in range(len(xar.dims))}
112
113    def to_csv(self, filename, optcsv={'quoting': csv.QUOTE_NONNUMERIC}, **kwargs):
114        '''
115        Generate csv file to display data.
116
117        *Parameters*
118
119        - **filename** : string - file name (with path)
120        - **optcsv** : parameter for csv.writer
121
122        *Parameters (kwargs)*
123
124        - **name=listcode** : element (default None) - eg location='ns'
125            - listcode : string with Code for each index (j: json, n: name, s: simple).
126            - name : name of the index
127        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
128        - **header** : Boolean (default : True) - If True, first line with names
129        - **optcsv** : parameter for csv.writer
130        - **ifunc** : function (default None) - function to apply to indexes
131        - **other kwargs** : parameter for ifunc
132
133        *Returns* : size of csv file '''
134        size = 0
135        if not optcsv:
136            optcsv = {}
137        tab = self._to_tab(**kwargs)
138        with open(filename, 'w', newline='', encoding="utf-8") as csvfile:
139            writer = csv.writer(csvfile, **optcsv)
140            for lign in tab:
141                size += writer.writerow(lign)
142        return size
143
144    def to_dataframe(self, info=False, idx=None, fillvalue='?', fillextern=True,
145                     lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs):
146        '''
147        Complete the Object and generate a Pandas DataFrame with the dimension define by idx.
148
149        *Parameters*
150
151        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
152        - **idx** : list (default none) - list of idx to be completed. If [],
153        self.primary is used.
154        - **fillvalue** : object (default '?') - value used for the new extval
155        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
156        - **lisfunc** : function (default none) - list of function to apply to indexes before export
157        - **name** : string (default None) - DataArray name. If None, variable name
158        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
159        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
160        - **kwargs** : parameter for lisfunc
161
162        *Returns* : pandas.DataFrame '''
163        if self.consistent:
164            return self.to_xarray(info=info, idx=idx, fillvalue=fillvalue,
165                                  fillextern=fillextern, lisfunc=lisfunc, name=name,
166                                  numeric=numeric, npdtype=npdtype, **kwargs
167                                  ).to_dataframe(name=name)
168        return None
169
170    def to_file(self, filename, **kwargs):
171        '''Generate file to display data.
172
173         *Parameters (kwargs)*
174
175        - **filename** : string - file name (with path)
176        - **kwargs** : see 'to_ntv' parameters
177
178        *Returns* : Integer - file lenght (bytes)  '''
179        option = {'format': 'cbor', 'modecodec': 'optimize'} | kwargs | {
180            'encoded': True}
181        data = self.to_ntv(modecodec=option['modecodec']).to_obj(**option)
182        if option['format'] == 'cbor':
183            size = len(data)
184            with open(filename, 'wb') as file:
185                file.write(data)
186        else:
187            size = len(bytes(data, 'UTF-8'))
188            with open(filename, 'w', newline='', encoding="utf-8") as file:
189                file.write(data)
190        return size
191
    def to_ntv(self, modecodec='optimize', def_type='json', name=False):
        '''Return a Ntv tab value (without name).

        Each field of `self.lindex` is converted with its own `to_ntv` method and
        the results are gathered in a `NtvList` built with `self.name`.

        *Parameters*

        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
        if 'default' each index has keys, if 'optimize' keys are optimized,
        if 'dict' dict format is used, if 'nokeys' keys are absent
        - **def_type** : string (default 'json') - default ntv_type for NtvList or NtvSet
        - **name** : boolean (default False) - if False, default index name are not included


        *Returns* : Ntv object'''
        # One boolean per field: True when names are forced (`name`) or when the
        # field name differs from the default name 'i' + position.
        idxname = [name or iname != 'i' + str(i)
                   for i, iname in enumerate(self.lname)]
        if modecodec != 'optimize':
            # Every field is exported with the same requested mode.
            lis = [index.to_ntv(modecodec=modecodec, name=iname)
                   for index, iname in zip(self.lindex, idxname)]
        else:
            lis = []
            anafields = self.anafields
            for idx, iname, anafld in zip(self.lindex, idxname, anafields):
                # presumably a compact description of regular keys, falsy when
                # none applies - TODO confirm against Cutil.encode_coef
                coef = Cutil.encode_coef(idx.keys)
                # position of the field this one is derived from (-1 is tested below)
                parent = anafld.p_derived.view('index')
                if anafld.category == 'unique':
                    lis.append(idx.to_ntv(name=iname))
                elif anafld.category == 'coupled':
                    # export a copy (inplace=False) re-keyed with the parent's keys
                    idx_coup = idx.setkeys(
                        self.lindex[parent].keys, inplace=False)
                    lis.append(idx_coup.to_ntv(parent=parent, name=iname))
                elif coef:
                    lis.append(idx.to_ntv(keys=[coef], name=iname))
                elif parent == -1:  # cat='variable' or 'secondary'
                    # keys equal to 0..len-1: 'full' mode, otherwise 'default' mode
                    if idx.keys == list(range(len(self))):
                        lis.append(idx.to_ntv(modecodec='full', name=iname))
                    else:
                        lis.append(idx.to_ntv(modecodec='default', name=iname))
                else:  # derived
                    # parent codec as long as the dataset: 'default' mode is used
                    if len(self.lindex[parent].codec) == len(self):
                        lis.append(idx.to_ntv(modecodec='default', name=iname))
                    else:  # keys expressed relative to the parent field
                        keys = idx.derkeys(self.lindex[parent])
                        lis.append(idx.to_ntv(
                            keys=keys, parent=parent, name=iname))
        return NtvList(lis, self.name, ntv_type=def_type)
237
238    def to_xarray(self, info=False, idxname=None, varname=None, fillvalue='?',
239                  fillextern=True, lisfunc=None, name=None, numeric=False,
240                  npdtype=None, attrs=None, coord=False, **kwargs):
241        '''
242        Complete the Object and generate a Xarray DataArray with the dimension define by idx.
243        Only the first variable is incuded.
244
245        *Parameters*
246
247        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
248        - **idxname** : list (default none) - list of choosen primary fields. If None,
249        self.primary is used.
250        - **varname** : string (default none) - Name of the variable to use. If None,
251        first lvarname is used.
252        - **fillvalue** : object (default '?') - value used for the new extval
253        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
254        - **lisfunc** : function (default none) - list of function to apply to indexes before export
255        - **name** : string (default None) - DataArray name. If None, variable name
256        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
257        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
258        - **attrs** : dict (default None) - attributes for the DataArray
259        - **coord** : boolean (default False) - if True, add derivated coords
260        - **kwargs** : parameter for lisfunc
261
262        *Returns* : DataArray '''
263        option = {'dtype': None} | kwargs
264        if not self.consistent:
265            raise DatasetError("Dataset not consistent")
266        if idxname is None or idxname == []:
267            idxname = self.primaryname
268        ilf = self.full(idxname=idxname, varname=varname, fillvalue=fillvalue,
269                        fillextern=fillextern, inplace=False)
270        ilf.setcanonorder()
271        if not varname and len(ilf.lvarname) != 0:
272            varname = ilf.lvarname[0]
273        if not varname in ilf.lname:
274            ivar = -1
275        else:
276            ivar = ilf.lname.index(varname)
277        if isinstance(lisfunc, list) and len(lisfunc) == 1:
278            lisfunc = lisfunc * ilf.lenindex
279        elif isinstance(lisfunc, list) and len(lisfunc) != ilf.lenindex:
280            lisfunc = [None] * ilf.lenindex
281        elif not isinstance(lisfunc, list):
282            funcvar = lisfunc
283            lisfunc = [None] * ilf.lenindex
284            if ivar != -1:
285                lisfunc[ivar] = funcvar
286        lisfuncname = dict(zip(ilf.lname, lisfunc))
287        coords = ilf._xcoord(idxname, ivar, lisfuncname, coord, **option)
288        dims = idxname
289        if numeric:
290            #lisfunc[ivar] = util.cast
291            fillvalue = math.nan
292            npdtype = 'float'
293            option['dtype'] = 'float'
294        if ivar == -1:
295            data = self.field(list(range(len(ilf)))).to_numpy(npdtype='int')\
296                .reshape([len(ilf.nindex(name).codec) for name in idxname])
297        else:
298            data = ilf.lindex[ivar]\
299                .to_numpy(func=lisfunc[ivar], npdtype=npdtype, **option)\
300                .reshape([len(ilf.nindex(name).codec) for name in idxname])
301        if not name and ivar == -1:
302            name = ilf.name
303        elif not name:
304            name = ilf.lname[ivar]
305        if not isinstance(attrs, dict):
306            attrs = {}
307        for nam in ilf.lunicname:
308            attrs[nam] = ilf.nindex(nam).codec[0]
309        if info:
310            attrs |= ilf.indexinfos()
311        #print(data, coords, dims, attrs, name)
312        return xarray.DataArray(data, coords, dims, attrs=attrs, name=name)
313
314    def voxel(self, idxname=None, varname=None):
315        '''
316        Plot not null values in a cube with voxels and return indexes values.
317
318        *Parameters*
319
320        - **idxname** : list (default none) - list of idx to be completed. If None,
321        self.primary is used.
322        - **varname** : string (default none) - Name of the variable to use. If None,
323        first lvarname is used.
324
325        *Returns* : **dict of indexes values**
326        '''
327        if not self.consistent:
328            return None
329        if idxname is None or idxname == []:
330            idxname = self.primaryname
331        if varname is None and self.lvarname:
332            varname = self.lvarname[0]
333        if len(idxname) > 3:
334            raise DatasetError('number of idx > 3')
335        if len(idxname) == 2:
336            self.addindex(self.field('null', ' ', keys=[0]*len(self)))
337            idxname += [' ']
338        elif len(idxname) == 1:
339            self.addindex(self.field('null', ' ', keys=[0]*len(self)))
340            self.addindex(self.field('null', '  ', keys=[0]*len(self)))
341            idxname += [' ', '  ']
342        xar = self.to_xarray(idxname=idxname, varname=varname, fillvalue='?',
343                             fillextern=False, lisfunc=Cutil.is_not_equal, tovalue='?')
344        axe = plt.figure().add_subplot(projection='3d')
345        axe.voxels(xar, edgecolor='k')
346        axe.set_xticks(np.arange(self.idxlen[self.idxname.index(xar.dims[0])]))
347        axe.set_yticks(np.arange(self.idxlen[self.idxname.index(xar.dims[1])]))
348        axe.set_zticks(np.arange(self.idxlen[self.idxname.index(xar.dims[2])]))
349        axe.set(xlabel=xar.dims[0][:8],
350                ylabel=xar.dims[1][:8],
351                zlabel=xar.dims[2][:8])
352        plt.show()
353        self.delindex([' ', '  '])
354        return {xar.dims[i]: list(xar.coords[xar.dims[i]].values)
355                for i in range(len(xar.dims))}
356
357    def view(self, **kwargs):
358        '''
359        Generate tabular list to display data.
360
361        *Parameters (kwargs)*
362
363        - **name=listcode** : element (default None) - eg location='ns'
364            - listcode : string with Code for each index (j: json, n: name, s: simple).
365            - name : name of the index
366        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
367        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
368        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
369        - **header** : Boolean (default : True) - First line with names
370        - **width** : Integer (default None) - Number of characters displayed for each
371        attribute (all if None)
372        - **ifunc** : function (default None) - function to apply to indexes
373        - **tabulate params** : default 'tablefmt': 'simple', 'numalign': 'left',
374        'stralign': 'left', 'floatfmt': '.3f' - See tabulate module
375        - **other kwargs** : parameter for ifunc
376
377        *Returns* : list or html table (tabulate format) '''
378        opttab = {'defcode': 'j', 'all': True, 'lenres': 0, 'header': True}
379        optview = {'tablefmt': 'simple', 'numalign': 'decimal',
380                   'stralign': 'left', 'floatfmt': '.2f'}
381        option = opttab | optview | kwargs
382        tab = self._to_tab(**option)
383        width = ({'width': None} | kwargs)['width']
384        if width:
385            #tab = [[(lambda x: x[:width] if isinstance(x, str) else x)(val)
386            tab = [[val[:width] if isinstance(val, str) else val
387                    for val in lig] for lig in tab]
388        return tabulate(tab, headers='firstrow', **{k: option[k] for k in optview})
389
390    def vlist(self, *args, func=None, index=-1, **kwargs):
391        '''
392        Apply a function to an index and return the result.
393
394        *Parameters*
395
396        - **func** : function (default none) - function to apply to extval or extidx
397        - **args, kwargs** : parameters for the function
398        - **index** : integer - index to update (index=-1 for first variable)
399
400        *Returns* : list of func result'''
401        if index == -1 and self.lvar:
402            return self.lvar[0].vlist(func, *args, **kwargs)
403        if index == -1 and self.lenindex == 1:
404            index = 0
405        return self.lindex[index].vlist(func, *args, **kwargs)
406
407    # %%internal
408
409    def _to_tab(self, **kwargs):
410        ''' data preparation (dict of dict) for view or csv export.
411        Representation is included if :
412            - code is definie in the name element of the field
413            - or code is defined in 'defcode' element and 'all' element is True
414
415        *Parameters (kwargs)*
416
417        - **name=listcode** : element (default None) - eg location='ns'
418            - listcode : string with Code for each index (j: json, n: name, s: simple, f: ifunc).
419            - name : name of the index
420        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
421        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
422        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
423        - **ifunc** : function (default None) - function to apply to indexes
424        - **other kwargs** : parameter for ifunc'''
425
426        option = {'defcode': 'j', 'all': True, 'lenres': 0, 'ifunc': None,
427                  'header': True} | kwargs
428        tab = []
429        reslist = []
430        diccode = {'j': '', 'n': 'name-', 's': 'smpl-', 'f': 'func-'}
431        if option['header']:
432            for name in self.lname:
433                opt = name if name in option else 'defcode'
434                if opt != 'defcode' or option['all']:
435                    for char, code in diccode.items():
436                        if char in option[opt]:
437                            reslist.append(code + name)
438            tab.append(reslist)
439        lenres = option['lenres']
440        if lenres == 0:
441            lenres = len(self)
442        for i in range(min(lenres, len(self))):
443            reslist = []
444            for name in self.lname:
445                opt = name if name in option else 'defcode'
446                if opt != 'defcode' or option['all']:
447                    for char, code in diccode.items():
448                        if char in option[opt]:
449                            val = self.nindex(name).values[i]
450                            if char == 'j':
451                                #reslist.append(util.cast(val, dtype='json'))
452                                reslist.append(json.dumps(
453                                    self.field.s_to_e(val), cls=NtvJsonEncoder))
454                            elif char == 'n':
455                                reslist.append(self.field.i_to_name(val))
456                            elif char == 's':
457                                reslist.append(json.dumps(
458                                    self.field.s_to_e(val), cls=NtvJsonEncoder))
459                            elif char == 'f':
460                                reslist.append(Cutil.funclist(
461                                    val, option['ifunc'], **kwargs))
462            tab.append(reslist)
463        return tab
464
    def _xcoord(self, axename, ivar, lisfuncname=None, coord=False, **kwargs):
        ''' Coords generation for Xarray.

        *Parameters*

        - **axename** : list - names of the fields used as dimensions
        - **ivar** : integer - position of the variable field (skipped)
        - **lisfuncname** : dict (default None) - function to apply for each field
        name. Only used if it holds one entry per field.
        - **coord** : boolean (default False) - if True, '<name>_row' and
        '<name>_str' coords are added for each dimension
        - **kwargs** : parameters forwarded to `to_numpy`

        *Returns* : dict of coords (name -> array, or name -> (dimension, array))

        NOTE: the keys of fields that are not dimensions are replaced in place
        (`setkeys`) by the keys of the related dimension.'''
        dic_part = self.field_partition(axename)
        coords = {}
        ana = self.analysis
        for i in range(self.lenindex):
            iname = self.lname[i]
            # variable fields, unique fields and the exported variable are not coords
            if i in dic_part['variable'] or i in dic_part['unique'] or i == ivar:
                continue
            if isinstance(lisfuncname, dict) and len(lisfuncname) == self.lenindex:
                funci = lisfuncname[iname]
            else:
                funci = None
            if iname in axename:
                # dimension coordinate: codec values of the field
                coords[iname] = self.lindex[i].to_numpy(
                    func=funci, codec=True, **kwargs)
                if coord:
                    # additional coords: row number and string form of the values
                    coords[iname+'_row'] = (iname,
                                            np.arange(len(coords[iname])))
                    coords[iname+'_str'] = (iname,
                                            self.lindex[i].to_numpy(func=str, codec=True))
            else:
                # first dimension whose relation with field i is 'derived' or
                # 'coupled' (IndexError if there is none)
                f_prim = [self.nindex(name) for name in axename if
                  ana.get_relation(i, name).typecoupl in ['derived', 'coupled']][0]
                # side effect: field i is re-keyed with the keys of that dimension
                self.lindex[i].setkeys(f_prim.keys)  # !!!
                coords[iname] = (f_prim.name, self.lindex[i].to_numpy(
                                            func=funci, codec=True, **kwargs))
        return coords
def json(self, **kwargs):
41    def json(self, **kwargs):
42        '''
43        Return json dict, json string or Cbor binary.
44
45        *Parameters (kwargs)*
46
47        - **encoded** : boolean (default False) - choice for return format
48        (string/bytes if True, dict else)
49        - **format**  : string (default 'json')- choice for return format (json, cbor)
50        - **codif** : dict (default ES.codeb). Numerical value for string in CBOR encoder
51        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
52        if 'default' each index has keys, if 'optimize' keys are optimized,
53        if 'dict' dict format is used, if 'nokeys' keys are absent
54        - **name** : boolean (default False) - if False, default index name are not included
55        - **geojson** : boolean (default False) - geojson for LocationValue if True
56
57        *Returns* : string or dict'''
58        return self.to_obj(**kwargs)

Return json dict, json string or Cbor binary.

Parameters (kwargs)

  • encoded : boolean (default False) - choice for return format (string/bytes if True, dict else)
  • format : string (default 'json')- choice for return format (json, cbor)
  • codif : dict (default ES.codeb). Numerical value for string in CBOR encoder
  • modecodec : string (default 'optimize') - if 'full', each index is with a full codec if 'default' each index has keys, if 'optimize' keys are optimized, if 'dict' dict format is used, if 'nokeys' keys are absent
  • name : boolean (default False) - if False, default index name are not included
  • geojson : boolean (default False) - geojson for LocationValue if True

Returns : string or dict

def plot( self, varname=None, idxname=None, order=None, line=True, size=5, marker='o', maxlen=20):
 60    def plot(self, varname=None, idxname=None, order=None, line=True, size=5,
 61             marker='o', maxlen=20):
 62        '''
 63        This function visualize data with line or colormesh.
 64
 65        *Parameters*
 66
 67        - **varname** : string (default none) - Name of the variable to use. If None,
 68        first lvarname is used.
 69        - **line** : Boolean (default True) - Choice line or colormesh.
 70        - **order** : list (defaut None) - order of the axes (x, y, hue or col)
 71        - **size** : int (defaut 5) - plot size
 72        - **marker** : Char (default 'o') - Symbol for each point.
 73        - **maxlen** : Integer (default 20) - maximum length for string
 74
 75        *Returns*
 76
 77        - **None**  '''
 78        if not self.consistent:
 79            return None
 80        if idxname:
 81            idxname = [name for name in idxname if len(
 82                self.nindex(name).codec) > 1]
 83        #xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=[util.cast],##
 84        xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=None,
 85                             dtype='str', npdtype='str', maxlen=maxlen, coord=True)
 86        if not order:
 87            order = [0, 1, 2]
 88
 89        if len(xar.dims) == 1:
 90            xar.plot.line(x=xar.dims[0]+'_row', size=size, marker=marker)
 91        elif len(xar.dims) == 2 and line:
 92            xar.plot.line(x=xar.dims[order[0]] + '_row',
 93                          xticks=list(xar.coords[xar.dims[0]+'_row'].values),
 94                          hue=xar.dims[order[1]], size=size, marker=marker)
 95        elif len(xar.dims) == 2 and not line:
 96            xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row',
 97                     xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values),
 98                     yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values),
 99                     size=size)
100        elif len(xar.dims) == 3 and line:
101            xar.plot.line(x=xar.dims[order[0]] + '_row', col=xar.dims[order[1]],
102                          xticks=list(
103                xar.coords[xar.dims[order[0]]+'_row'].values),
104                hue=xar.dims[order[2]], col_wrap=2, size=size, marker=marker)
105        elif len(xar.dims) == 3 and not line:
106            xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row',
107                     xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values),
108                     yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values),
109                     col=xar.dims[order[2]], col_wrap=2, size=size)
110        plt.show()
111        return {xar.dims[i]: list(xar.coords[xar.dims[i]].values) for i in range(len(xar.dims))}

This function visualize data with line or colormesh.

Parameters

  • varname : string (default none) - Name of the variable to use. If None, first lvarname is used.
  • line : Boolean (default True) - Choice line or colormesh.
  • order : list (default None) - order of the axes (x, y, hue or col)
  • size : int (default 5) - plot size
  • marker : Char (default 'o') - Symbol for each point.
  • maxlen : Integer (default 20) - maximum length for string

Returns

  • dict : coordinate values of each dimension (None if the Dataset is not consistent)
def to_csv(self, filename, optcsv={'quoting': 2}, **kwargs):
113    def to_csv(self, filename, optcsv={'quoting': csv.QUOTE_NONNUMERIC}, **kwargs):
114        '''
115        Generate csv file to display data.
116
117        *Parameters*
118
119        - **filename** : string - file name (with path)
120        - **optcsv** : parameter for csv.writer
121
122        *Parameters (kwargs)*
123
124        - **name=listcode** : element (default None) - eg location='ns'
125            - listcode : string with Code for each index (j: json, n: name, s: simple).
126            - name : name of the index
127        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
128        - **header** : Boolean (default : True) - If True, first line with names
129        - **optcsv** : parameter for csv.writer
130        - **ifunc** : function (default None) - function to apply to indexes
131        - **other kwargs** : parameter for ifunc
132
133        *Returns* : size of csv file '''
134        size = 0
135        if not optcsv:
136            optcsv = {}
137        tab = self._to_tab(**kwargs)
138        with open(filename, 'w', newline='', encoding="utf-8") as csvfile:
139            writer = csv.writer(csvfile, **optcsv)
140            for lign in tab:
141                size += writer.writerow(lign)
142        return size

Generate csv file to display data.

Parameters

  • filename : string - file name (with path)
  • optcsv : parameter for csv.writer

Parameters (kwargs)

  • name=listcode : element (default None) - eg location='ns'
    • listcode : string with Code for each index (j: json, n: name, s: simple).
    • name : name of the index
  • lenres : Integer (default : 0) - Number of rows (all if 0)
  • header : Boolean (default : True) - If True, first line with names
  • optcsv : parameter for csv.writer
  • ifunc : function (default None) - function to apply to indexes
  • other kwargs : parameter for ifunc

Returns : size of csv file

def to_dataframe( self, info=False, idx=None, fillvalue='?', fillextern=True, lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs):
144    def to_dataframe(self, info=False, idx=None, fillvalue='?', fillextern=True,
145                     lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs):
146        '''
147        Complete the Object and generate a Pandas DataFrame with the dimension define by idx.
148
149        *Parameters*
150
151        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
152        - **idx** : list (default none) - list of idx to be completed. If [],
153        self.primary is used.
154        - **fillvalue** : object (default '?') - value used for the new extval
155        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
156        - **lisfunc** : function (default none) - list of function to apply to indexes before export
157        - **name** : string (default None) - DataArray name. If None, variable name
158        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
159        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
160        - **kwargs** : parameter for lisfunc
161
162        *Returns* : pandas.DataFrame '''
163        if self.consistent:
164            return self.to_xarray(info=info, idx=idx, fillvalue=fillvalue,
165                                  fillextern=fillextern, lisfunc=lisfunc, name=name,
166                                  numeric=numeric, npdtype=npdtype, **kwargs
167                                  ).to_dataframe(name=name)
168        return None

Complete the Object and generate a Pandas DataFrame with the dimension define by idx.

Parameters

  • info : boolean (default False) - if True, add _dict attributes to attrs Xarray
  • idx : list (default none) - list of idx to be completed. If [], self.primary is used.
  • fillvalue : object (default '?') - value used for the new extval
  • fillextern : boolean(default True) - if True, fillvalue is converted to internal value
  • lisfunc : function (default none) - list of function to apply to indexes before export
  • name : string (default None) - DataArray name. If None, variable name
  • numeric : Boolean (default False) - Generate a numeric DataArray.Values.
  • npdtype : string (default None) - numpy dtype for the DataArray ('object' if None)
  • kwargs : parameter for lisfunc

Returns : pandas.DataFrame

def to_file(self, filename, **kwargs):
170    def to_file(self, filename, **kwargs):
171        '''Generate file to display data.
172
173         *Parameters (kwargs)*
174
175        - **filename** : string - file name (with path)
176        - **kwargs** : see 'to_ntv' parameters
177
178        *Returns* : Integer - file lenght (bytes)  '''
179        option = {'format': 'cbor', 'modecodec': 'optimize'} | kwargs | {
180            'encoded': True}
181        data = self.to_ntv(modecodec=option['modecodec']).to_obj(**option)
182        if option['format'] == 'cbor':
183            size = len(data)
184            with open(filename, 'wb') as file:
185                file.write(data)
186        else:
187            size = len(bytes(data, 'UTF-8'))
188            with open(filename, 'w', newline='', encoding="utf-8") as file:
189                file.write(data)
190        return size

Generate file to display data.

Parameters (kwargs)

  • filename : string - file name (with path)
  • kwargs : see 'to_ntv' parameters

Returns : Integer - file length (bytes)

def to_ntv(self, modecodec='optimize', def_type='json', name=False):
    def to_ntv(self, modecodec='optimize', def_type='json', name=False):
        '''Return a Ntv tab value (without name).

        Each field of `self.lindex` is converted with its own `to_ntv` method and
        the results are gathered in a `NtvList` built with `self.name`.

        *Parameters*

        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
        if 'default' each index has keys, if 'optimize' keys are optimized,
        if 'dict' dict format is used, if 'nokeys' keys are absent
        - **def_type** : string (default 'json') - default ntv_type for NtvList or NtvSet
        - **name** : boolean (default False) - if False, default index name are not included


        *Returns* : Ntv object'''
        # One boolean per field: True when names are forced (`name`) or when the
        # field name differs from the default name 'i' + position.
        idxname = [name or iname != 'i' + str(i)
                   for i, iname in enumerate(self.lname)]
        if modecodec != 'optimize':
            # Every field is exported with the same requested mode.
            lis = [index.to_ntv(modecodec=modecodec, name=iname)
                   for index, iname in zip(self.lindex, idxname)]
        else:
            lis = []
            anafields = self.anafields
            for idx, iname, anafld in zip(self.lindex, idxname, anafields):
                # presumably a compact description of regular keys, falsy when
                # none applies - TODO confirm against Cutil.encode_coef
                coef = Cutil.encode_coef(idx.keys)
                # position of the field this one is derived from (-1 is tested below)
                parent = anafld.p_derived.view('index')
                if anafld.category == 'unique':
                    lis.append(idx.to_ntv(name=iname))
                elif anafld.category == 'coupled':
                    # export a copy (inplace=False) re-keyed with the parent's keys
                    idx_coup = idx.setkeys(
                        self.lindex[parent].keys, inplace=False)
                    lis.append(idx_coup.to_ntv(parent=parent, name=iname))
                elif coef:
                    lis.append(idx.to_ntv(keys=[coef], name=iname))
                elif parent == -1:  # cat='variable' or 'secondary'
                    # keys equal to 0..len-1: 'full' mode, otherwise 'default' mode
                    if idx.keys == list(range(len(self))):
                        lis.append(idx.to_ntv(modecodec='full', name=iname))
                    else:
                        lis.append(idx.to_ntv(modecodec='default', name=iname))
                else:  # derived
                    # parent codec as long as the dataset: 'default' mode is used
                    if len(self.lindex[parent].codec) == len(self):
                        lis.append(idx.to_ntv(modecodec='default', name=iname))
                    else:  # keys expressed relative to the parent field
                        keys = idx.derkeys(self.lindex[parent])
                        lis.append(idx.to_ntv(
                            keys=keys, parent=parent, name=iname))
        return NtvList(lis, self.name, ntv_type=def_type)

Return a Ntv tab value (without name).

Parameters (kwargs)

  • modecodec : string (default 'optimize') - if 'full', each index is with a full codec if 'default' each index has keys, if 'optimize' keys are optimized, if 'dict' dict format is used, if 'nokeys' keys are absent
  • def_type : string (default 'json') - default ntv_type for NtvList or NtvSet
  • name : boolean (default False) - if False, default index name are not included

Returns : Ntv object

def to_xarray( self, info=False, idxname=None, varname=None, fillvalue='?', fillextern=True, lisfunc=None, name=None, numeric=False, npdtype=None, attrs=None, coord=False, **kwargs):
238    def to_xarray(self, info=False, idxname=None, varname=None, fillvalue='?',
239                  fillextern=True, lisfunc=None, name=None, numeric=False,
240                  npdtype=None, attrs=None, coord=False, **kwargs):
241        '''
242        Complete the Object and generate a Xarray DataArray with the dimension define by idx.
243        Only the first variable is incuded.
244
245        *Parameters*
246
247        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
248        - **idxname** : list (default none) - list of choosen primary fields. If None,
249        self.primary is used.
250        - **varname** : string (default none) - Name of the variable to use. If None,
251        first lvarname is used.
252        - **fillvalue** : object (default '?') - value used for the new extval
253        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
254        - **lisfunc** : function (default none) - list of function to apply to indexes before export
255        - **name** : string (default None) - DataArray name. If None, variable name
256        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
257        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
258        - **attrs** : dict (default None) - attributes for the DataArray
259        - **coord** : boolean (default False) - if True, add derivated coords
260        - **kwargs** : parameter for lisfunc
261
262        *Returns* : DataArray '''
263        option = {'dtype': None} | kwargs
264        if not self.consistent:
265            raise DatasetError("Dataset not consistent")
266        if idxname is None or idxname == []:
267            idxname = self.primaryname
268        ilf = self.full(idxname=idxname, varname=varname, fillvalue=fillvalue,
269                        fillextern=fillextern, inplace=False)
270        ilf.setcanonorder()
271        if not varname and len(ilf.lvarname) != 0:
272            varname = ilf.lvarname[0]
273        if not varname in ilf.lname:
274            ivar = -1
275        else:
276            ivar = ilf.lname.index(varname)
277        if isinstance(lisfunc, list) and len(lisfunc) == 1:
278            lisfunc = lisfunc * ilf.lenindex
279        elif isinstance(lisfunc, list) and len(lisfunc) != ilf.lenindex:
280            lisfunc = [None] * ilf.lenindex
281        elif not isinstance(lisfunc, list):
282            funcvar = lisfunc
283            lisfunc = [None] * ilf.lenindex
284            if ivar != -1:
285                lisfunc[ivar] = funcvar
286        lisfuncname = dict(zip(ilf.lname, lisfunc))
287        coords = ilf._xcoord(idxname, ivar, lisfuncname, coord, **option)
288        dims = idxname
289        if numeric:
290            #lisfunc[ivar] = util.cast
291            fillvalue = math.nan
292            npdtype = 'float'
293            option['dtype'] = 'float'
294        if ivar == -1:
295            data = self.field(list(range(len(ilf)))).to_numpy(npdtype='int')\
296                .reshape([len(ilf.nindex(name).codec) for name in idxname])
297        else:
298            data = ilf.lindex[ivar]\
299                .to_numpy(func=lisfunc[ivar], npdtype=npdtype, **option)\
300                .reshape([len(ilf.nindex(name).codec) for name in idxname])
301        if not name and ivar == -1:
302            name = ilf.name
303        elif not name:
304            name = ilf.lname[ivar]
305        if not isinstance(attrs, dict):
306            attrs = {}
307        for nam in ilf.lunicname:
308            attrs[nam] = ilf.nindex(nam).codec[0]
309        if info:
310            attrs |= ilf.indexinfos()
311        #print(data, coords, dims, attrs, name)
312        return xarray.DataArray(data, coords, dims, attrs=attrs, name=name)

Complete the Object and generate a Xarray DataArray with the dimensions defined by idx. Only the first variable is included.

Parameters

  • info : boolean (default False) - if True, add _dict attributes to attrs Xarray
  • idxname : list (default none) - list of chosen primary fields. If None, self.primary is used.
  • varname : string (default none) - Name of the variable to use. If None, first lvarname is used.
  • fillvalue : object (default '?') - value used for the new extval
  • fillextern : boolean(default True) - if True, fillvalue is converted to internal value
  • lisfunc : function (default none) - list of function to apply to indexes before export
  • name : string (default None) - DataArray name. If None, variable name
  • numeric : Boolean (default False) - Generate a numeric DataArray.Values.
  • npdtype : string (default None) - numpy dtype for the DataArray ('object' if None)
  • attrs : dict (default None) - attributes for the DataArray
  • coord : boolean (default False) - if True, add derived coords
  • kwargs : parameter for lisfunc

Returns : DataArray

def voxel(self, idxname=None, varname=None):
314    def voxel(self, idxname=None, varname=None):
315        '''
316        Plot not null values in a cube with voxels and return indexes values.
317
318        *Parameters*
319
320        - **idxname** : list (default none) - list of idx to be completed. If None,
321        self.primary is used.
322        - **varname** : string (default none) - Name of the variable to use. If None,
323        first lvarname is used.
324
325        *Returns* : **dict of indexes values**
326        '''
327        if not self.consistent:
328            return None
329        if idxname is None or idxname == []:
330            idxname = self.primaryname
331        if varname is None and self.lvarname:
332            varname = self.lvarname[0]
333        if len(idxname) > 3:
334            raise DatasetError('number of idx > 3')
335        if len(idxname) == 2:
336            self.addindex(self.field('null', ' ', keys=[0]*len(self)))
337            idxname += [' ']
338        elif len(idxname) == 1:
339            self.addindex(self.field('null', ' ', keys=[0]*len(self)))
340            self.addindex(self.field('null', '  ', keys=[0]*len(self)))
341            idxname += [' ', '  ']
342        xar = self.to_xarray(idxname=idxname, varname=varname, fillvalue='?',
343                             fillextern=False, lisfunc=Cutil.is_not_equal, tovalue='?')
344        axe = plt.figure().add_subplot(projection='3d')
345        axe.voxels(xar, edgecolor='k')
346        axe.set_xticks(np.arange(self.idxlen[self.idxname.index(xar.dims[0])]))
347        axe.set_yticks(np.arange(self.idxlen[self.idxname.index(xar.dims[1])]))
348        axe.set_zticks(np.arange(self.idxlen[self.idxname.index(xar.dims[2])]))
349        axe.set(xlabel=xar.dims[0][:8],
350                ylabel=xar.dims[1][:8],
351                zlabel=xar.dims[2][:8])
352        plt.show()
353        self.delindex([' ', '  '])
354        return {xar.dims[i]: list(xar.coords[xar.dims[i]].values)
355                for i in range(len(xar.dims))}

Plot not null values in a cube with voxels and return indexes values.

Parameters

  • idxname : list (default none) - list of idx to be completed. If None, self.primary is used.
  • varname : string (default none) - Name of the variable to use. If None, first lvarname is used.

Returns : dict of indexes values

def view(self, **kwargs):
357    def view(self, **kwargs):
358        '''
359        Generate tabular list to display data.
360
361        *Parameters (kwargs)*
362
363        - **name=listcode** : element (default None) - eg location='ns'
364            - listcode : string with Code for each index (j: json, n: name, s: simple).
365            - name : name of the index
366        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
367        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
368        - **lenres** : Integer (default : 0) - Number of raws (all if 0)
369        - **header** : Boolean (default : True) - First line with names
370        - **width** : Integer (default None) - Number of characters displayed for each
371        attribute (all if None)
372        - **ifunc** : function (default None) - function to apply to indexes
373        - **tabulate params** : default 'tablefmt': 'simple', 'numalign': 'left',
374        'stralign': 'left', 'floatfmt': '.3f' - See tabulate module
375        - **other kwargs** : parameter for ifunc
376
377        *Returns* : list or html table (tabulate format) '''
378        opttab = {'defcode': 'j', 'all': True, 'lenres': 0, 'header': True}
379        optview = {'tablefmt': 'simple', 'numalign': 'decimal',
380                   'stralign': 'left', 'floatfmt': '.2f'}
381        option = opttab | optview | kwargs
382        tab = self._to_tab(**option)
383        width = ({'width': None} | kwargs)['width']
384        if width:
385            #tab = [[(lambda x: x[:width] if isinstance(x, str) else x)(val)
386            tab = [[val[:width] if isinstance(val, str) else val
387                    for val in lig] for lig in tab]
388        return tabulate(tab, headers='firstrow', **{k: option[k] for k in optview})

Generate tabular list to display data.

Parameters (kwargs)

  • name=listcode : element (default None) - eg location='ns'
    • listcode : string with Code for each index (j: json, n: name, s: simple).
    • name : name of the index
  • defcode : String (default : 'j') - default list code (if 'all' is True)
  • all : Boolean (default : True) - 'defcode' applies to all indexes or none
  • lenres : Integer (default : 0) - Number of rows (all if 0)
  • header : Boolean (default : True) - First line with names
  • width : Integer (default None) - Number of characters displayed for each attribute (all if None)
  • ifunc : function (default None) - function to apply to indexes
  • tabulate params : default 'tablefmt': 'simple', 'numalign': 'decimal', 'stralign': 'left', 'floatfmt': '.2f' - See tabulate module
  • other kwargs : parameter for ifunc

Returns : list or html table (tabulate format)

def vlist(self, *args, func=None, index=-1, **kwargs):
390    def vlist(self, *args, func=None, index=-1, **kwargs):
391        '''
392        Apply a function to an index and return the result.
393
394        *Parameters*
395
396        - **func** : function (default none) - function to apply to extval or extidx
397        - **args, kwargs** : parameters for the function
398        - **index** : integer - index to update (index=-1 for first variable)
399
400        *Returns* : list of func result'''
401        if index == -1 and self.lvar:
402            return self.lvar[0].vlist(func, *args, **kwargs)
403        if index == -1 and self.lenindex == 1:
404            index = 0
405        return self.lindex[index].vlist(func, *args, **kwargs)

Apply a function to an index and return the result.

Parameters

  • func : function (default none) - function to apply to extval or extidx
  • args, kwargs : parameters for the function
  • index : integer - index to update (index=-1 for first variable)

Returns : list of func result