tab-dataset.tab_dataset.cdataset

The cdataset module is part of the tab-dataset package.

It contains the DatasetAnalysis and Cdataset classes for Dataset entities.

For more information, see the user guide or the github repository.
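
A minimal usage sketch is shown below. It is illustrative only: the field names and values are invented, and it assumes the package and its dependencies (json_ntv, tab_analysis) are installed. The Cfield(codec, name, keys) call follows the signature used inside Cdataset.__init__.

    from tab_dataset.cfield import Cfield
    from tab_dataset.cdataset import Cdataset

    # two fields sharing six records: Cfield(codec, name, keys)
    quantity = Cfield(['low', 'high'], 'quantity', [0, 0, 1, 1, 0, 1])
    product = Cfield(['apple', 'pear', 'plum'], 'product', [0, 1, 2, 0, 1, 2])

    ds = Cdataset([quantity, product], name='example')

    print(len(ds))        # number of records
    print(ds.lname)       # ['quantity', 'product']
    print(ds.lenindex)    # number of fields (2)
    print(ds[0])          # first record, e.g. ['low', 'apple']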

  1# -*- coding: utf-8 -*-
  2"""
  3The `cdataset` module is part of the `tab-dataset` package.
  4
  5It contains the `DatasetAnalysis` and `Cdataset` classes for Dataset entities.
  6
  7For more information, see the 
  8[user guide](https://loco-philippe.github.io/tab-dataset/docs/user_guide.html) 
  9or the [github repository](https://github.com/loco-philippe/tab-dataset).
 10"""
 11from copy import copy
 12
 13from tab_dataset.cfield import Cfield, Cutil
 14
 15from json_ntv import Ntv
 16from json_ntv.ntv_util import NtvUtil, NtvConnector
 17
 18from tab_analysis import AnaDataset, Util
 19
 20
 21class DatasetAnalysis:
 22    '''This class is the interface between Cdataset and the tab_analysis module.'''
 23
 24# %% property
 25    @property
 26    def analysis(self):
 27        '''The analysis attribute is associated to the AnaDataset object'''
 28        if self._analysis is None or self._analysis.hashd != self._hashd:
 29            self._analysis = AnaDataset(self.to_analysis(True))
 30        return self._analysis
 31
 32    @property
 33    def anafields(self):
 34        ''' list of AnaField'''
 35        return self.analysis.fields
 36
 37    @property
 38    def partitions(self):
 39        ''' list of partitions defined with index representation (AnaDataset method)'''
 40        return self.analysis.partitions('index')
 41
 42    @property
 43    def complete(self):
 44        ''' complete property of the dataset (AnaDataset method)'''
 45        return self.analysis.complete
 46
 47    @property
 48    def dimension(self):
 49        ''' dimension of the dataset (AnaDataset method)'''
 50        return self.analysis.dimension
 51
 52    @property
 53    def lvarname(self):
 54        ''' list of variable Field name (AnaDataset method)'''
 55        return Util.view(self.analysis.variable, mode='id')
 56
 57    @property
 58    def primaryname(self):
 59        ''' list of primary name (AnaDataset method)'''
 60        return Util.view(self.analysis.primary, mode='id')
 61
 62    @property
 63    def secondaryname(self):
 64        ''' list of secondary name (AnaDataset method)'''
 65        return Util.view(self.analysis.secondary, mode='id')
 66
 67
 68# %% methods
 69
 70    def indexinfos(self, keys=None):
 71        '''return a dict with infos of each index (AnaDataset method) :
 72            
 73        - num, name, cat, diffdistparent, child, parent, distparent,
 74        crossed, pparent, rateder (struct info)
 75        - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)
 76
 77        *Parameters*
 78
 79        - **keys** : string, list or tuple (default None) - list of attributes
 80        to be returned.
 81        if 'all' or None, all attributes are returned.
 82        if 'struct', only structural attributes are returned.
 83
 84        *Returns* : dict'''
 85        return self.analysis.to_dict(mode='index', keys=keys)
 86
 87    def field_partition(self, partition=None, mode='index'):
 88        '''return a partition dict with the list of primary, secondary, unique
 89        and variable fields (index).
 90
 91         *Parameters*
 92
 93        - **partition** : list (default None) - if None, the first partition is used
 94        - **mode** : str (default 'index') - Field representation ('id', 'index')
 95        '''
 96        if not partition and len(self.partitions) > 0:
 97            partition = self.partitions[0]
 98        part = [self.analysis.dfield(fld)
 99                for fld in partition] if partition else None
100        return self.analysis.field_partition(mode=mode, partition=part,
101                                             distributed=True)
102
103    def relation(self, fld1, fld2):
104        '''relationship between two fields (AnaDataset method)'''
105        return self.analysis.get_relation(fld1, fld2)
106
107    def tree(self, mode='derived', width=5, lname=20, string=True):
108        '''return a string with a tree of derived Field (AnaDataset method).
109
110         *Parameters*
111
112        - **lname** : integer (default 20) - length of the names
113        - **width** : integer (default 5) - length of the lines
114        - **string** : boolean (default True) - if True return str else return dict
115        - **mode** : string (default 'derived') - kind of tree :
116            'derived' : derived tree
117            'distance': min distance tree
118            'distomin': min distomin tree
119        '''
120        return self.analysis.tree(mode=mode, width=width, lname=lname, string=string)
121
122    def indicator(self, fullsize=None, size=None):
123        '''generate size indicators: ol (object lightness), ul (unicity level),
124        gain (sizegain)
125
126        *Parameters*
127
128        - **fullsize** : int (default None) - size with full codec
129        - **size** : int (default None) - size with existing codec
130
131        *Returns* : dict'''
132        if not fullsize:
133            fullsize = len(self.to_obj(encoded=True, modecodec='full'))
134        if not size:
135            size = len(self.to_obj(encoded=True))
136        return self.analysis.indicator(fullsize, size)
137
138
139class Cdataset(DatasetAnalysis):
140    # %% magic
141    '''
142    A `Cdataset` is a representation of tabular data.
143
144    *Attributes (for @property see methods)* :
145
146    - **lindex** : list of Field
147    - **name** : name of the Cdataset
148    - **_analysis** : AnaDataset object
149
150    The methods defined in this class are :
151
152    *constructor (@classmethod)*
153
154    - `Cdataset.ntv`
155    - `Cdataset.from_ntv`
156
157    *dynamic value - module analysis (getters @property)*
158
159    - `DatasetAnalysis.analysis`
160    - `DatasetAnalysis.anafields`
161    - `DatasetAnalysis.lvarname`
162    - `DatasetAnalysis.partitions`
163    - `DatasetAnalysis.primaryname`
164    - `DatasetAnalysis.secondaryname`
165    - `DatasetAnalysis.complete`
166    - `DatasetAnalysis.dimension`
167
168    *selecting - infos methods (module analysis)*
169
170    - `DatasetAnalysis.field_partition`
171    - `DatasetAnalysis.indexinfos`
172    - `DatasetAnalysis.indicator`
173    - `DatasetAnalysis.relation`
174    - `DatasetAnalysis.tree`
175
176    *dynamic value (getters @property)*
177
178    - `Cdataset.keys`
179    - `Cdataset.iindex`
180    - `Cdataset.indexlen`
181    - `Cdataset.lenindex`
182    - `Cdataset.lname`
183    - `Cdataset.lunicname`
184    - `Cdataset.lunicrow`
185    - `Cdataset.tiindex`
186
187    *add - update methods (`observation.dataset_structure.DatasetStructure`)*
188
189    - `Cdataset.add`
190    - `Cdataset.delindex`
191    - `Cdataset.renameindex`
192    - `Cdataset.setname`
193
194    *structure management - methods (`observation.dataset_structure.DatasetStructure`)*
195
196    - `Cdataset.check_relation`
197    - `Cdataset.check_relationship`
198    - `Cdataset.nindex`
199    - `Cdataset.reindex`
200    - `Cdataset.reorder`
201    - `Cdataset.swapindex`
202    - `Cdataset.to_analysis`
203    '''
204    field_class = Cfield
205
206    def __init__(self, listidx=None, name=None, reindex=True):
207        '''
208        Dataset constructor.
209
210        *Parameters*
211
212        - **listidx** :  list (default None) - list of Field data
213        - **name** :  string (default None) - name of the dataset
214        - **reindex** : boolean (default True) - if True, default codec for each Field'''
215
216        if isinstance(listidx, Cdataset):
217            self.lindex = [copy(idx) for idx in listidx.lindex]
218            self.name = name if name else listidx.name
219            self._analysis = listidx._analysis
220            return
221        if listidx.__class__.__name__ == 'DataFrame':
222            lindex = NtvConnector.connector(
223            )['DataFrameConnec'].to_listidx(listidx)[0]
224            listidx = [Cfield(field['codec'], field['name'], field['keys'])
225                       for field in lindex]
226        self.name = name
227        self.lindex = [] if listidx is None else listidx
228        if reindex:
229            self.reindex()
230        self._analysis = None
231        return
232
233    def __repr__(self):
234        '''return classname, number of values and number of indexes'''
235        return self.__class__.__name__ + '[' + str(len(self)) + ', ' + str(self.lenindex) + ']'
236
237    def __str__(self):
238        '''return string format for var and lidx'''
239        stri = ''
240        stri += 'fields :\n'
241        for idx in self.lindex:
242            stri += '    ' + str(idx) + '\n'
243        return stri
244
245    def __len__(self):
246        ''' len of values'''
247        if not self.lindex:
248            return 0
249        return len(self.lindex[0])
250
251    def __contains__(self, item):
252        ''' return True if item is one of the lindex Fields'''
253        return item in self.lindex
254
255    def __getitem__(self, ind):
256        ''' return value record (value conversion)'''
257        res = [idx[ind] for idx in self.lindex]
258        if len(res) == 1:
259            return res[0]
260        return res
261
262    def __setitem__(self, ind, item):
263        ''' modify the Field values for each Field at the row ind'''
264        if not isinstance(item, list):
265            item = [item]
266        for val, idx in zip(item, self.lindex):
267            idx[ind] = val
268
269    def __delitem__(self, ind):
270        ''' remove all Field items at the row ind'''
271        for idx in self.lindex:
272            del idx[ind]
273
274    def __hash__(self):
275        '''return hash of all hash(Field)'''
276        #return hash(tuple(hash(idx) for idx in self.lindex))
277        return sum(hash(idx) for idx in self.lindex)
278
279    def __eq__(self, other):
280        ''' equal if hash values are equal'''
281        return hash(self) == hash(other)
282
283    def __copy__(self):
284        ''' Copy all the data '''
285        return self.__class__(self)
286
287# %% property
288    @property
289    def _hashd(self):
290        '''return hash of all hashf(Field)'''
291        # return sum([idx._hashi() for idx in self.lindex])
292        return hash(tuple(fld.hashf for fld in self.lindex))
293
294    @property
295    def indexlen(self):
296        ''' list of index codec length'''
297        return [len(idx.codec) for idx in self.lindex]
298
299    @property
300    def iindex(self):
301        ''' list of keys for each index'''
302        return [idx.keys for idx in self.lindex]
303
304    @property
305    def keys(self):
306        ''' list of keys for each index'''
307        return [idx.keys for idx in self.lindex]
308
309    @property
310    def lenindex(self):
311        ''' number of indexes'''
312        return len(self.lindex)
313
314    @property
315    def lunicname(self):
316        ''' list of unique index name'''
317        return [idx.name for idx in self.lindex if len(idx.codec) == 1]
318
319    @property
320    def lunicrow(self):
321        '''list of unique index rows'''
322        return [self.lname.index(name) for name in self.lunicname]
323
324    @property
325    def lname(self):
326        ''' list of index name'''
327        return [idx.name for idx in self.lindex]
328
329    @property
330    def tiindex(self):
331        ''' list of keys for each record'''
332        return Cutil.list(list(zip(*self.iindex)))
333
334# %%methods
335
336    @classmethod
337    def ntv(cls, ntv_value, reindex=True, fast=False):
338        '''Generate a Dataset Object from a ntv_value
339
340        *Parameters*
341
342        - **ntv_value** : bytes, string, Ntv object to convert
343        - **reindex** : boolean (default True) - if True, default codec for each Field
344        - **fast** : boolean (default False) - if True, ntv_value is not converted to json-value'''
345        return cls.from_ntv(ntv_value, reindex=reindex, fast=fast)
346
347    @classmethod
348    def from_ntv(cls, ntv_value, reindex=True, decode_str=False, fast=False):
349        '''Generate a Dataset Object from a ntv_value
350
351        *Parameters*
352
353        - **ntv_value** : bytes, string, Ntv object to convert
354        - **reindex** : boolean (default True) - if True, default codec for each Field
355        - **decode_str**: boolean (default False) - if True, strings are loaded as json data
356        - **fast** : boolean (default False) - if True, ntv_value is not converted to json-value'''
357        ntv = Ntv.obj(ntv_value, decode_str=decode_str, fast=fast)
358        if len(ntv) == 0:
359            return cls()
360        lidx = [list(NtvUtil.decode_ntv_tab(
361            ntvf, cls.field_class.ntv_to_val)) for ntvf in ntv]
362        leng = max(idx[6] for idx in lidx)
363        for ind in range(len(lidx)):
364            if lidx[ind][0] == '':
365                lidx[ind][0] = 'i'+str(ind)
366            NtvConnector.init_ntv_keys(ind, lidx, leng)
367        lindex = [cls.field_class(idx[2], idx[0], idx[4], None,  # idx[1] pour le type,
368                                  reindex=reindex) for idx in lidx]
369        return cls(lindex, reindex=reindex, name=ntv.name)
370
371    def add(self, other, name=False, solve=True):
372        ''' Add other's values to self's values for each index
373
374        *Parameters*
375
376        - **other** : Dataset object to add to self object
377        - **name** : Boolean (default False) - Add values with same index name (True) or
378        same index row (False)
379        - **solve** : Boolean (default True) - If True, replace None other's codec value
380        with self codec value.
381
382        *Returns* : self '''
383        if self.lenindex != other.lenindex:
384            raise DatasetError('lengths are not identical')
385        if name and sorted(self.lname) != sorted(other.lname):
386            raise DatasetError('names are not identical')
387        for i in range(self.lenindex):
388            if name:
389                self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])],
390                                   solve=solve)
391            else:
392                self.lindex[i].add(other.lindex[i], solve=solve)
393        return self
394
395    def to_analysis(self, distr=False):
396        '''return a dict with data used in AnaDataset module
397
398        *Parameters*
399
400        - **distr** : Boolean (default False) - If True, add distr information'''
401        return {'name': self.name, 'fields': [fld.to_analysis for fld in self.lindex],
402                'length': len(self), 'hashd': self._hashd,
403                'relations': {self.lindex[i].name:
404                              {self.lindex[j].name: Cutil.dist(
405                                  self.lindex[i].keys, self.lindex[j].keys, distr)
406                               for j in range(i+1, len(self.lindex))}
407                              for i in range(len(self.lindex)-1)}
408                }
409
410    def reindex(self):
411        '''Calculate a new default codec for each index (Return self)'''
412        for idx in self.lindex:
413            idx.reindex()
414        return self
415
416    def delindex(self, delname=None, savename=None):
417        '''remove a Field or a list of Fields.
418
419        *Parameters*
420
421        - **delname** : string or list of string - name of index to remove
422        - **savename** : string or list of string - name of index to keep
423
424        *Returns* : none '''
425        if not delname and not savename:
426            return
427        if isinstance(delname, str):
428            delname = [delname]
429        if isinstance(savename, str):
430            savename = [savename]
431        if delname and savename:
432            delname = [name for name in delname if name not in savename]
433        if not delname:
434            delname = [name for name in self.lname if name not in savename]
435        for idxname in delname:
436            if idxname in self.lname:
437                self.lindex.pop(self.lname.index(idxname))
438
439    def nindex(self, name):
440        ''' return the index (Field) whose name equals the given name (None if not found)'''
441        if name in self.lname:
442            return self.lindex[self.lname.index(name)]
443        return None
444
445    def renameindex(self, oldname, newname):
446        '''replace an index name 'oldname' by a new one 'newname'. '''
447        for i in range(self.lenindex):
448            if self.lname[i] == oldname:
449                self.lindex[i].setname(newname)
450        for i in range(len(self.lvarname)):
451            if self.lvarname[i] == oldname:
452                self.lvarname[i] = newname
453
454    def reorder(self, recorder=None):
455        '''Reorder records in the order defined by 'recorder' '''
456        if recorder is None or set(recorder) != set(range(len(self))):
457            return None
458        for idx in self.lindex:
459            idx.set_keys([idx.keys[i] for i in recorder])
460        return None
461
462    def setname(self, listname=None):
463        '''Update Field names with the names in listname'''
464        for i in range(min(self.lenindex, len(listname))):
465            self.lindex[i].name = listname[i]
466
467    def swapindex(self, order):
468        '''
469        Change the order of the indexes.
470
471        *Parameters*
472
473        - **order** : list of int or list of names - new order of indexes to apply.
474
475        *Returns* : self '''
476        if self.lenindex != len(order):
477            raise DatasetError('length of order and Dataset different')
478        if not order or isinstance(order[0], int):
479            self.lindex = [self.lindex[ind] for ind in order]
480        elif isinstance(order[0], str):
481            self.lindex = [self.nindex(name) for name in order]
482        return self
483
484    def check_relation(self, field, parent, typecoupl, value=True):
485        '''get the inconsistent records for a relationship
486
487         *Parameters*
488
489        - **field** : int or str - index or name of the field involved in the relation
490        - **parent**: int or str - index or name of the second field involved in the relation
491        - **typecoupl**: str - relationship to check ('derived' or 'coupled')
492        - **value**: boolean (default True) - if True, return a dict with the inconsistent
493        values of the fields, else a tuple with the index of records
494
495        *Returns* :
496
497        - dict with inconsistent values of the fields
498        - or a tuple with index of records'''
499        f_parent = copy(self.nindex(parent) if isinstance(parent, str)
500                                            else self.lindex[parent])
501        f_field = copy(self.nindex(field) if isinstance(field, str)
502                                          else self.lindex[field])
503        match typecoupl:
504            case 'derived':
505                errors = f_parent.coupling(f_field, reindex=True)
506            case 'coupled':
507                errors = copy(f_parent).coupling(
508                    f_field, derived=False, reindex=True)
509            case _:
510                raise DatasetError(typecoupl + " is not a valid relationship")
511        if not value:
512            return errors
513        return {'row': list(errors), f_field.name: f_field[errors], f_parent.name: f_parent[errors]}
514
515    def check_relationship(self, relations):
516        '''get the inconsistent records for each relationship defined in relations
517
518         *Parameters*
519
520        - **relations** : list of dict or single dict - list of fields with relationship property
521
522        *Returns* :
523
524        - dict with, for each relationship, key = string with the two field names
525        and value = list of inconsistent records
526        - or, if there is a single relationship, the value alone
527        if not isinstance(relations, (list, dict)):
528            raise DatasetError("relations is not correct")
529        if isinstance(relations, dict):
530            relations = [relations]
531        dic_res = {}
532        for field in relations:
533            if 'relationship' not in field or 'name' not in field:
534                continue
535            if 'parent' not in field['relationship'] or 'link' not in field['relationship']:
536                raise DatasetError("relationship is not correct")
537            rel = field['relationship']['link']
538            f_parent = field['relationship']['parent']
539            f_field = field['name']
540            name_rel = f_field + ' - ' + f_parent
541            if self.nindex(f_parent) is None or self.nindex(f_field) is None:
542                raise DatasetError("field's name is not present in data")
543            dic_res[name_rel] = self.check_relation(
544                f_field, f_parent, rel, False)
545        if len(dic_res) == 1:
546            return list(dic_res.values())[0]
547        return dic_res
548
549
550class DatasetError(Exception):
551    # %% errors
552    ''' Dataset Exception'''
553    # pass
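
As a complement to the docstrings above, here is a hedged sketch of the analysis and consistency helpers, continuing the ds object from the introductory example. The exact accepted argument forms and return shapes depend on the installed tab_analysis version.

    # structural analysis delegated to tab_analysis
    print(ds.relation('quantity', 'product'))   # relationship between the two fields
    print(ds.tree())                            # derived-field tree, returned as a string
    print(ds.field_partition())                 # primary / secondary / unique / variable fields

    # consistency check: the dict layout follows the check_relationship docstring
    relations = [{'name': 'product',
                  'relationship': {'parent': 'quantity', 'link': 'derived'}}]
    print(ds.check_relationship(relations))     # inconsistent records, if any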