tab-dataset.tab_dataset.cdataset

The cdataset module is part of the tab-dataset package.

It contains the DatasetAnalysis and Cdataset classes for Dataset entities.

For more information, see the user guide or the github repository.
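
A minimal quick-start sketch (illustrative only, not taken from the package documentation): it builds a small Cdataset from three Cfield objects, assuming Cfield accepts (codec, name, keys) as used in the DataFrame branch of Cdataset.__init__ below; the data values are invented. Later examples on this page reuse this ds object.

# hedged quick-start sketch - field values and the Cfield(codec, name, keys) call are assumptions
from tab_dataset.cfield import Cfield
from tab_dataset.cdataset import Cdataset

year = Cfield([2020, 2021], 'year', [0, 0, 1, 1])             # 2 codec values, 4 records
city = Cfield(['paris', 'lyon'], 'city', [0, 1, 0, 1])        # 2 codec values, 4 records
temp = Cfield([10, 12, 11, 13], 'temperature', [0, 1, 2, 3])  # 4 codec values, 4 records

ds = Cdataset([year, city, temp], name='weather')
print(ds)         # lists the three fields
print(len(ds))    # expected: 4 (number of records)
print(ds.lname)   # expected: ['year', 'city', 'temperature']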

  1# -*- coding: utf-8 -*-
  2"""
  3The `cdataset` module is part of the `tab-dataset` package.
  4
  5It contains the `DatasetAnalysis` and `Cdataset` classes for Dataset entities.
  6
  7For more information, see the 
  8[user guide](https://loco-philippe.github.io/tab-dataset/docs/user_guide.html) 
  9or the [github repository](https://github.com/loco-philippe/tab-dataset).
 10"""
 11from copy import copy
 12
 13from tab_dataset.cfield import Cfield, Cutil
 14
 15from json_ntv.ntv import Ntv
 16from json_ntv.ntv_util import NtvUtil, NtvConnector
 17
 18from tab_analysis.analysis import AnaDataset, Util
 19
 20
 21class DatasetAnalysis:
 22    '''This class is the interface between Cdataset and the tab_analysis module.'''
 23
 24# %% property
 25    @property
 26    def analysis(self):
 27        '''The analysis attribute is associated to the AnaDataset object'''
 28        if self._analysis is None or self._analysis.hashd != self._hashd:
 29            self._analysis = AnaDataset(self.to_analysis(True))
 30        return self._analysis
 31
 32    @property
 33    def anafields(self):
 34        ''' list of AnaField'''
 35        return self.analysis.fields
 36
 37    @property
 38    def partitions(self):
 39        ''' list of partitions defined with index representation (AnaDataset method)'''
 40        return self.analysis.partitions('index')
 41
 42    @property
 43    def complete(self):
 44        ''' complete property of the dataset (AnaDataset method)'''
 45        return self.analysis.complete
 46
 47    @property
 48    def dimension(self):
 49        ''' dimension of the dataset (AnaDataset method)'''
 50        return self.analysis.dimension
 51
 52    @property
 53    def lvarname(self):
 54        ''' list of variable Field name (AnaDataset method)'''
 55        return Util.view(self.analysis.variable, mode='id')
 56
 57    @property
 58    def primaryname(self):
 59        ''' list of primary name (AnaDataset method)'''
 60        return Util.view(self.analysis.primary, mode='id')
 61
 62    @property
 63    def secondaryname(self):
 64        ''' list of secondary name (AnaDataset method)'''
 65        return Util.view(self.analysis.secondary, mode='id')
 66
 67
 68# %% methods
 69
 70    def indexinfos(self, keys=None):
 71        '''return a dict with infos of each index (AnaDataset method) :
 72            
 73        - num, name, cat, diffdistparent, child, parent, distparent,
 74        crossed, pparent, rateder (struct info)
 75        - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)
 76
 77        *Parameters*
 78
 79        - **keys** : string, list or tuple (default None) - list of attributes
 80        to be returned.
 81        if 'all' or None, all attributes are returned.
 82        if 'struct', only structural attributes are returned.
 83
 84        *Returns* : dict'''
 85        return self.analysis.to_dict(mode='index', keys=keys)
 86
 87    def field_partition(self, partition=None, mode='index'):
 88        '''return a partition dict with the list of primary, secondary, unique
 89        and variable fields (index).
 90
 91         *Parameters*
 92
 93        - **partition** : list (default None) - if None, partition is the first
 94        - **mode** : str (default 'index') - Field representation ('id', 'index')
 95        '''
 96        if not partition and len(self.partitions) > 0:
 97            partition = self.partitions[0]
 98        part = [self.analysis.dfield(fld)
 99                for fld in partition] if partition else None
100        return self.analysis.field_partition(mode=mode, partition=part,
101                                             distributed=True)
102
103    def relation(self, fld1, fld2):
104        '''relationship between two fields (AnaDataset method)'''
105        return self.analysis.get_relation(fld1, fld2)
106
107    def tree(self, mode='derived', width=5, lname=20, string=True):
108        '''return a string with a tree of derived Field (AnaDataset method).
109
110         *Parameters*
111
112        - **lname** : integer (default 20) - length of the names
113        - **width** : integer (default 5) - length of the lines
114        - **string** : boolean (default True) - if True return str else return dict
115        - **mode** : string (default 'derived') - kind of tree :
116            'derived' : derived tree
117            'distance': min distance tree
118            'distomin': min distomin tree
119        '''
120        return self.analysis.tree(mode=mode, width=width, lname=lname, string=string)
121
122    def indicator(self, fullsize=None, size=None):
123        '''generate size indicators: ol (object lightness), ul (unicity level),
124        gain (sizegain)
125
126        *Parameters*
127
128        - **fullsize** : int (default None) - size with full codec
129        - **size** : int (default None) - size with existing codec
130
131        *Returns* : dict'''
132        if not fullsize:
133            fullsize = len(self.to_obj(encoded=True, modecodec='full'))
134        if not size:
135            size = len(self.to_obj(encoded=True))
136        return self.analysis.indicator(fullsize, size)
137
138
139class Cdataset(DatasetAnalysis):
140    # %% magic
141    '''
142    A `Cdataset` is a representation of tabular data.
143
144    *Attributes (for @property see methods)* :
145
146    - **lindex** : list of Field
147    - **name** : name of the Cdataset
148    - **_analysis** : AnaDataset object
149
150    The methods defined in this class are :
151
152    *constructor (@classmethod)*
153
154    - `Cdataset.ntv`
155    - `Cdataset.from_ntv`
156
157    *dynamic value - module analysis (getters @property)*
158
159    - `DatasetAnalysis.analysis`
160    - `DatasetAnalysis.anafields`
161    - `DatasetAnalysis.lvarname`
162    - `DatasetAnalysis.partitions`
163    - `DatasetAnalysis.primaryname`
164    - `DatasetAnalysis.secondaryname`
165    - `DatasetAnalysis.complete`
166    - `DatasetAnalysis.dimension`
167
168    *selecting - infos methods (module analysis)*
169
170    - `DatasetAnalysis.field_partition`
171    - `DatasetAnalysis.indexinfos`
172    - `DatasetAnalysis.indicator`
173    - `DatasetAnalysis.relation`
174    - `DatasetAnalysis.tree`
175
176    *dynamic value (getters @property)*
177
178    - `Cdataset.keys`
179    - `Cdataset.iindex`
180    - `Cdataset.indexlen`
181    - `Cdataset.lenindex`
182    - `Cdataset.lname`
183    - `Cdataset.lunicname`
184    - `Cdataset.lunicrow`
185    - `Cdataset.tiindex`
186
187    *add - update methods (`observation.dataset_structure.DatasetStructure`)*
188
189    - `Cdataset.add`
190    - `Cdataset.delindex`
191    - `Cdataset.renameindex`
192    - `Cdataset.setname`
193
194    *structure management - methods (`observation.dataset_structure.DatasetStructure`)*
195
196    - `Cdataset.check_relation`
197    - `Cdataset.check_relationship`
198    - `Cdataset.nindex`
199    - `Cdataset.reindex`
200    - `Cdataset.reorder`
201    - `Cdataset.swapindex`
202    - `Cdataset.to_analysis`
203    '''
204    field_class = Cfield
205
206    def __init__(self, listidx=None, name=None, reindex=True):
207        '''
208        Dataset constructor.
209
210        *Parameters*
211
212        - **listidx** :  list (default None) - list of Field data
213        - **name** :  string (default None) - name of the dataset
214        - **reindex** : boolean (default True) - if True, default codec for each Field'''
215
216        if isinstance(listidx, Cdataset):
217            self.lindex = [copy(idx) for idx in listidx.lindex]
218            self.name = name if name else listidx.name
219            self._analysis = listidx._analysis
220            return
221        if listidx.__class__.__name__ == 'DataFrame':
222            lindex = NtvConnector.connector(
223            )['DataFrameConnec'].to_listidx(listidx)[0]
224            #listidx = [Cfield(field['codec'], field['name'], field['keys'])
225            listidx = [self.field_class(field['codec'], field['name'], field['keys'])
226                       for field in lindex]
227        self.name = name
228        self.lindex = [] if listidx is None else listidx
229        if reindex:
230            self.reindex()
231        self._analysis = None
232        return
233
234    def __repr__(self):
235        '''return classname, number of values and number of indexes'''
236        return self.__class__.__name__ + '[' + str(len(self)) + ', ' + str(self.lenindex) + ']'
237
238    def __str__(self):
239        '''return string format for var and lidx'''
240        stri = ''
241        stri += 'fields :\n'
242        for idx in self.lindex:
243            stri += '    ' + str(idx) + '\n'
244        return stri
245
246    def __len__(self):
247        ''' len of values'''
248        if not self.lindex:
249            return 0
250        return len(self.lindex[0])
251
252    def __contains__(self, item):
253        ''' check whether item is one of the lindex Fields'''
254        return item in self.lindex
255
256    def __getitem__(self, ind):
257        ''' return value record (value conversion)'''
258        res = [idx[ind] for idx in self.lindex]
259        if len(res) == 1:
260            return res[0]
261        return res
262
263    def __setitem__(self, ind, item):
264        ''' modify the Field values for each Field at the row ind'''
265        if not isinstance(item, list):
266            item = [item]
267        for val, idx in zip(item, self.lindex):
268            idx[ind] = val
269
270    def __delitem__(self, ind):
271        ''' remove all Field item at the row ind'''
272        for idx in self.lindex:
273            del idx[ind]
274
275    def __hash__(self):
276        '''return hash of all hash(Field)'''
277        #return hash(tuple(hash(idx) for idx in self.lindex))
278        return sum(hash(idx) for idx in self.lindex)
279
280    def __eq__(self, other):
281        ''' equal if hash values are equal'''
282        return hash(self) == hash(other)
283
284    def __copy__(self):
285        ''' Copy all the data '''
286        return self.__class__(self)
287
288# %% property
289    @property
290    def _hashd(self):
291        '''return hash of all hashf(Field)'''
292        # return sum([idx._hashi() for idx in self.lindex])
293        return hash(tuple(fld.hashf for fld in self.lindex))
294
295    @property
296    def indexlen(self):
297        ''' list of index codec length'''
298        return [len(idx.codec) for idx in self.lindex]
299
300    @property
301    def iindex(self):
302        ''' list of keys for each index'''
303        return [idx.keys for idx in self.lindex]
304
305    @property
306    def keys(self):
307        ''' list of keys for each index'''
308        return [idx.keys for idx in self.lindex]
309
310    @property
311    def lenindex(self):
312        ''' number of indexes'''
313        return len(self.lindex)
314
315    @property
316    def lunicname(self):
317        ''' list of unique index name'''
318        return [idx.name for idx in self.lindex if len(idx.codec) == 1]
319
320    @property
321    def lunicrow(self):
322        '''list of unique index rows'''
323        return [self.lname.index(name) for name in self.lunicname]
324
325    @property
326    def lname(self):
327        ''' list of index name'''
328        return [idx.name for idx in self.lindex]
329
330    @property
331    def tiindex(self):
332        ''' list of keys for each record'''
333        return Cutil.list(list(zip(*self.iindex)))
334
335# %%methods
336
337    @classmethod
338    def ntv(cls, ntv_value, reindex=True, fast=False):
339        '''Generate a Dataset Object from a ntv_value
340
341        *Parameters*
342
343        - **ntv_value** : bytes, string, Ntv object to convert
344        - **reindex** : boolean (default True) - if True, default codec for each Field
345        - **fast** : boolean (default False) - if True, ntv_value is not converted to json-value'''
346        return cls.from_ntv(ntv_value, reindex=reindex, fast=fast)
347
348    @classmethod
349    def from_ntv(cls, ntv_value, reindex=True, decode_str=False, fast=False):
350        '''Generate a Dataset Object from a ntv_value
351
352        *Parameters*
353
354        - **ntv_value** : bytes, string, Ntv object to convert
355        - **reindex** : boolean (default True) - if True, default codec for each Field
356        - **decode_str**: boolean (default False) - if True, strings are loaded as json data
357        - **fast** : boolean (default False) - if True, ntv_value is not converted to json-value'''
358        ntv = Ntv.obj(ntv_value, decode_str=decode_str, fast=fast)
359        if len(ntv) == 0:
360            return cls()
361        lidx = [list(NtvUtil.decode_ntv_tab(
362            ntvf, cls.field_class.ntv_to_val)) for ntvf in ntv]
363        leng = max(idx[6] for idx in lidx)
364        for ind in range(len(lidx)):
365            if lidx[ind][0] == '':
366                lidx[ind][0] = 'i'+str(ind)
367            NtvConnector.init_ntv_keys(ind, lidx, leng)
368        lindex = [cls.field_class(idx[2], idx[0], idx[4], None,  # idx[1] for the type,
369                                  reindex=reindex) for idx in lidx]
370        return cls(lindex, reindex=reindex, name=ntv.name)
371
372    def add(self, other, name=False, solve=True):
373        ''' Add other's values to self's values for each index
374
375        *Parameters*
376
377        - **other** : Dataset object to add to self object
378        - **name** : Boolean (default False) - Add values with same index name (True) or
379        same index row (False)
380        - **solve** : Boolean (default True) - If True, replace other's None codec values
381        with self codec values.
382
383        *Returns* : self '''
384        if self.lenindex != other.lenindex:
385            raise DatasetError('length are not identical')
386        if name and sorted(self.lname) != sorted(other.lname):
387            raise DatasetError('name are not identical')
388        for i in range(self.lenindex):
389            if name:
390                self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])],
391                                   solve=solve)
392            else:
393                self.lindex[i].add(other.lindex[i], solve=solve)
394        return self
395
396    def to_analysis(self, distr=False):
397        '''return a dict with data used in AnaDataset module
398
399        *Parameters*
400
401        - **distr** : Boolean (default False) - If True, add distr information'''
402        return {'name': self.name, 'fields': [fld.to_analysis for fld in self.lindex],
403                'length': len(self), 'hashd': self._hashd,
404                'relations': {self.lindex[i].name:
405                              {self.lindex[j].name: Cutil.dist(
406                                  self.lindex[i].keys, self.lindex[j].keys, distr)
407                               for j in range(i+1, len(self.lindex))}
408                              for i in range(len(self.lindex)-1)}
409                }
410
411    def reindex(self):
412        '''Calculate a new default codec for each index (Return self)'''
413        for idx in self.lindex:
414            idx.reindex()
415        return self
416
417    def delindex(self, delname=None, savename=None):
418        '''remove a Field or a list of Fields.
419
420        *Parameters*
421
422        - **delname** : string or list of string - name of index to remove
423        - **savename** : string or list of string - name of index to keep
424
425        *Returns* : none '''
426        if not delname and not savename:
427            return
428        if isinstance(delname, str):
429            delname = [delname]
430        if isinstance(savename, str):
431            savename = [savename]
432        if delname and savename:
433            delname = [name for name in delname if not name in savename]
434        if not delname:
435            delname = [name for name in self.lname if not name in savename]
436        for idxname in delname:
437            if idxname in self.lname:
438                self.lindex.pop(self.lname.index(idxname))
439
440    def nindex(self, name):
441        ''' return the Field whose name equals the given name (None if absent)'''
442        if name in self.lname:
443            return self.lindex[self.lname.index(name)]
444        return None
445
446    def renameindex(self, oldname, newname):
447        '''replace an index name 'oldname' by a new one 'newname'. '''
448        for i in range(self.lenindex):
449            if self.lname[i] == oldname:
450                self.lindex[i].setname(newname)
451        for i in range(len(self.lvarname)):
452            if self.lvarname[i] == oldname:
453                self.lvarname[i] = newname
454
455    def reorder(self, recorder=None):
456        '''Reorder records in the order defined by 'recorder' '''
457        if recorder is None or set(recorder) != set(range(len(self))):
458            return None
459        for idx in self.lindex:
460            idx.set_keys([idx.keys[i] for i in recorder])
461        return None
462
463    def setname(self, listname=None):
464        '''Update Field names with the names in listname'''
465        for i in range(min(self.lenindex, len(listname))):
466            self.lindex[i].name = listname[i]
467
468    def swapindex(self, order):
469        '''
470        Change the order of the indexes.
471
472        *Parameters*
473
474        - **order** : list of int or list of str - new order of indexes to apply.
475
476        *Returns* : self '''
477        if self.lenindex != len(order):
478            raise DatasetError('length of order and Dataset different')
479        if not order or isinstance(order[0], int):
480            self.lindex = [self.lindex[ind] for ind in order]
481        elif isinstance(order[0], str):
482            self.lindex = [self.nindex(name) for name in order]
483        return self
484
485    def check_relation(self, parent, field, typecoupl, value=True):
486        '''get the inconsistent records for a relationship
487
488         *Parameters*
489
490        - **field** : int or str - index or name of the field involved in the relation
491        - **parent**: int or str - index or name of the second field involved in the relation
492        - **typecoupl**: str - relationship to check ('derived' or 'coupled')
493        - **value**: boolean (default True) - if True return a dict with inconsistent
494        values of the fields, else a tuple with indexes of records
495
496        *Returns* :
497
498        - dict with inconsistent values of the fields
499        - or a tuple with indexes of records'''
500        f_parent = copy(self.nindex(parent) if isinstance(parent, str)
501                                            else self.lindex[parent])
502        f_field = copy(self.nindex(field) if isinstance(field, str)
503                                          else self.lindex[field])
504        return Cfield.check_relation(f_parent, f_field, typecoupl, value)
505
506    def check_relationship(self, relations):
507        '''get the inconsistent records for each relationship defined in relations
508
509         *Parameters*
510
511        - **relations** : list of dict or single dict - list of fields with relationship property
512
513        *Returns* :
514
515        - dict with, for each relationship, a key (string with the two field names)
516        and a value (list of inconsistent records)
517        - or, for a single relationship, the value only'''
518        if not isinstance(relations, (list, dict)):
519            raise DatasetError("relations is not correct")
520        if isinstance(relations, dict):
521            relations = [relations]
522        dic_res = {}
523        for field in relations:
524            if not 'relationship' in field or not 'name' in field:
525                continue
526            if not 'parent' in field['relationship'] or not 'link' in field['relationship']:
527                raise DatasetError("relationship is not correct")
528            rel = field['relationship']['link']
529            f_parent = field['relationship']['parent']
530            f_field = field['name']
531            name_rel = f_field + ' - ' + f_parent
532            if self.nindex(f_parent) is None or self.nindex(f_field) is None:
533                raise DatasetError("field's name is not present in data")
534            dic_res[name_rel] = self.check_relation(f_parent, f_field, rel, False)
535        if len(dic_res) == 1:
536            return list(dic_res.values())[0]
537        return dic_res
538
539
540class DatasetError(Exception):
541    # %% errors
542    ''' Dataset Exception'''
543    # pass
class DatasetAnalysis:

This class is the interface between Cdataset and the tab_analysis module.

analysis

The analysis attribute is associated to the AnaDataset object

anafields

list of AnaField

partitions

list of partitions defined with index representation (AnaDataset method)

complete

complete property of the dataset (AnaDataset method)

dimension

dimension of the dataset (AnaDataset method)

lvarname

list of variable Field name (AnaDataset method)

primaryname

list of primary name (AnaDataset method)

secondaryname

list of secondary name (AnaDataset method)
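
A hedged sketch of how these properties might be read (ds is the illustrative dataset from the quick-start sketch near the top of the page; the printed values depend on the data and on the tab_analysis module):

# hedged sketch - the analysis properties are plain attributes of the dataset
print(ds.dimension, ds.complete)      # e.g. 2 True for a fully crossed year x city dataset
print(ds.primaryname, ds.lvarname)    # e.g. ['year', 'city'] ['temperature']
print(ds.partitions)                  # available partitions, in 'index' representation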

def indexinfos(self, keys=None):

return a dict with infos of each index (AnaDataset method) :

  • num, name, cat, diffdistparent, child, parent, distparent, crossed, pparent, rateder (struct info)
  • lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)

Parameters

  • keys : string, list or tuple (default None) - list of attributes to be returned. If 'all' or None, all attributes are returned. If 'struct', only structural attributes are returned.

Returns : dict
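
A hedged usage sketch (ds as in the quick-start sketch near the top of the page; the exact attribute names in the returned dict come from the tab_analysis module):

# hedged sketch - keep only the structural attributes of each field
infos = ds.indexinfos(keys='struct')
print(infos)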

def field_partition(self, partition=None, mode='index'):

return a partition dict with the list of primary, secondary, unique and variable fields (index).

Parameters

  • partition : list (default None) - if None, partition is the first
  • mode : str (default 'index') - Field representation ('id', 'index')
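
A hedged usage sketch (ds as in the quick-start sketch near the top of the page):

# hedged sketch - default (first) partition, fields identified by name
part = ds.field_partition(mode='id')
print(part)   # per the description above: primary, secondary, unique and variable fields
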
def relation(self, fld1, fld2):

relationship between two fields (AnaDataset method)

def tree(self, mode='derived', width=5, lname=20, string=True):

return a string with a tree of derived Field (AnaDataset method).

Parameters

  • lname : integer (default 20) - length of the names
  • width : integer (default 5) - length of the lines
  • string : boolean (default True) - if True return str else return dict
  • mode : string (default 'derived') - kind of tree: 'derived' (derived tree), 'distance' (min distance tree), 'distomin' (min distomin tree)
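
A hedged usage sketch (ds as in the quick-start sketch near the top of the page):

# hedged sketch - derived-field tree with field names truncated to 10 characters
print(ds.tree(mode='derived', lname=10))
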
def indicator(self, fullsize=None, size=None):

generate size indicators: ol (object lightness), ul (unicity level), gain (sizegain)

Parameters

  • fullsize : int (default None) - size with full codec
  • size : int (default None) - size with existing codec

Returns : dict
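
A hedged usage sketch. As the source listing above shows, the default sizes are computed with to_obj, which is provided by subclasses; passing explicit sizes keeps the call self-contained (the two integers below are illustrative):

# hedged sketch - explicit sizes, e.g. measured JSON lengths
print(ds.indicator(fullsize=1500, size=800))   # ol, ul and gain indicators, per the docstring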

class Cdataset(DatasetAnalysis):

A Cdataset is a representation of tabular data.

Attributes (for @property see methods) :

  • lindex : list of Field
  • name : name of the Cdataset
  • _analysis : AnaDataset object

The methods defined in this class are :

constructor (@classmethod)

dynamic value - module analysis (getters @property)

selecting - infos methods (module analysis)

dynamic value (getters @property)

add - update methods (observation.dataset_structure.DatasetStructure)

structure management - methods (observation.dataset_structure.DatasetStructure)

Cdataset(listidx=None, name=None, reindex=True)

Dataset constructor.

Parameters

  • listidx : list (default None) - list of Field data
  • name : string (default None) - name of the dataset
  • reindex : boolean (default True) - if True, default codec for each Field
indexlen

list of index codec length

iindex

list of keys for each index

keys

list of keys for each index

lenindex

number of indexes

lunicname

list of unique index name

lunicrow

list of unique index rows

lname

list of index name

tiindex

list of keys for each record

@classmethod
def ntv(cls, ntv_value, reindex=True, fast=False):

Generate a Dataset Object from a ntv_value

Parameters

  • ntv_value : bytes, string, Ntv object to convert
  • reindex : boolean (default True) - if True, default codec for each Field
  • fast : boolean (default False) - if True, ntv_value is not converted to json-value
@classmethod
def from_ntv(cls, ntv_value, reindex=True, decode_str=False, fast=False):

Generate a Dataset Object from a ntv_value

Parameters

  • ntv_value : bytes, string, Ntv object to convert
  • reindex : boolean (default True) - if True, default codec for each Field
  • decode_str : boolean (default False) - if True, strings are loaded as json data
  • fast : boolean (default False) - if True, ntv_value is not converted to json-value
def add(self, other, name=False, solve=True):

Add other's values to self's values for each index

Parameters

  • other : Dataset object to add to self object
  • name : Boolean (default False) - Add values with same index name (True) or same index row (False)
  • solve : Boolean (default True) - If True, replace other's None codec values with self codec values.

Returns : self
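
A hedged usage sketch (ds as in the quick-start sketch near the top of the page; other_ds is a hypothetical second Cdataset with the same field names):

# hedged sketch - append other_ds records, matching fields by name rather than by position
ds.add(other_ds, name=True)
print(len(ds))   # record count of both datasets combined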

def to_analysis(self, distr=False):

return a dict with data used in AnaDataset module

Parameters

  • distr : Boolean (default False) - If True, add distr information
def reindex(self):

Calculate a new default codec for each index (Return self)

def delindex(self, delname=None, savename=None):

remove a Field or a list of Fields.

Parameters

  • delname : string or list of string - name of index to remove
  • savename : string or list of string - name of index to keep

Returns : none
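
A hedged usage sketch (ds as in the quick-start sketch near the top of the page):

# hedged sketch - keep only 'year' and 'city', every other field is removed
ds.delindex(savename=['year', 'city'])
print(ds.lname)   # expected: ['year', 'city']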

def nindex(self, name):

return the Field whose name equals the given name (None if absent)

def renameindex(self, oldname, newname):

replace an index name 'oldname' by a new one 'newname'.

def reorder(self, recorder=None):

Reorder records in the order defined by 'recorder'

def setname(self, listname=None):

Update Field names with the names in listname

def swapindex(self, order):

Change the order of the indexes.

Parameters

  • order : list of int or list of str - new order of indexes to apply.

Returns : self
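
A hedged usage sketch (ds as in the quick-start sketch near the top of the page):

# hedged sketch - reorder the fields by name (a list of integer positions also works)
ds.swapindex(['city', 'year', 'temperature'])
print(ds.lname)   # expected: ['city', 'year', 'temperature']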

def check_relation(self, parent, field, typecoupl, value=True):

get the inconsistent records for a relationship

Parameters

  • field : int or str - index or name of the field involved in the relation
  • parent: int or str - index or name of the second field involved in the relation
  • typecoupl: str - relationship to check ('derived' or 'coupled')
  • value : boolean (default True) - if True return a dict with inconsistent values of the fields, else a tuple with indexes of records

Returns :

  • dict with inconsistent values of the fields
  • or a tuple with indexes of records
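
A hedged usage sketch (ds as in the quick-start sketch near the top of the page; here the relationship between 'year' (parent) and 'city' is tested):

# hedged sketch - return row indexes of inconsistent records instead of their values
faults = ds.check_relation('year', 'city', 'derived', value=False)
print(faults)   # records breaking the 'derived' relationship (empty if it holds)
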
def check_relationship(self, relations):

get the inconsistent records for each relationship defined in relations

Parameters

  • relations : list of dict or single dict - list of fields with relationship property

Returns :

  • dict with, for each relationship, a key (string with the two field names) and a value (list of inconsistent records)
  • or, for a single relationship, the value only
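
A hedged usage sketch (ds as in the quick-start sketch near the top of the page); the dict layout follows the keys tested in the source: 'name', 'relationship', 'parent' and 'link':

# hedged sketch - a single relationship, so the inconsistent-record value is returned directly
rel = {'name': 'city',
       'relationship': {'parent': 'year', 'link': 'derived'}}
print(ds.check_relationship(rel))
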
class DatasetError(builtins.Exception):

Dataset Exception
