python.observation.dataset_structure

Created on Sun Oct 2 22:24:59 2022

@author: philippe@loco-labs.io

The python.observation.dataset_structure module contains the DatasetStructure class, which groups the structure-related methods of python.observation.dataset.Dataset.

  1# -*- coding: utf-8 -*-
  2"""
  3Created on Sun Oct  2 22:24:59 2022
  4
  5@author: philippe@loco-labs.io
  6
  7The `python.observation.dataset_structure` module contains the `DatasetStructure` class
  8(`python.observation.dataset.Dataset` methods).
  9"""
 10
 11# %% declarations
 12from copy import copy
 13
 14from observation.esconstante import ES
 15from observation.field import Field
 16from observation.util import util
 17from observation.dataset_interface import DatasetError
 18from observation.fields import Sfield
 19
 20
 21class DatasetStructure:
 22    '''this class includes Dataset methods :
 23
 24    *selecting - infos methods*
 25
 26    - `DatasetStructure.couplingmatrix`
 27    - `DatasetStructure.idxrecord`
 28    - `DatasetStructure.indexinfos`
 29    - `DatasetStructure.indicator`
 30    - `DatasetStructure.iscanonorder`
 31    - `DatasetStructure.isinrecord`
 32    - `DatasetStructure.keytoval`
 33    - `DatasetStructure.loc`
 34    - `DatasetStructure.nindex`
 35    - `DatasetStructure.record`
 36    - `DatasetStructure.recidx`
 37    - `DatasetStructure.recvar`
 38    - `DatasetStructure.tree`
 39    - `DatasetStructure.valtokey`
 40
 41    *add - update methods*
 42
 43    - `DatasetStructure.add`
 44    - `DatasetStructure.addindex`
 45    - `DatasetStructure.append`
 46    - `DatasetStructure.delindex`
 47    - `DatasetStructure.delrecord`
 48    - `DatasetStructure.orindex`
 49    - `DatasetStructure.renameindex`
 50    - `DatasetStructure.setvar`
 51    - `DatasetStructure.setname`
 52    - `DatasetStructure.updateindex`
 53
 54    *structure management - methods*
 55
 56    - `DatasetStructure.applyfilter`
 57    - `DatasetStructure.coupling`
 58    - `DatasetStructure.full`
 59    - `DatasetStructure.getduplicates`
 60    - `DatasetStructure.mix`
 61    - `DatasetStructure.merging`
 62    - `DatasetStructure.reindex`
 63    - `DatasetStructure.reorder`
 64    - `DatasetStructure.setfilter`
 65    - `DatasetStructure.sort`
 66    - `DatasetStructure.swapindex`
 67    - `DatasetStructure.setcanonorder`
 68    - `DatasetStructure.tostdcodec`
 69    '''
 70    # %% methods
 71
 72    def add(self, other, name=False, solve=True):
 73        ''' Add other's values to self's values for each index
 74
 75        *Parameters*
 76
 77        - **other** : Dataset object to add to self object
 78        - **name** : Boolean (default False) - Add values with same index name (True) or
 79        same index row (False)
 80        - **solve** : Boolean (default True) - If True, replace None other's codec value
 81        with self codec value.
 82
 83        *Returns* : self '''
 84        if self.lenindex != other.lenindex:
 85            raise DatasetError('length are not identical')
 86        if name and sorted(self.lname) != sorted(other.lname):
 87            raise DatasetError('name are not identical')
 88        for i in range(self.lenindex):
 89            if name:
 90                self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])],
 91                                   solve=solve)
 92            else:
 93                self.lindex[i].add(other.lindex[i], solve=solve)
 94        return self
 95
 96    def addindex(self, index, first=False, merge=False, update=False):
 97        '''add a new index.
 98
 99        *Parameters*
100
101        - **index** : Field - index to add (can be index Ntv representation)
102        - **first** : If True insert index at the first row, else at the end
103        - **merge** : create a new index if merge is False
104        - **update** : if True, update actual values if index name is present (and merge is True)
105
106        *Returns* : none '''
107        idx = self.field.ntv(index)
108        idxname = self.lname
109        if len(idx) != len(self) and len(self) > 0:
110            raise DatasetError('sizes are different')
111        if not idx.name in idxname:
112            if first:
113                self.lindex.insert(0, idx)
114            else:
115                self.lindex.append(idx)
116        elif not merge:  # si idx.name in idxname
117            while idx.name in idxname:
118                idx.name += '(2)'
119            if first:
120                self.lindex.insert(0, idx)
121            else:
122                self.lindex.append(idx)
123        elif update:  # si merge et si idx.name in idxname
124            self.lindex[idxname.index(idx.name)].setlistvalue(idx.values)
125
126    def append(self, record, unique=False):
127        '''add a new record.
128
129        *Parameters*
130
131        - **record** :  list of new index values to add to Dataset
132        - **unique** :  boolean (default False) - Append isn't done if unique
133        is True and record present
134        
135        *Returns* : list - key record'''
136        if self.lenindex != len(record):
137            raise DatasetError('len(record) not consistent')
138        record = self.field.l_to_i(record)
139        if self.isinrecord(self.idxrecord(record), False) and unique:
140            return None
141        return [self.lindex[i].append(record[i]) for i in range(self.lenindex)]
142
143    def applyfilter(self, reverse=False, filtname=ES.filter, delfilter=True, inplace=True):
144        '''delete records with defined filter value.
145        Filter is deleted after record filtering.
146
147        *Parameters*
148
149        - **reverse** :  boolean (default False) - delete record with filter's 
150        value is reverse
151        - **filtname** : string (default ES.filter) - Name of the filter Field added
152        - **delfilter** :  boolean (default True) - If True, delete filter's Field
153        - **inplace** : boolean (default True) - if True, filter is apply to self,
154
155        *Returns* : self or new Dataset'''
156        if not filtname in self.lname:
157            return None
158        if inplace:
159            ilis = self
160        else:
161            ilis = copy(self)
162        ifilt = ilis.lname.index(filtname)       
163        if self.field.__name__ == 'Sfield':
164            ilis.sort([ifilt], reverse= not reverse, func=None)
165        else:
166            ilis.sort([ifilt], reverse=reverse, func=None)
167        lisind = ilis.lindex[ifilt].recordfromvalue(reverse)
168        if lisind:
169            minind = min(lisind)
170            for idx in ilis.lindex:
171                del idx.keys[minind:]
172        if inplace:
173            self.delindex(filtname)
174        else:
175            ilis.delindex(filtname)
176            if delfilter:
177                self.delindex(filtname)
178        ilis.reindex()
179        return ilis
180
181    def couplingmatrix(self, default=False, filename=None, att='ratecpl'):
182        '''return a matrix with coupling infos between each idx.
183        One info can be stored in a file (csv format).
184
185        *Parameters*
186
187        - **default** : comparison with default codec
188        - **filename** : string (default None) - name of the file to write the matrix
189        - **att** : string - name of the info to store in the file
190
191        *Returns* : array of array of dict'''
192        return self.analysis.getmatrix()
193
194    def coupling(self, derived=True, param='rateder', level=0.1):
195        '''Transform idx with low rate in coupled or derived indexes (codec extension).
196
197        *Parameters*
198
199        - **param** : string (default 'rateder') - coupling measurement 
200        ('rateder', 'diffdistparent', 'ratecpl', 'distance')
201        - **level** : float (default 0.1) - param threshold to apply coupling.
202        - **derived** : boolean (default : True). If True, indexes are derived, 
203        else coupled.
204
205        *Returns* : None'''
206        infos = self.indexinfos()
207        parent = {'rateder': 'distparent', 'diffdistparent': 'distparent',
208                  'ratecpl': 'minparent', 'distance': 'minparent'}
209        child = [None] * len(infos)
210        for idx in range(len(infos)):
211            iparent = infos[idx][parent[param]]
212            if iparent != -1:
213                if child[iparent] is None:
214                    child[iparent] = []
215                child[iparent].append(idx)
216        for idx in range(len(infos)):
217            self._couplingidx(idx, child, derived, param,
218                              parent[param], level, infos)
219
220    def _couplingidx(self, idx, child, derived, param, parentparam, level, infos):
221        ''' Field coupling (included childrens of the Field)'''
222        inf = infos[idx]
223        if inf['cat'] in ('coupled', 'unique') or inf[parentparam] == -1\
224                or inf[param] >= level or (derived and inf['cat'] == 'derived'):
225            return
226        if child[idx]:
227            for childidx in child[idx]:
228                self._couplingidx(childidx, child, derived,
229                                  param, parentparam, level, infos)
230        self.lindex[inf[parentparam]].coupling(self.lindex[idx], derived=derived,
231                                               duplicate=False)
232        return
233
234    def delrecord(self, record, extern=True):
235        '''remove a record.
236
237        *Parameters*
238
239        - **record** :  list - index values to remove to Dataset
240        - **extern** : if True, compare record values to external representation 
241        of self.value, else, internal
242
243        *Returns* : row deleted'''
244        self.reindex()
245        reckeys = self.valtokey(record, extern=extern)
246        if None in reckeys:
247            return None
248        row = self.tiindex.index(reckeys)
249        for idx in self:
250            del idx[row]
251        return row
252
253    def delindex(self, delname=None, savename=None):
254        '''remove an Field or a list of Field.
255
256        *Parameters*
257
258        - **delname** : string or list of string - name of index to remove
259        - **savename** : string or list of string - name of index to keep
260
261        *Returns* : none '''
262        if not delname and not savename :
263            return
264        if isinstance(delname, str):
265            delname = [delname]
266        if isinstance(savename, str):
267            savename = [savename]
268        if delname and savename:
269            delname = [name for name in delname if not name in savename]
270        if not delname:
271            delname = [name for name in self.lname if not name in savename]
272        for idxname in delname:
273            if idxname in self.lname:
274                self.lindex.pop(self.lname.index(idxname))
275
276    def _fullindex(self, ind, keysadd, indexname, varname, leng, fillvalue, fillextern):
277        if not varname:
278            varname = []
279        idx = self.lindex[ind]
280        lenadd = len(keysadd[0])
281        if len(idx) == leng:
282            return
283        inf = self.indexinfos()
284        if inf[ind]['cat'] == 'unique':
285            idx.set_keys(idx.keys + [0] * lenadd)
286        elif self.lname[ind] in indexname:
287            idx.set_keys(idx.keys + keysadd[indexname.index(self.lname[ind])])
288        elif inf[ind]['parent'] == -1 or self.lname[ind] in varname:
289            fillval = fillvalue
290            if fillextern:
291                fillval = self.field.s_to_i(fillvalue)
292            idx.set_keys(idx.keys + [len(idx.codec)] * len(keysadd[0]))
293            idx.set_codec(idx.codec + [fillval])
294        else:
295            parent = inf[ind]['parent']
296            if len(self.lindex[parent]) != leng:
297                self._fullindex(parent, keysadd, indexname, varname, leng,
298                                fillvalue, fillextern)
299            if inf[ind]['cat'] == 'coupled':
300                idx.tocoupled(self.lindex[parent], coupling=True)
301            else:
302                idx.tocoupled(self.lindex[parent], coupling=False)
303
304    def full(self, reindex=False, idxname=None, varname=None, fillvalue='-',
305             fillextern=True, inplace=True, complete=True):
306        '''tranform a list of indexes in crossed indexes (value extension).
307
308        *Parameters*
309
310        - **idxname** : list of string - name of indexes to transform
311        - **varname** : string - name of indexes to use
312        - **reindex** : boolean (default False) - if True, set default codec 
313        before transformation
314        - **fillvalue** : object value used for var extension
315        - **fillextern** : boolean(default True) - if True, fillvalue is converted 
316        to internal value
317        - **inplace** : boolean (default True) - if True, filter is apply to self,
318        - **complete** : boolean (default True) - if True, Field are ordered 
319        in canonical order
320
321        *Returns* : self or new Dataset'''
322        ilis = self if inplace else copy(self)
323        if not idxname:
324            idxname = ilis.primaryname
325        if reindex:
326            ilis.reindex()
327        keysadd = util.idxfull([ilis.nindex(name) for name in idxname])
328        if keysadd and len(keysadd) != 0:
329            lenadd = len(keysadd[0])
330            for ind in range(ilis.lenindex):
331                ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
332                                fillvalue, fillextern)
333        '''if not keysadd or len(keysadd) == 0:
334            return ilis
335        lenadd = len(keysadd[0])
336        for ind in range(ilis.lenindex):
337            ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
338                            fillvalue, fillextern)   '''     
339        if complete:
340            ilis.setcanonorder()
341        return ilis
342
343    def getduplicates(self, indexname=None, resindex=None, indexview=None):
344        '''check duplicate cod in a list of indexes. Result is add in a new 
345        index or returned.
346
347        *Parameters*
348
349        - **indexname** : list of string (default none) - name of indexes to check 
350        (if None, all Field)
351        - **resindex** : string (default None) - Add a new index named resindex 
352        with check result (False if duplicate)
353        - **indexview** : list of str (default None) - list of fields to return
354
355        *Returns* : list of int - list of rows with duplicate cod '''
356        if not indexname:
357            indexname = self.lname
358        duplicates = []
359        for name in indexname:
360            duplicates += self.nindex(name).getduplicates()
361        if resindex and isinstance(resindex, str):
362            newidx = self.field([True] * len(self), name=resindex)
363            for item in duplicates:
364                newidx[item] = False
365            self.addindex(newidx)
366        dupl = tuple(set(duplicates))
367        if not indexview:
368            return dupl
369        return [tuple(self.record(ind, indexview)) for ind in dupl]
370
371    def iscanonorder(self):
372        '''return True if primary indexes have canonical ordered keys'''
373        primary = self.primary
374        canonorder = util.canonorder(
375            [len(self.lidx[idx].codec) for idx in primary])
376        return canonorder == [self.lidx[idx].keys for idx in primary]
377
378    def isinrecord(self, record, extern=True):
379        '''Check if record is present in self.
380
381        *Parameters*
382
383        - **record** : list - value for each Field
384        - **extern** : if True, compare record values to external representation
385        of self.value, else, internal
386
387        *Returns boolean* : True if found'''
388        if extern:
389            return record in util.transpose(self.extidxext)
390        return record in util.transpose(self.extidx)
391
392    def idxrecord(self, record):
393        '''return rec array (without variable) from complete record (with variable)'''
394        return [record[self.lidxrow[i]] for i in range(len(self.lidxrow))]
395
396    def indexinfos(self, keys=None):
397        '''return a dict with infos of each index :
398            - num, name, cat, diffdistparent, child, parent, distparent, 
399            crossed, pparent, rateder (struct info)
400            - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)
401
402        *Parameters*
403
404        - **keys** : string, list or tuple (default None) - list of attributes 
405        to returned.
406        if 'all' or None, all attributes are returned.
407        if 'struct', only structural attributes are returned.
408
409        *Returns* : dict'''
410        return self.analysis.getinfos(keys)
411
412    def indicator(self, fullsize=None, size=None):
413        '''generate size indicators: ol (object lightness), ul (unicity level), 
414        gain (sizegain)
415
416        *Parameters*
417
418        - **fullsize** : int (default none) - size with full codec
419        - **size** : int (default none) - size with existing codec
420        - **indexinfos** : list (default None) - indexinfos data
421
422        *Returns* : dict'''
423        if not fullsize:
424            fullsize = len(self.to_obj(encoded=True, modecodec='full'))
425        if not size:
426            size = len(self.to_obj(encoded=True))
427        nval = len(self) * (self.lenindex + 1)
428        sval = fullsize / nval
429        ncod = sum(self.indexlen) + self.lenindex
430        if nval != ncod:
431            scod = (size - ncod * sval) / (nval - ncod)
432            olight = scod / sval
433        else:
434            olight = None
435        return {'total values': nval, 'mean size': round(sval, 3),
436                'unique values': ncod, 'mean coding size': round(scod, 3),
437                'unicity level': round(ncod / nval, 3),
438                'optimize level': round(size / fullsize, 3),
439                'object lightness': round(olight, 3),
440                'maxgain': round((nval - ncod) / nval, 3),
441                'gain': round((fullsize - size) / fullsize, 3)}
442
443    def keytoval(self, listkey, extern=True):
444        '''
445        convert a keys list (key for each index) to a values list (value for each index).
446
447        *Parameters*
448
449        - **listkey** : key for each index
450        - **extern** : boolean (default True) - if True, compare rec to val else to values
451
452        *Returns*
453
454        - **list** : value for each index'''
455        return [idx.keytoval(key, extern=extern) for idx, key in zip(self.lindex, listkey)]
456
457    def loc(self, rec, extern=True, row=False):
458        '''
459        Return record or row corresponding to a list of idx values.
460
461        *Parameters*
462
463        - **rec** : list - value for each idx
464        - **extern** : boolean (default True) - if True, compare rec to val,
465        else to values
466        - **row** : Boolean (default False) - if True, return list of row,
467        else list of records
468
469        *Returns*
470
471        - **object** : variable value or None if not found'''
472        locrow = None
473        try:
474            if len(rec) == self.lenindex:
475                locrow = list(set.intersection(*[set(self.lindex[i].loc(rec[i], extern))
476                                               for i in range(self.lenindex)]))
477            elif len(rec) == self.lenidx:
478                locrow = list(set.intersection(*[set(self.lidx[i].loc(rec[i], extern))
479                                               for i in range(self.lenidx)]))
480        except:
481            pass
482        if locrow is None:
483            return None
484        if row:
485            return locrow
486        return [self.record(locr, extern=extern) for locr in locrow]
487
488    def mix(self, other, fillvalue=None):
489        '''add other Field not included in self and add other's values'''
490        sname = set(self.lname)
491        oname = set(other.lname)
492        newself = copy(self)
493        copother = copy(other)
494        for nam in oname - sname:
495            newself.addindex({nam: [fillvalue] * len(newself)})
496        for nam in sname - oname:
497            copother.addindex({nam: [fillvalue] * len(copother)})
498        return newself.add(copother, name=True, solve=False)
499
500    def merging(self, listname=None):
501        ''' add a new Field build with Field define in listname.
502        Values of the new Field are set of values in listname Field'''
503        self.addindex(Field.merging([self.nindex(name) for name in listname]))
504
505    def nindex(self, name):
506        ''' index with name equal to attribute name'''
507        if name in self.lname:
508            return self.lindex[self.lname.index(name)]
509        return None
510
511    def orindex(self, other, first=False, merge=False, update=False):
512        ''' Add other's index to self's index (with same length)
513
514        *Parameters*
515
516        - **other** : self class - object to add
517        - **first** : Boolean (default False) - If True insert indexes
518        at the first row, else at the end
519        - **merge** : Boolean (default False) - create a new index 
520        if merge is False
521        - **update** : Boolean (default False) - if True, update actual 
522        values if index name is present (and merge is True)
523
524        *Returns* : none '''
525        if len(self) != 0 and len(self) != len(other) and len(other) != 0:
526            raise DatasetError("the sizes are not equal")
527        otherc = copy(other)
528        for idx in otherc.lindex:
529            self.addindex(idx, first=first, merge=merge, update=update)
530        return self
531
532    def record(self, row, indexname=None, extern=True):
533        '''return the record at the row
534
535        *Parameters*
536
537        - **row** : int - row of the record
538        - **extern** : boolean (default True) - if True, return val record else
539        value record
540        - **indexname** : list of str (default None) - list of fields to return
541        *Returns*
542
543        - **list** : val record or value record'''
544        if indexname is None:
545            indexname = self.lname
546        if extern:
547            record = [idx.val[row] for idx in self.lindex]
548            #record = [idx.values[row].to_obj() for idx in self.lindex]
549            #record = [idx.valrow(row) for idx in self.lindex]
550        else:
551            record = [idx.values[row] for idx in self.lindex]
552        return [record[self.lname.index(name)] for name in indexname]
553
554    def recidx(self, row, extern=True):
555        '''return the list of idx val or values at the row
556
557        *Parameters*
558
559        - **row** : int - row of the record
560        - **extern** : boolean (default True) - if True, return val rec else value rec
561
562        *Returns*
563
564        - **list** : val or value for idx'''
565        if extern:
566            return [idx.values[row].to_obj() for idx in self.lidx]
567            #return [idx.valrow(row) for idx in self.lidx]
568        return [idx.values[row] for idx in self.lidx]
569
570    def recvar(self, row, extern=True):
571        '''return the list of var val or values at the row
572
573        *Parameters*
574
575        - **row** : int - row of the record
576        - **extern** : boolean (default True) - if True, return val rec else value rec
577
578        *Returns*
579
580        - **list** : val or value for var'''
581        if extern:
582            return [idx.values[row].to_obj() for idx in self.lvar]
583            #return [idx.valrow(row) for idx in self.lvar]
584        return [idx.values[row] for idx in self.lvar]
585
586    def reindex(self):
587        '''Calculate a new default codec for each index (Return self)'''
588        for idx in self.lindex:
589            idx.reindex()
590        return self
591
592    def renameindex(self, oldname, newname):
593        '''replace an index name 'oldname' by a new one 'newname'. '''
594        for i in range(self.lenindex):
595            if self.lname[i] == oldname:
596                self.lindex[i].setname(newname)
597        for i in range(len(self.lvarname)):
598            if self.lvarname[i] == oldname:
599                self.lvarname[i] = newname
600
601    def reorder(self, recorder=None):
602        '''Reorder records in the order define by 'recorder' '''
603        if recorder is None or set(recorder) != set(range(len(self))):
604            return None
605        for idx in self.lindex:
606            idx.set_keys([idx.keys[i] for i in recorder])
607        return None
608
609    def setcanonorder(self, reindex=False):
610        '''Set the canonical index order : primary - secondary/unique - variable.
611        Set the canonical keys order : ordered keys in the first columns.
612
613        *Parameters*
614        - **reindex** : boolean (default False) - if True, set default codec after
615        transformation
616
617        *Return* : self'''
618        order = self.primaryname
619        order += self.secondaryname
620        order += self.lvarname
621        order += self.lunicname
622        self.swapindex(order)
623        self.sort(reindex=reindex)
624        self.analysis.actualize()
625        return self
626
627    def setfilter(self, filt=None, first=False, filtname=ES.filter, unique=False):
628        '''Add a filter index with boolean values
629
630        - **filt** : list of boolean - values of the filter idx to add
631        - **first** : boolean (default False) - If True insert index at the first row,
632        else at the end
633        - **filtname** : string (default ES.filter) - Name of the filter Field added
634
635        *Returns* : self'''
636        if not filt:
637            filt = [True] * len(self)
638        idx = self.field(filt, name=filtname)
639        idx.reindex()
640        if not idx.cod in ([True, False], [False, True], [True], [False]):
641            raise DatasetError('filt is not consistent')
642        if unique:
643            for name in self.lname:
644                if name[:len(ES.filter)] == ES.filter:
645                    self.delindex(ES.filter)
646        self.addindex(idx, first=first)
647        return self
648
649    def setname(self, listname=None):
650        '''Update Field name by the name in listname'''
651        for i in range(min(self.lenindex, len(listname))):
652            self.lindex[i].name = listname[i]
653        self.analysis.actualize()
654
655    def sort(self, order=None, reverse=False, func=str, reindex=True):
656        '''Sort data following the index order and apply the ascending or descending
657        sort function to values.
658
659        *Parameters*
660
661        - **order** : list (default None)- new order of index to apply. If None or [],
662        the sort function is applied to the existing order of indexes.
663        - **reverse** : boolean (default False)- ascending if True, descending if False
664        - **func**    : function (default str) - parameter key used in the sorted function
665        - **reindex** : boolean (default True) - if True, apply a new codec order (key = func)
666
667        *Returns* : self'''
668        if not order:
669            order = list(range(self.lenindex))
670        orderfull = order + list(set(range(self.lenindex)) - set(order))
671        if reindex:
672            for i in order:
673                self.lindex[i].reindex(codec=sorted(
674                    self.lindex[i].codec, key=func))
675        newidx = util.transpose(sorted(util.transpose(
676            [self.lindex[orderfull[i]].keys for i in range(self.lenindex)]),
677            reverse=reverse))
678        for i in range(self.lenindex):
679            self.lindex[orderfull[i]].set_keys(newidx[i])
680        return self
681
682    def swapindex(self, order):
683        '''
684        Change the order of the index .
685
686        *Parameters*
687
688        - **order** : list of int or list of name - new order of index to apply.
689
690        *Returns* : self '''
691        if self.lenindex != len(order):
692            raise DatasetError('length of order and Dataset different')
693        if not order or isinstance(order[0], int):
694            self.lindex = [self.lindex[ind] for ind in order]
695        elif isinstance(order[0], str):
696            self.lindex = [self.nindex(name) for name in order]
697        return self
698
699    def tostdcodec(self, inplace=False, full=True):
700        '''Transform all codec in full or default codec.
701
702        *Parameters*
703
704        - **inplace** : boolean  (default False) - if True apply transformation
705        to self, else to a new Dataset
706        - **full** : boolean (default True)- full codec if True, default if False
707
708
709        *Return Dataset* : self or new Dataset'''
710        lindex = [idx.tostdcodec(inplace=False, full=full)
711                  for idx in self.lindex]
712        if inplace:
713            self.lindex = lindex
714            return self
715        return self.__class__(lindex, self.lvarname)
716
717    def tree(self, mode='derived', width=5, lname=20, string=True):
718        '''return a string with a tree of derived Field.
719
720         *Parameters*
721
722        - **lname** : integer (default 20) - length of the names        
723        - **width** : integer (default 5) - length of the lines        
724        - **mode** : string (default 'derived') - kind of tree :
725            'derived' : derived tree
726            'distance': min distance tree
727            'diff': min dist rate tree
728        '''
729        return self.analysis.tree(width=width, lname=lname, mode=mode, string=string)
730
731    def updateindex(self, listvalue, index, extern=True):
732        '''update values of an index.
733
734        *Parameters*
735
736        - **listvalue** : list - index values to replace
737        - **index** : integer - index row to update
738        - **extern** : if True, the listvalue has external representation, else internal
739
740        *Returns* : none '''
741        self.lindex[index].setlistvalue(listvalue, extern=extern)
742
743    def valtokey(self, rec, extern=True):
744        '''convert a record list (value or val for each idx) to a key list 
745        (key for each index).
746
747        *Parameters*
748
749        - **rec** : list of value or val for each index
750        - **extern** : if True, the rec value has external representation, else internal
751
752        *Returns*
753
754        - **list of int** : record key for each index'''
755        return [idx.valtokey(val, extern=extern) for idx, val in zip(self.lindex, rec)]
class DatasetStructure:
 22class DatasetStructure:
 23    '''this class includes Dataset methods :
 24
 25    *selecting - infos methods*
 26
 27    - `DatasetStructure.couplingmatrix`
 28    - `DatasetStructure.idxrecord`
 29    - `DatasetStructure.indexinfos`
 30    - `DatasetStructure.indicator`
 31    - `DatasetStructure.iscanonorder`
 32    - `DatasetStructure.isinrecord`
 33    - `DatasetStructure.keytoval`
 34    - `DatasetStructure.loc`
 35    - `DatasetStructure.nindex`
 36    - `DatasetStructure.record`
 37    - `DatasetStructure.recidx`
 38    - `DatasetStructure.recvar`
 39    - `DatasetStructure.tree`
 40    - `DatasetStructure.valtokey`
 41
 42    *add - update methods*
 43
 44    - `DatasetStructure.add`
 45    - `DatasetStructure.addindex`
 46    - `DatasetStructure.append`
 47    - `DatasetStructure.delindex`
 48    - `DatasetStructure.delrecord`
 49    - `DatasetStructure.orindex`
 50    - `DatasetStructure.renameindex`
 51    - `DatasetStructure.setvar`
 52    - `DatasetStructure.setname`
 53    - `DatasetStructure.updateindex`
 54
 55    *structure management - methods*
 56
 57    - `DatasetStructure.applyfilter`
 58    - `DatasetStructure.coupling`
 59    - `DatasetStructure.full`
 60    - `DatasetStructure.getduplicates`
 61    - `DatasetStructure.mix`
 62    - `DatasetStructure.merging`
 63    - `DatasetStructure.reindex`
 64    - `DatasetStructure.reorder`
 65    - `DatasetStructure.setfilter`
 66    - `DatasetStructure.sort`
 67    - `DatasetStructure.swapindex`
 68    - `DatasetStructure.setcanonorder`
 69    - `DatasetStructure.tostdcodec`
 70    '''
 71    # %% methods
 72
 73    def add(self, other, name=False, solve=True):
 74        ''' Add other's values to self's values for each index
 75
 76        *Parameters*
 77
 78        - **other** : Dataset object to add to self object
 79        - **name** : Boolean (default False) - Add values with same index name (True) or
 80        same index row (False)
 81        - **solve** : Boolean (default True) - If True, replace None other's codec value
 82        with self codec value.
 83
 84        *Returns* : self '''
 85        if self.lenindex != other.lenindex:
 86            raise DatasetError('length are not identical')
 87        if name and sorted(self.lname) != sorted(other.lname):
 88            raise DatasetError('name are not identical')
 89        for i in range(self.lenindex):
 90            if name:
 91                self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])],
 92                                   solve=solve)
 93            else:
 94                self.lindex[i].add(other.lindex[i], solve=solve)
 95        return self
 96
 97    def addindex(self, index, first=False, merge=False, update=False):
 98        '''add a new index.
 99
100        *Parameters*
101
102        - **index** : Field - index to add (can be index Ntv representation)
103        - **first** : If True insert index at the first row, else at the end
104        - **merge** : create a new index if merge is False
105        - **update** : if True, update actual values if index name is present (and merge is True)
106
107        *Returns* : none '''
108        idx = self.field.ntv(index)
109        idxname = self.lname
110        if len(idx) != len(self) and len(self) > 0:
111            raise DatasetError('sizes are different')
112        if not idx.name in idxname:
113            if first:
114                self.lindex.insert(0, idx)
115            else:
116                self.lindex.append(idx)
117        elif not merge:  # si idx.name in idxname
118            while idx.name in idxname:
119                idx.name += '(2)'
120            if first:
121                self.lindex.insert(0, idx)
122            else:
123                self.lindex.append(idx)
124        elif update:  # si merge et si idx.name in idxname
125            self.lindex[idxname.index(idx.name)].setlistvalue(idx.values)
126
127    def append(self, record, unique=False):
128        '''add a new record.
129
130        *Parameters*
131
132        - **record** :  list of new index values to add to Dataset
133        - **unique** :  boolean (default False) - Append isn't done if unique
134        is True and record present
135        
136        *Returns* : list - key record'''
137        if self.lenindex != len(record):
138            raise DatasetError('len(record) not consistent')
139        record = self.field.l_to_i(record)
140        if self.isinrecord(self.idxrecord(record), False) and unique:
141            return None
142        return [self.lindex[i].append(record[i]) for i in range(self.lenindex)]
143
144    def applyfilter(self, reverse=False, filtname=ES.filter, delfilter=True, inplace=True):
145        '''delete records with defined filter value.
146        Filter is deleted after record filtering.
147
148        *Parameters*
149
150        - **reverse** :  boolean (default False) - delete record with filter's 
151        value is reverse
152        - **filtname** : string (default ES.filter) - Name of the filter Field added
153        - **delfilter** :  boolean (default True) - If True, delete filter's Field
154        - **inplace** : boolean (default True) - if True, filter is apply to self,
155
156        *Returns* : self or new Dataset'''
157        if not filtname in self.lname:
158            return None
159        if inplace:
160            ilis = self
161        else:
162            ilis = copy(self)
163        ifilt = ilis.lname.index(filtname)       
164        if self.field.__name__ == 'Sfield':
165            ilis.sort([ifilt], reverse= not reverse, func=None)
166        else:
167            ilis.sort([ifilt], reverse=reverse, func=None)
168        lisind = ilis.lindex[ifilt].recordfromvalue(reverse)
169        if lisind:
170            minind = min(lisind)
171            for idx in ilis.lindex:
172                del idx.keys[minind:]
173        if inplace:
174            self.delindex(filtname)
175        else:
176            ilis.delindex(filtname)
177            if delfilter:
178                self.delindex(filtname)
179        ilis.reindex()
180        return ilis
181
182    def couplingmatrix(self, default=False, filename=None, att='ratecpl'):
183        '''return a matrix with coupling infos between each idx.
184        One info can be stored in a file (csv format).
185
186        *Parameters*
187
188        - **default** : comparison with default codec
189        - **filename** : string (default None) - name of the file to write the matrix
190        - **att** : string - name of the info to store in the file
191
192        *Returns* : array of array of dict'''
193        return self.analysis.getmatrix()
194
195    def coupling(self, derived=True, param='rateder', level=0.1):
196        '''Transform idx with low rate in coupled or derived indexes (codec extension).
197
198        *Parameters*
199
200        - **param** : string (default 'rateder') - coupling measurement 
201        ('rateder', 'diffdistparent', 'ratecpl', 'distance')
202        - **level** : float (default 0.1) - param threshold to apply coupling.
203        - **derived** : boolean (default : True). If True, indexes are derived, 
204        else coupled.
205
206        *Returns* : None'''
207        infos = self.indexinfos()
208        parent = {'rateder': 'distparent', 'diffdistparent': 'distparent',
209                  'ratecpl': 'minparent', 'distance': 'minparent'}
210        child = [None] * len(infos)
211        for idx in range(len(infos)):
212            iparent = infos[idx][parent[param]]
213            if iparent != -1:
214                if child[iparent] is None:
215                    child[iparent] = []
216                child[iparent].append(idx)
217        for idx in range(len(infos)):
218            self._couplingidx(idx, child, derived, param,
219                              parent[param], level, infos)
220
221    def _couplingidx(self, idx, child, derived, param, parentparam, level, infos):
222        ''' Field coupling (included childrens of the Field)'''
223        inf = infos[idx]
224        if inf['cat'] in ('coupled', 'unique') or inf[parentparam] == -1\
225                or inf[param] >= level or (derived and inf['cat'] == 'derived'):
226            return
227        if child[idx]:
228            for childidx in child[idx]:
229                self._couplingidx(childidx, child, derived,
230                                  param, parentparam, level, infos)
231        self.lindex[inf[parentparam]].coupling(self.lindex[idx], derived=derived,
232                                               duplicate=False)
233        return
234
235    def delrecord(self, record, extern=True):
236        '''remove a record.
237
238        *Parameters*
239
240        - **record** :  list - index values to remove to Dataset
241        - **extern** : if True, compare record values to external representation 
242        of self.value, else, internal
243
244        *Returns* : row deleted'''
245        self.reindex()
246        reckeys = self.valtokey(record, extern=extern)
247        if None in reckeys:
248            return None
249        row = self.tiindex.index(reckeys)
250        for idx in self:
251            del idx[row]
252        return row
253
254    def delindex(self, delname=None, savename=None):
255        '''remove an Field or a list of Field.
256
257        *Parameters*
258
259        - **delname** : string or list of string - name of index to remove
260        - **savename** : string or list of string - name of index to keep
261
262        *Returns* : none '''
263        if not delname and not savename :
264            return
265        if isinstance(delname, str):
266            delname = [delname]
267        if isinstance(savename, str):
268            savename = [savename]
269        if delname and savename:
270            delname = [name for name in delname if not name in savename]
271        if not delname:
272            delname = [name for name in self.lname if not name in savename]
273        for idxname in delname:
274            if idxname in self.lname:
275                self.lindex.pop(self.lname.index(idxname))
276
277    def _fullindex(self, ind, keysadd, indexname, varname, leng, fillvalue, fillextern):
278        if not varname:
279            varname = []
280        idx = self.lindex[ind]
281        lenadd = len(keysadd[0])
282        if len(idx) == leng:
283            return
284        inf = self.indexinfos()
285        if inf[ind]['cat'] == 'unique':
286            idx.set_keys(idx.keys + [0] * lenadd)
287        elif self.lname[ind] in indexname:
288            idx.set_keys(idx.keys + keysadd[indexname.index(self.lname[ind])])
289        elif inf[ind]['parent'] == -1 or self.lname[ind] in varname:
290            fillval = fillvalue
291            if fillextern:
292                fillval = self.field.s_to_i(fillvalue)
293            idx.set_keys(idx.keys + [len(idx.codec)] * len(keysadd[0]))
294            idx.set_codec(idx.codec + [fillval])
295        else:
296            parent = inf[ind]['parent']
297            if len(self.lindex[parent]) != leng:
298                self._fullindex(parent, keysadd, indexname, varname, leng,
299                                fillvalue, fillextern)
300            if inf[ind]['cat'] == 'coupled':
301                idx.tocoupled(self.lindex[parent], coupling=True)
302            else:
303                idx.tocoupled(self.lindex[parent], coupling=False)
304
305    def full(self, reindex=False, idxname=None, varname=None, fillvalue='-',
306             fillextern=True, inplace=True, complete=True):
307        '''tranform a list of indexes in crossed indexes (value extension).
308
309        *Parameters*
310
311        - **idxname** : list of string - name of indexes to transform
312        - **varname** : string - name of indexes to use
313        - **reindex** : boolean (default False) - if True, set default codec 
314        before transformation
315        - **fillvalue** : object value used for var extension
316        - **fillextern** : boolean(default True) - if True, fillvalue is converted 
317        to internal value
318        - **inplace** : boolean (default True) - if True, filter is apply to self,
319        - **complete** : boolean (default True) - if True, Field are ordered 
320        in canonical order
321
322        *Returns* : self or new Dataset'''
323        ilis = self if inplace else copy(self)
324        if not idxname:
325            idxname = ilis.primaryname
326        if reindex:
327            ilis.reindex()
328        keysadd = util.idxfull([ilis.nindex(name) for name in idxname])
329        if keysadd and len(keysadd) != 0:
330            lenadd = len(keysadd[0])
331            for ind in range(ilis.lenindex):
332                ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
333                                fillvalue, fillextern)
334        '''if not keysadd or len(keysadd) == 0:
335            return ilis
336        lenadd = len(keysadd[0])
337        for ind in range(ilis.lenindex):
338            ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
339                            fillvalue, fillextern)   '''     
340        if complete:
341            ilis.setcanonorder()
342        return ilis
343
344    def getduplicates(self, indexname=None, resindex=None, indexview=None):
345        '''check duplicate cod in a list of indexes. Result is add in a new 
346        index or returned.
347
348        *Parameters*
349
350        - **indexname** : list of string (default none) - name of indexes to check 
351        (if None, all Field)
352        - **resindex** : string (default None) - Add a new index named resindex 
353        with check result (False if duplicate)
354        - **indexview** : list of str (default None) - list of fields to return
355
356        *Returns* : list of int - list of rows with duplicate cod '''
357        if not indexname:
358            indexname = self.lname
359        duplicates = []
360        for name in indexname:
361            duplicates += self.nindex(name).getduplicates()
362        if resindex and isinstance(resindex, str):
363            newidx = self.field([True] * len(self), name=resindex)
364            for item in duplicates:
365                newidx[item] = False
366            self.addindex(newidx)
367        dupl = tuple(set(duplicates))
368        if not indexview:
369            return dupl
370        return [tuple(self.record(ind, indexview)) for ind in dupl]
371
372    def iscanonorder(self):
373        '''return True if primary indexes have canonical ordered keys'''
374        primary = self.primary
375        canonorder = util.canonorder(
376            [len(self.lidx[idx].codec) for idx in primary])
377        return canonorder == [self.lidx[idx].keys for idx in primary]
378
379    def isinrecord(self, record, extern=True):
380        '''Check if record is present in self.
381
382        *Parameters*
383
384        - **record** : list - value for each Field
385        - **extern** : if True, compare record values to external representation
386        of self.value, else, internal
387
388        *Returns boolean* : True if found'''
389        if extern:
390            return record in util.transpose(self.extidxext)
391        return record in util.transpose(self.extidx)
392
393    def idxrecord(self, record):
394        '''return rec array (without variable) from complete record (with variable)'''
395        return [record[self.lidxrow[i]] for i in range(len(self.lidxrow))]
396
397    def indexinfos(self, keys=None):
398        '''return a dict with infos of each index :
399            - num, name, cat, diffdistparent, child, parent, distparent, 
400            crossed, pparent, rateder (struct info)
401            - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)
402
403        *Parameters*
404
405        - **keys** : string, list or tuple (default None) - list of attributes 
406        to returned.
407        if 'all' or None, all attributes are returned.
408        if 'struct', only structural attributes are returned.
409
410        *Returns* : dict'''
411        return self.analysis.getinfos(keys)
412
413    def indicator(self, fullsize=None, size=None):
414        '''generate size indicators: ol (object lightness), ul (unicity level), 
415        gain (sizegain)
416
417        *Parameters*
418
419        - **fullsize** : int (default none) - size with full codec
420        - **size** : int (default none) - size with existing codec
421        - **indexinfos** : list (default None) - indexinfos data
422
423        *Returns* : dict'''
424        if not fullsize:
425            fullsize = len(self.to_obj(encoded=True, modecodec='full'))
426        if not size:
427            size = len(self.to_obj(encoded=True))
428        nval = len(self) * (self.lenindex + 1)
429        sval = fullsize / nval
430        ncod = sum(self.indexlen) + self.lenindex
431        if nval != ncod:
432            scod = (size - ncod * sval) / (nval - ncod)
433            olight = scod / sval
434        else:
435            olight = None
436        return {'total values': nval, 'mean size': round(sval, 3),
437                'unique values': ncod, 'mean coding size': round(scod, 3),
438                'unicity level': round(ncod / nval, 3),
439                'optimize level': round(size / fullsize, 3),
440                'object lightness': round(olight, 3),
441                'maxgain': round((nval - ncod) / nval, 3),
442                'gain': round((fullsize - size) / fullsize, 3)}
443
444    def keytoval(self, listkey, extern=True):
445        '''
446        convert a keys list (key for each index) to a values list (value for each index).
447
448        *Parameters*
449
450        - **listkey** : key for each index
451        - **extern** : boolean (default True) - if True, compare rec to val else to values
452
453        *Returns*
454
455        - **list** : value for each index'''
456        return [idx.keytoval(key, extern=extern) for idx, key in zip(self.lindex, listkey)]
457
458    def loc(self, rec, extern=True, row=False):
459        '''
460        Return record or row corresponding to a list of idx values.
461
462        *Parameters*
463
464        - **rec** : list - value for each idx
465        - **extern** : boolean (default True) - if True, compare rec to val,
466        else to values
467        - **row** : Boolean (default False) - if True, return list of row,
468        else list of records
469
470        *Returns*
471
472        - **object** : variable value or None if not found'''
473        locrow = None
474        try:
475            if len(rec) == self.lenindex:
476                locrow = list(set.intersection(*[set(self.lindex[i].loc(rec[i], extern))
477                                               for i in range(self.lenindex)]))
478            elif len(rec) == self.lenidx:
479                locrow = list(set.intersection(*[set(self.lidx[i].loc(rec[i], extern))
480                                               for i in range(self.lenidx)]))
481        except:
482            pass
483        if locrow is None:
484            return None
485        if row:
486            return locrow
487        return [self.record(locr, extern=extern) for locr in locrow]
488
489    def mix(self, other, fillvalue=None):
490        '''add other Field not included in self and add other's values'''
491        sname = set(self.lname)
492        oname = set(other.lname)
493        newself = copy(self)
494        copother = copy(other)
495        for nam in oname - sname:
496            newself.addindex({nam: [fillvalue] * len(newself)})
497        for nam in sname - oname:
498            copother.addindex({nam: [fillvalue] * len(copother)})
499        return newself.add(copother, name=True, solve=False)
500
501    def merging(self, listname=None):
502        ''' add a new Field build with Field define in listname.
503        Values of the new Field are set of values in listname Field'''
504        self.addindex(Field.merging([self.nindex(name) for name in listname]))
505
506    def nindex(self, name):
507        ''' index with name equal to attribute name'''
508        if name in self.lname:
509            return self.lindex[self.lname.index(name)]
510        return None
511
512    def orindex(self, other, first=False, merge=False, update=False):
513        ''' Add other's index to self's index (with same length)
514
515        *Parameters*
516
517        - **other** : self class - object to add
518        - **first** : Boolean (default False) - If True insert indexes
519        at the first row, else at the end
520        - **merge** : Boolean (default False) - create a new index 
521        if merge is False
522        - **update** : Boolean (default False) - if True, update actual 
523        values if index name is present (and merge is True)
524
525        *Returns* : none '''
526        if len(self) != 0 and len(self) != len(other) and len(other) != 0:
527            raise DatasetError("the sizes are not equal")
528        otherc = copy(other)
529        for idx in otherc.lindex:
530            self.addindex(idx, first=first, merge=merge, update=update)
531        return self
532
533    def record(self, row, indexname=None, extern=True):
534        '''return the record at the row
535
536        *Parameters*
537
538        - **row** : int - row of the record
539        - **extern** : boolean (default True) - if True, return val record else
540        value record
541        - **indexname** : list of str (default None) - list of fields to return
542        *Returns*
543
544        - **list** : val record or value record'''
545        if indexname is None:
546            indexname = self.lname
547        if extern:
548            record = [idx.val[row] for idx in self.lindex]
549            #record = [idx.values[row].to_obj() for idx in self.lindex]
550            #record = [idx.valrow(row) for idx in self.lindex]
551        else:
552            record = [idx.values[row] for idx in self.lindex]
553        return [record[self.lname.index(name)] for name in indexname]
554
555    def recidx(self, row, extern=True):
556        '''return the list of idx val or values at the row
557
558        *Parameters*
559
560        - **row** : int - row of the record
561        - **extern** : boolean (default True) - if True, return val rec else value rec
562
563        *Returns*
564
565        - **list** : val or value for idx'''
566        if extern:
567            return [idx.values[row].to_obj() for idx in self.lidx]
568            #return [idx.valrow(row) for idx in self.lidx]
569        return [idx.values[row] for idx in self.lidx]
570
571    def recvar(self, row, extern=True):
572        '''return the list of var val or values at the row
573
574        *Parameters*
575
576        - **row** : int - row of the record
577        - **extern** : boolean (default True) - if True, return val rec else value rec
578
579        *Returns*
580
581        - **list** : val or value for var'''
582        if extern:
583            return [idx.values[row].to_obj() for idx in self.lvar]
584            #return [idx.valrow(row) for idx in self.lvar]
585        return [idx.values[row] for idx in self.lvar]
586
587    def reindex(self):
588        '''Calculate a new default codec for each index (Return self)'''
589        for idx in self.lindex:
590            idx.reindex()
591        return self
592
593    def renameindex(self, oldname, newname):
594        '''replace an index name 'oldname' by a new one 'newname'. '''
595        for i in range(self.lenindex):
596            if self.lname[i] == oldname:
597                self.lindex[i].setname(newname)
598        for i in range(len(self.lvarname)):
599            if self.lvarname[i] == oldname:
600                self.lvarname[i] = newname
601
602    def reorder(self, recorder=None):
603        '''Reorder records in the order define by 'recorder' '''
604        if recorder is None or set(recorder) != set(range(len(self))):
605            return None
606        for idx in self.lindex:
607            idx.set_keys([idx.keys[i] for i in recorder])
608        return None
609
610    def setcanonorder(self, reindex=False):
611        '''Set the canonical index order : primary - secondary/unique - variable.
612        Set the canonical keys order : ordered keys in the first columns.
613
614        *Parameters*
615        - **reindex** : boolean (default False) - if True, set default codec after
616        transformation
617
618        *Return* : self'''
619        order = self.primaryname
620        order += self.secondaryname
621        order += self.lvarname
622        order += self.lunicname
623        self.swapindex(order)
624        self.sort(reindex=reindex)
625        self.analysis.actualize()
626        return self
627
628    def setfilter(self, filt=None, first=False, filtname=ES.filter, unique=False):
629        '''Add a filter index with boolean values
630
631        - **filt** : list of boolean - values of the filter idx to add
632        - **first** : boolean (default False) - If True insert index at the first row,
633        else at the end
634        - **filtname** : string (default ES.filter) - Name of the filter Field added
635
636        *Returns* : self'''
637        if not filt:
638            filt = [True] * len(self)
639        idx = self.field(filt, name=filtname)
640        idx.reindex()
641        if not idx.cod in ([True, False], [False, True], [True], [False]):
642            raise DatasetError('filt is not consistent')
643        if unique:
644            for name in self.lname:
645                if name[:len(ES.filter)] == ES.filter:
646                    self.delindex(ES.filter)
647        self.addindex(idx, first=first)
648        return self
649
650    def setname(self, listname=None):
651        '''Update Field name by the name in listname'''
652        for i in range(min(self.lenindex, len(listname))):
653            self.lindex[i].name = listname[i]
654        self.analysis.actualize()
655
656    def sort(self, order=None, reverse=False, func=str, reindex=True):
657        '''Sort data following the index order and apply the ascending or descending
658        sort function to values.
659
660        *Parameters*
661
662        - **order** : list (default None)- new order of index to apply. If None or [],
663        the sort function is applied to the existing order of indexes.
664        - **reverse** : boolean (default False)- ascending if True, descending if False
665        - **func**    : function (default str) - parameter key used in the sorted function
666        - **reindex** : boolean (default True) - if True, apply a new codec order (key = func)
667
668        *Returns* : self'''
669        if not order:
670            order = list(range(self.lenindex))
671        orderfull = order + list(set(range(self.lenindex)) - set(order))
672        if reindex:
673            for i in order:
674                self.lindex[i].reindex(codec=sorted(
675                    self.lindex[i].codec, key=func))
676        newidx = util.transpose(sorted(util.transpose(
677            [self.lindex[orderfull[i]].keys for i in range(self.lenindex)]),
678            reverse=reverse))
679        for i in range(self.lenindex):
680            self.lindex[orderfull[i]].set_keys(newidx[i])
681        return self
682
683    def swapindex(self, order):
684        '''
685        Change the order of the index .
686
687        *Parameters*
688
689        - **order** : list of int or list of name - new order of index to apply.
690
691        *Returns* : self '''
692        if self.lenindex != len(order):
693            raise DatasetError('length of order and Dataset different')
694        if not order or isinstance(order[0], int):
695            self.lindex = [self.lindex[ind] for ind in order]
696        elif isinstance(order[0], str):
697            self.lindex = [self.nindex(name) for name in order]
698        return self
699
700    def tostdcodec(self, inplace=False, full=True):
701        '''Transform all codec in full or default codec.
702
703        *Parameters*
704
705        - **inplace** : boolean  (default False) - if True apply transformation
706        to self, else to a new Dataset
707        - **full** : boolean (default True)- full codec if True, default if False
708
709
710        *Return Dataset* : self or new Dataset'''
711        lindex = [idx.tostdcodec(inplace=False, full=full)
712                  for idx in self.lindex]
713        if inplace:
714            self.lindex = lindex
715            return self
716        return self.__class__(lindex, self.lvarname)
717
718    def tree(self, mode='derived', width=5, lname=20, string=True):
719        '''return a string with a tree of derived Field.
720
721         *Parameters*
722
723        - **lname** : integer (default 20) - length of the names        
724        - **width** : integer (default 5) - length of the lines        
725        - **mode** : string (default 'derived') - kind of tree :
726            'derived' : derived tree
727            'distance': min distance tree
728            'diff': min dist rate tree
729        '''
730        return self.analysis.tree(width=width, lname=lname, mode=mode, string=string)
731
732    def updateindex(self, listvalue, index, extern=True):
733        '''update values of an index.
734
735        *Parameters*
736
737        - **listvalue** : list - index values to replace
738        - **index** : integer - index row to update
739        - **extern** : if True, the listvalue has external representation, else internal
740
741        *Returns* : none '''
742        self.lindex[index].setlistvalue(listvalue, extern=extern)
743
744    def valtokey(self, rec, extern=True):
745        '''convert a record list (value or val for each idx) to a key list 
746        (key for each index).
747
748        *Parameters*
749
750        - **rec** : list of value or val for each index
751        - **extern** : if True, the rec value has external representation, else internal
752
753        *Returns*
754
755        - **list of int** : record key for each index'''
756        return [idx.valtokey(val, extern=extern) for idx, val in zip(self.lindex, rec)]
def add(self, other, name=False, solve=True):
73    def add(self, other, name=False, solve=True):
74        ''' Add other's values to self's values for each index
75
76        *Parameters*
77
78        - **other** : Dataset object to add to self object
79        - **name** : Boolean (default False) - Add values with same index name (True) or
80        same index row (False)
81        - **solve** : Boolean (default True) - If True, replace None other's codec value
82        with self codec value.
83
84        *Returns* : self '''
85        if self.lenindex != other.lenindex:
86            raise DatasetError('length are not identical')
87        if name and sorted(self.lname) != sorted(other.lname):
88            raise DatasetError('name are not identical')
89        for i in range(self.lenindex):
90            if name:
91                self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])],
92                                   solve=solve)
93            else:
94                self.lindex[i].add(other.lindex[i], solve=solve)
95        return self

Add other's values to self's values for each index

Parameters

  • other : Dataset object to add to self object
  • name : Boolean (default False) - Add values with same index name (True) or same index row (False)
  • solve : Boolean (default True) - If True, replace None other's codec value with self codec value.

Returns : self

def addindex(self, index, first=False, merge=False, update=False):
 97    def addindex(self, index, first=False, merge=False, update=False):
 98        '''add a new index.
 99
100        *Parameters*
101
102        - **index** : Field - index to add (can be index Ntv representation)
103        - **first** : If True insert index at the first row, else at the end
104        - **merge** : create a new index if merge is False
105        - **update** : if True, update actual values if index name is present (and merge is True)
106
107        *Returns* : none '''
108        idx = self.field.ntv(index)
109        idxname = self.lname
110        if len(idx) != len(self) and len(self) > 0:
111            raise DatasetError('sizes are different')
112        if not idx.name in idxname:
113            if first:
114                self.lindex.insert(0, idx)
115            else:
116                self.lindex.append(idx)
117        elif not merge:  # si idx.name in idxname
118            while idx.name in idxname:
119                idx.name += '(2)'
120            if first:
121                self.lindex.insert(0, idx)
122            else:
123                self.lindex.append(idx)
124        elif update:  # si merge et si idx.name in idxname
125            self.lindex[idxname.index(idx.name)].setlistvalue(idx.values)

add a new index.

Parameters

  • index : Field - index to add (can be index Ntv representation)
  • first : If True insert index at the first row, else at the end
  • merge : create a new index if merge is False
  • update : if True, update actual values if index name is present (and merge is True)

Returns : none

def append(self, record, unique=False):
127    def append(self, record, unique=False):
128        '''add a new record.
129
130        *Parameters*
131
132        - **record** :  list of new index values to add to Dataset
133        - **unique** :  boolean (default False) - Append isn't done if unique
134        is True and record present
135        
136        *Returns* : list - key record'''
137        if self.lenindex != len(record):
138            raise DatasetError('len(record) not consistent')
139        record = self.field.l_to_i(record)
140        if self.isinrecord(self.idxrecord(record), False) and unique:
141            return None
142        return [self.lindex[i].append(record[i]) for i in range(self.lenindex)]

add a new record.

Parameters

  • record : list of new index values to add to Dataset
  • unique : boolean (default False) - Append isn't done if unique is True and record present

Returns : list - key record

def applyfilter( self, reverse=False, filtname='$filter', delfilter=True, inplace=True):
144    def applyfilter(self, reverse=False, filtname=ES.filter, delfilter=True, inplace=True):
145        '''delete records with defined filter value.
146        Filter is deleted after record filtering.
147
148        *Parameters*
149
150        - **reverse** :  boolean (default False) - delete record with filter's 
151        value is reverse
152        - **filtname** : string (default ES.filter) - Name of the filter Field added
153        - **delfilter** :  boolean (default True) - If True, delete filter's Field
154        - **inplace** : boolean (default True) - if True, filter is apply to self,
155
156        *Returns* : self or new Dataset'''
157        if not filtname in self.lname:
158            return None
159        if inplace:
160            ilis = self
161        else:
162            ilis = copy(self)
163        ifilt = ilis.lname.index(filtname)       
164        if self.field.__name__ == 'Sfield':
165            ilis.sort([ifilt], reverse= not reverse, func=None)
166        else:
167            ilis.sort([ifilt], reverse=reverse, func=None)
168        lisind = ilis.lindex[ifilt].recordfromvalue(reverse)
169        if lisind:
170            minind = min(lisind)
171            for idx in ilis.lindex:
172                del idx.keys[minind:]
173        if inplace:
174            self.delindex(filtname)
175        else:
176            ilis.delindex(filtname)
177            if delfilter:
178                self.delindex(filtname)
179        ilis.reindex()
180        return ilis

delete records with defined filter value. Filter is deleted after record filtering.

Parameters

  • reverse : boolean (default False) - delete record with filter's value is reverse
  • filtname : string (default ES.filter) - Name of the filter Field added
  • delfilter : boolean (default True) - If True, delete filter's Field
  • inplace : boolean (default True) - if True, filter is apply to self,

Returns : self or new Dataset

def couplingmatrix(self, default=False, filename=None, att='ratecpl'):
182    def couplingmatrix(self, default=False, filename=None, att='ratecpl'):
183        '''return a matrix with coupling infos between each idx.
184        One info can be stored in a file (csv format).
185
186        *Parameters*
187
188        - **default** : comparison with default codec
189        - **filename** : string (default None) - name of the file to write the matrix
190        - **att** : string - name of the info to store in the file
191
192        *Returns* : array of array of dict'''
193        return self.analysis.getmatrix()

return a matrix with coupling infos between each idx. One info can be stored in a file (csv format).

Parameters

  • default : comparison with default codec
  • filename : string (default None) - name of the file to write the matrix
  • att : string - name of the info to store in the file

Returns : array of array of dict

def coupling(self, derived=True, param='rateder', level=0.1):
195    def coupling(self, derived=True, param='rateder', level=0.1):
196        '''Transform idx with low rate in coupled or derived indexes (codec extension).
197
198        *Parameters*
199
200        - **param** : string (default 'rateder') - coupling measurement 
201        ('rateder', 'diffdistparent', 'ratecpl', 'distance')
202        - **level** : float (default 0.1) - param threshold to apply coupling.
203        - **derived** : boolean (default : True). If True, indexes are derived, 
204        else coupled.
205
206        *Returns* : None'''
207        infos = self.indexinfos()
208        parent = {'rateder': 'distparent', 'diffdistparent': 'distparent',
209                  'ratecpl': 'minparent', 'distance': 'minparent'}
210        child = [None] * len(infos)
211        for idx in range(len(infos)):
212            iparent = infos[idx][parent[param]]
213            if iparent != -1:
214                if child[iparent] is None:
215                    child[iparent] = []
216                child[iparent].append(idx)
217        for idx in range(len(infos)):
218            self._couplingidx(idx, child, derived, param,
219                              parent[param], level, infos)

Transform idx with low rate in coupled or derived indexes (codec extension).

Parameters

  • param : string (default 'rateder') - coupling measurement ('rateder', 'diffdistparent', 'ratecpl', 'distance')
  • level : float (default 0.1) - param threshold to apply coupling.
  • derived : boolean (default : True). If True, indexes are derived, else coupled.

Returns : None

def delrecord(self, record, extern=True):
235    def delrecord(self, record, extern=True):
236        '''remove a record.
237
238        *Parameters*
239
240        - **record** :  list - index values to remove to Dataset
241        - **extern** : if True, compare record values to external representation 
242        of self.value, else, internal
243
244        *Returns* : row deleted'''
245        self.reindex()
246        reckeys = self.valtokey(record, extern=extern)
247        if None in reckeys:
248            return None
249        row = self.tiindex.index(reckeys)
250        for idx in self:
251            del idx[row]
252        return row

remove a record.

Parameters

  • record : list - index values to remove to Dataset
  • extern : if True, compare record values to external representation of self.value, else, internal

Returns : row deleted

def delindex(self, delname=None, savename=None):
254    def delindex(self, delname=None, savename=None):
255        '''remove an Field or a list of Field.
256
257        *Parameters*
258
259        - **delname** : string or list of string - name of index to remove
260        - **savename** : string or list of string - name of index to keep
261
262        *Returns* : none '''
263        if not delname and not savename :
264            return
265        if isinstance(delname, str):
266            delname = [delname]
267        if isinstance(savename, str):
268            savename = [savename]
269        if delname and savename:
270            delname = [name for name in delname if not name in savename]
271        if not delname:
272            delname = [name for name in self.lname if not name in savename]
273        for idxname in delname:
274            if idxname in self.lname:
275                self.lindex.pop(self.lname.index(idxname))

remove an Field or a list of Field.

Parameters

  • delname : string or list of string - name of index to remove
  • savename : string or list of string - name of index to keep

Returns : none

def full( self, reindex=False, idxname=None, varname=None, fillvalue='-', fillextern=True, inplace=True, complete=True):
305    def full(self, reindex=False, idxname=None, varname=None, fillvalue='-',
306             fillextern=True, inplace=True, complete=True):
307        '''tranform a list of indexes in crossed indexes (value extension).
308
309        *Parameters*
310
311        - **idxname** : list of string - name of indexes to transform
312        - **varname** : string - name of indexes to use
313        - **reindex** : boolean (default False) - if True, set default codec 
314        before transformation
315        - **fillvalue** : object value used for var extension
316        - **fillextern** : boolean(default True) - if True, fillvalue is converted 
317        to internal value
318        - **inplace** : boolean (default True) - if True, filter is apply to self,
319        - **complete** : boolean (default True) - if True, Field are ordered 
320        in canonical order
321
322        *Returns* : self or new Dataset'''
323        ilis = self if inplace else copy(self)
324        if not idxname:
325            idxname = ilis.primaryname
326        if reindex:
327            ilis.reindex()
328        keysadd = util.idxfull([ilis.nindex(name) for name in idxname])
329        if keysadd and len(keysadd) != 0:
330            lenadd = len(keysadd[0])
331            for ind in range(ilis.lenindex):
332                ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
333                                fillvalue, fillextern)
334        '''if not keysadd or len(keysadd) == 0:
335            return ilis
336        lenadd = len(keysadd[0])
337        for ind in range(ilis.lenindex):
338            ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
339                            fillvalue, fillextern)   '''     
340        if complete:
341            ilis.setcanonorder()
342        return ilis

transform a list of indexes into crossed indexes (value extension).

Parameters

  • idxname : list of string - name of indexes to transform
  • varname : string - name of indexes to use
  • reindex : boolean (default False) - if True, set default codec before transformation
  • fillvalue : object value used for var extension
  • fillextern : boolean(default True) - if True, fillvalue is converted to internal value
  • inplace : boolean (default True) - if True, the transformation is applied to self, else to a copy
  • complete : boolean (default True) - if True, Field are ordered in canonical order

Returns : self or new Dataset

def getduplicates(self, indexname=None, resindex=None, indexview=None):
344    def getduplicates(self, indexname=None, resindex=None, indexview=None):
345        '''check duplicate cod in a list of indexes. Result is add in a new 
346        index or returned.
347
348        *Parameters*
349
350        - **indexname** : list of string (default none) - name of indexes to check 
351        (if None, all Field)
352        - **resindex** : string (default None) - Add a new index named resindex 
353        with check result (False if duplicate)
354        - **indexview** : list of str (default None) - list of fields to return
355
356        *Returns* : list of int - list of rows with duplicate cod '''
357        if not indexname:
358            indexname = self.lname
359        duplicates = []
360        for name in indexname:
361            duplicates += self.nindex(name).getduplicates()
362        if resindex and isinstance(resindex, str):
363            newidx = self.field([True] * len(self), name=resindex)
364            for item in duplicates:
365                newidx[item] = False
366            self.addindex(newidx)
367        dupl = tuple(set(duplicates))
368        if not indexview:
369            return dupl
370        return [tuple(self.record(ind, indexview)) for ind in dupl]

check duplicate cod in a list of indexes. Result is add in a new index or returned.

Parameters

  • indexname : list of string (default none) - name of indexes to check (if None, all Field)
  • resindex : string (default None) - Add a new index named resindex with check result (False if duplicate)
  • indexview : list of str (default None) - list of fields to return

Returns : list of int - list of rows with duplicate cod

def iscanonorder(self):
372    def iscanonorder(self):
373        '''return True if primary indexes have canonical ordered keys'''
374        primary = self.primary
375        canonorder = util.canonorder(
376            [len(self.lidx[idx].codec) for idx in primary])
377        return canonorder == [self.lidx[idx].keys for idx in primary]

return True if primary indexes have canonical ordered keys

def isinrecord(self, record, extern=True):
379    def isinrecord(self, record, extern=True):
380        '''Check if record is present in self.
381
382        *Parameters*
383
384        - **record** : list - value for each Field
385        - **extern** : if True, compare record values to external representation
386        of self.value, else, internal
387
388        *Returns boolean* : True if found'''
389        if extern:
390            return record in util.transpose(self.extidxext)
391        return record in util.transpose(self.extidx)

Check if record is present in self.

Parameters

  • record : list - value for each Field
  • extern : if True, compare record values to external representation of self.value, else, internal

Returns boolean : True if found

def idxrecord(self, record):
393    def idxrecord(self, record):
394        '''return rec array (without variable) from complete record (with variable)'''
395        return [record[self.lidxrow[i]] for i in range(len(self.lidxrow))]

return rec array (without variable) from complete record (with variable)

def indexinfos(self, keys=None):
397    def indexinfos(self, keys=None):
398        '''return a dict with infos of each index :
399            - num, name, cat, diffdistparent, child, parent, distparent, 
400            crossed, pparent, rateder (struct info)
401            - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)
402
403        *Parameters*
404
405        - **keys** : string, list or tuple (default None) - list of attributes 
406        to returned.
407        if 'all' or None, all attributes are returned.
408        if 'struct', only structural attributes are returned.
409
410        *Returns* : dict'''
411        return self.analysis.getinfos(keys)

return a dict with infos of each index : - num, name, cat, diffdistparent, child, parent, distparent, crossed, pparent, rateder (struct info) - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)

Parameters

  • keys : string, list or tuple (default None) - list of attributes to returned. if 'all' or None, all attributes are returned. if 'struct', only structural attributes are returned.

Returns : dict

def indicator(self, fullsize=None, size=None):
413    def indicator(self, fullsize=None, size=None):
414        '''generate size indicators: ol (object lightness), ul (unicity level), 
415        gain (sizegain)
416
417        *Parameters*
418
419        - **fullsize** : int (default none) - size with full codec
420        - **size** : int (default none) - size with existing codec
421        - **indexinfos** : list (default None) - indexinfos data
422
423        *Returns* : dict'''
424        if not fullsize:
425            fullsize = len(self.to_obj(encoded=True, modecodec='full'))
426        if not size:
427            size = len(self.to_obj(encoded=True))
428        nval = len(self) * (self.lenindex + 1)
429        sval = fullsize / nval
430        ncod = sum(self.indexlen) + self.lenindex
431        if nval != ncod:
432            scod = (size - ncod * sval) / (nval - ncod)
433            olight = scod / sval
434        else:
435            olight = None
436        return {'total values': nval, 'mean size': round(sval, 3),
437                'unique values': ncod, 'mean coding size': round(scod, 3),
438                'unicity level': round(ncod / nval, 3),
439                'optimize level': round(size / fullsize, 3),
440                'object lightness': round(olight, 3),
441                'maxgain': round((nval - ncod) / nval, 3),
442                'gain': round((fullsize - size) / fullsize, 3)}

generate size indicators: ol (object lightness), ul (unicity level), gain (sizegain)

Parameters

  • fullsize : int (default none) - size with full codec
  • size : int (default none) - size with existing codec
  • indexinfos : list (default None) - indexinfos data

Returns : dict

def keytoval(self, listkey, extern=True):
444    def keytoval(self, listkey, extern=True):
445        '''
446        convert a keys list (key for each index) to a values list (value for each index).
447
448        *Parameters*
449
450        - **listkey** : key for each index
451        - **extern** : boolean (default True) - if True, compare rec to val else to values
452
453        *Returns*
454
455        - **list** : value for each index'''
456        return [idx.keytoval(key, extern=extern) for idx, key in zip(self.lindex, listkey)]

convert a keys list (key for each index) to a values list (value for each index).

Parameters

  • listkey : key for each index
  • extern : boolean (default True) - if True, compare rec to val else to values

Returns

  • list : value for each index
def loc(self, rec, extern=True, row=False):
458    def loc(self, rec, extern=True, row=False):
459        '''
460        Return record or row corresponding to a list of idx values.
461
462        *Parameters*
463
464        - **rec** : list - value for each idx
465        - **extern** : boolean (default True) - if True, compare rec to val,
466        else to values
467        - **row** : Boolean (default False) - if True, return list of row,
468        else list of records
469
470        *Returns*
471
472        - **object** : variable value or None if not found'''
473        locrow = None
474        try:
475            if len(rec) == self.lenindex:
476                locrow = list(set.intersection(*[set(self.lindex[i].loc(rec[i], extern))
477                                               for i in range(self.lenindex)]))
478            elif len(rec) == self.lenidx:
479                locrow = list(set.intersection(*[set(self.lidx[i].loc(rec[i], extern))
480                                               for i in range(self.lenidx)]))
481        except:
482            pass
483        if locrow is None:
484            return None
485        if row:
486            return locrow
487        return [self.record(locr, extern=extern) for locr in locrow]

Return record or row corresponding to a list of idx values.

Parameters

  • rec : list - value for each idx
  • extern : boolean (default True) - if True, compare rec to val, else to values
  • row : Boolean (default False) - if True, return list of row, else list of records

Returns

  • object : variable value or None if not found
def mix(self, other, fillvalue=None):
489    def mix(self, other, fillvalue=None):
490        '''add other Field not included in self and add other's values'''
491        sname = set(self.lname)
492        oname = set(other.lname)
493        newself = copy(self)
494        copother = copy(other)
495        for nam in oname - sname:
496            newself.addindex({nam: [fillvalue] * len(newself)})
497        for nam in sname - oname:
498            copother.addindex({nam: [fillvalue] * len(copother)})
499        return newself.add(copother, name=True, solve=False)

add other Field not included in self and add other's values

def merging(self, listname=None):
501    def merging(self, listname=None):
502        ''' add a new Field build with Field define in listname.
503        Values of the new Field are set of values in listname Field'''
504        self.addindex(Field.merging([self.nindex(name) for name in listname]))

add a new Field build with Field define in listname. Values of the new Field are set of values in listname Field

def nindex(self, name):
506    def nindex(self, name):
507        ''' index with name equal to attribute name'''
508        if name in self.lname:
509            return self.lindex[self.lname.index(name)]
510        return None

index with name equal to attribute name

def orindex(self, other, first=False, merge=False, update=False):
512    def orindex(self, other, first=False, merge=False, update=False):
513        ''' Add other's index to self's index (with same length)
514
515        *Parameters*
516
517        - **other** : self class - object to add
518        - **first** : Boolean (default False) - If True insert indexes
519        at the first row, else at the end
520        - **merge** : Boolean (default False) - create a new index 
521        if merge is False
522        - **update** : Boolean (default False) - if True, update actual 
523        values if index name is present (and merge is True)
524
525        *Returns* : none '''
526        if len(self) != 0 and len(self) != len(other) and len(other) != 0:
527            raise DatasetError("the sizes are not equal")
528        otherc = copy(other)
529        for idx in otherc.lindex:
530            self.addindex(idx, first=first, merge=merge, update=update)
531        return self

Add other's index to self's index (with same length)

Parameters

  • other : self class - object to add
  • first : Boolean (default False) - If True insert indexes at the first row, else at the end
  • merge : Boolean (default False) - create a new index if merge is False
  • update : Boolean (default False) - if True, update actual values if index name is present (and merge is True)

Returns : none

def record(self, row, indexname=None, extern=True):
533    def record(self, row, indexname=None, extern=True):
534        '''return the record at the row
535
536        *Parameters*
537
538        - **row** : int - row of the record
539        - **extern** : boolean (default True) - if True, return val record else
540        value record
541        - **indexname** : list of str (default None) - list of fields to return
542        *Returns*
543
544        - **list** : val record or value record'''
545        if indexname is None:
546            indexname = self.lname
547        if extern:
548            record = [idx.val[row] for idx in self.lindex]
549            #record = [idx.values[row].to_obj() for idx in self.lindex]
550            #record = [idx.valrow(row) for idx in self.lindex]
551        else:
552            record = [idx.values[row] for idx in self.lindex]
553        return [record[self.lname.index(name)] for name in indexname]

return the record at the row

Parameters

  • row : int - row of the record
  • extern : boolean (default True) - if True, return val record else value record
  • indexname : list of str (default None) - list of fields to return Returns

  • list : val record or value record

def recidx(self, row, extern=True):
555    def recidx(self, row, extern=True):
556        '''return the list of idx val or values at the row
557
558        *Parameters*
559
560        - **row** : int - row of the record
561        - **extern** : boolean (default True) - if True, return val rec else value rec
562
563        *Returns*
564
565        - **list** : val or value for idx'''
566        if extern:
567            return [idx.values[row].to_obj() for idx in self.lidx]
568            #return [idx.valrow(row) for idx in self.lidx]
569        return [idx.values[row] for idx in self.lidx]

return the list of idx val or values at the row

Parameters

  • row : int - row of the record
  • extern : boolean (default True) - if True, return val rec else value rec

Returns

  • list : val or value for idx
def recvar(self, row, extern=True):
571    def recvar(self, row, extern=True):
572        '''return the list of var val or values at the row
573
574        *Parameters*
575
576        - **row** : int - row of the record
577        - **extern** : boolean (default True) - if True, return val rec else value rec
578
579        *Returns*
580
581        - **list** : val or value for var'''
582        if extern:
583            return [idx.values[row].to_obj() for idx in self.lvar]
584            #return [idx.valrow(row) for idx in self.lvar]
585        return [idx.values[row] for idx in self.lvar]

return the list of var val or values at the row

Parameters

  • row : int - row of the record
  • extern : boolean (default True) - if True, return val rec else value rec

Returns

  • list : val or value for var
def reindex(self):
587    def reindex(self):
588        '''Calculate a new default codec for each index (Return self)'''
589        for idx in self.lindex:
590            idx.reindex()
591        return self

Calculate a new default codec for each index (Return self)

def renameindex(self, oldname, newname):
593    def renameindex(self, oldname, newname):
594        '''replace an index name 'oldname' by a new one 'newname'. '''
595        for i in range(self.lenindex):
596            if self.lname[i] == oldname:
597                self.lindex[i].setname(newname)
598        for i in range(len(self.lvarname)):
599            if self.lvarname[i] == oldname:
600                self.lvarname[i] = newname

replace an index name 'oldname' by a new one 'newname'.

def reorder(self, recorder=None):
602    def reorder(self, recorder=None):
603        '''Reorder records in the order define by 'recorder' '''
604        if recorder is None or set(recorder) != set(range(len(self))):
605            return None
606        for idx in self.lindex:
607            idx.set_keys([idx.keys[i] for i in recorder])
608        return None

Reorder records in the order define by 'recorder'

def setcanonorder(self, reindex=False):
610    def setcanonorder(self, reindex=False):
611        '''Set the canonical index order : primary - secondary/unique - variable.
612        Set the canonical keys order : ordered keys in the first columns.
613
614        *Parameters*
615        - **reindex** : boolean (default False) - if True, set default codec after
616        transformation
617
618        *Return* : self'''
619        order = self.primaryname
620        order += self.secondaryname
621        order += self.lvarname
622        order += self.lunicname
623        self.swapindex(order)
624        self.sort(reindex=reindex)
625        self.analysis.actualize()
626        return self

Set the canonical index order : primary - secondary - variable - unique. Set the canonical keys order : ordered keys in the first columns.

Parameters

  • reindex : boolean (default False) - if True, set default codec after transformation

Return : self

def setfilter(self, filt=None, first=False, filtname='$filter', unique=False):
628    def setfilter(self, filt=None, first=False, filtname=ES.filter, unique=False):
629        '''Add a filter index with boolean values
630
631        - **filt** : list of boolean - values of the filter idx to add
632        - **first** : boolean (default False) - If True insert index at the first row,
633        else at the end
634        - **filtname** : string (default ES.filter) - Name of the filter Field added
635
636        *Returns* : self'''
637        if not filt:
638            filt = [True] * len(self)
639        idx = self.field(filt, name=filtname)
640        idx.reindex()
641        if not idx.cod in ([True, False], [False, True], [True], [False]):
642            raise DatasetError('filt is not consistent')
643        if unique:
644            for name in self.lname:
645                if name[:len(ES.filter)] == ES.filter:
646                    self.delindex(ES.filter)
647        self.addindex(idx, first=first)
648        return self

Add a filter index with boolean values

  • filt : list of boolean - values of the filter idx to add
  • first : boolean (default False) - If True insert index at the first row, else at the end
  • filtname : string (default ES.filter) - Name of the filter Field added

Returns : self

def setname(self, listname=None):
650    def setname(self, listname=None):
651        '''Update Field name by the name in listname'''
652        for i in range(min(self.lenindex, len(listname))):
653            self.lindex[i].name = listname[i]
654        self.analysis.actualize()

Update Field name by the name in listname

def sort(self, order=None, reverse=False, func=<class 'str'>, reindex=True):
656    def sort(self, order=None, reverse=False, func=str, reindex=True):
657        '''Sort data following the index order and apply the ascending or descending
658        sort function to values.
659
660        *Parameters*
661
662        - **order** : list (default None)- new order of index to apply. If None or [],
663        the sort function is applied to the existing order of indexes.
664        - **reverse** : boolean (default False)- ascending if True, descending if False
665        - **func**    : function (default str) - parameter key used in the sorted function
666        - **reindex** : boolean (default True) - if True, apply a new codec order (key = func)
667
668        *Returns* : self'''
669        if not order:
670            order = list(range(self.lenindex))
671        orderfull = order + list(set(range(self.lenindex)) - set(order))
672        if reindex:
673            for i in order:
674                self.lindex[i].reindex(codec=sorted(
675                    self.lindex[i].codec, key=func))
676        newidx = util.transpose(sorted(util.transpose(
677            [self.lindex[orderfull[i]].keys for i in range(self.lenindex)]),
678            reverse=reverse))
679        for i in range(self.lenindex):
680            self.lindex[orderfull[i]].set_keys(newidx[i])
681        return self

Sort data following the index order and apply the ascending or descending sort function to values.

Parameters

  • order : list (default None)- new order of index to apply. If None or [], the sort function is applied to the existing order of indexes.
  • reverse : boolean (default False) - descending if True, ascending if False
  • func : function (default str) - parameter key used in the sorted function
  • reindex : boolean (default True) - if True, apply a new codec order (key = func)

Returns : self

def swapindex(self, order):
683    def swapindex(self, order):
684        '''
685        Change the order of the index .
686
687        *Parameters*
688
689        - **order** : list of int or list of name - new order of index to apply.
690
691        *Returns* : self '''
692        if self.lenindex != len(order):
693            raise DatasetError('length of order and Dataset different')
694        if not order or isinstance(order[0], int):
695            self.lindex = [self.lindex[ind] for ind in order]
696        elif isinstance(order[0], str):
697            self.lindex = [self.nindex(name) for name in order]
698        return self

Change the order of the index .

Parameters

  • order : list of int or list of name - new order of index to apply.

Returns : self

def tostdcodec(self, inplace=False, full=True):
def tostdcodec(self, inplace=False, full=True):
    '''Convert the codec of every index to a full or a default codec.

    *Parameters*

    - **inplace** : boolean (default False) - if True the converted indexes
    replace those of self, else they are used to build a new Dataset
    - **full** : boolean (default True) - full codec if True, default codec
    if False

    *Return Dataset* : self (if inplace) or new Dataset'''
    converted = []
    for field in self.lindex:
        # each index is always converted as a copy; self is only touched below
        converted.append(field.tostdcodec(inplace=False, full=full))
    if not inplace:
        return self.__class__(converted, self.lvarname)
    self.lindex = converted
    return self

Transform all codec in full or default codec.

Parameters

  • inplace : boolean (default False) - if True apply transformation to self, else to a new Dataset
  • full : boolean (default True)- full codec if True, default if False

Return Dataset : self or new Dataset

def tree(self, mode='derived', width=5, lname=20, string=True):
def tree(self, mode='derived', width=5, lname=20, string=True):
    '''Return the tree of Field produced by the `analysis` attribute.

    *Parameters*

    - **mode** : string (default 'derived') - kind of tree :
        'derived' : derived tree
        'distance': min distance tree
        'diff': min dist rate tree
    - **width** : integer (default 5) - length of the lines
    - **lname** : integer (default 20) - length of the names
    - **string** : boolean (default True) - forwarded unchanged to
    `analysis.tree` (presumably selects a string result - to be confirmed)

    *Returns* : the result of `self.analysis.tree`
    '''
    options = {'width': width, 'lname': lname, 'mode': mode, 'string': string}
    return self.analysis.tree(**options)

return a string with a tree of derived Field.

Parameters

  • lname : integer (default 20) - length of the names
  • width : integer (default 5) - length of the lines
  • mode : string (default 'derived') - kind of tree: 'derived' (derived tree), 'distance' (min distance tree), 'diff' (min dist rate tree)
def updateindex(self, listvalue, index, extern=True):
def updateindex(self, listvalue, index, extern=True):
    '''Replace the values of one index.

    *Parameters*

    - **listvalue** : list - new values of the index
    - **index** : integer - position in `lindex` of the index to update
    - **extern** : boolean (default True) - if True, the listvalue has
    external representation, else internal

    *Returns* : none '''
    target = self.lindex[index]
    target.setlistvalue(listvalue, extern=extern)

update values of an index.

Parameters

  • listvalue : list - index values to replace
  • index : integer - index row to update
  • extern : if True, the listvalue has external representation, else internal

Returns : none

def valtokey(self, rec, extern=True):
def valtokey(self, rec, extern=True):
    '''Translate a record (one value or val per index) into the list of the
    corresponding keys (one key per index).

    *Parameters*

    - **rec** : list of value or val for each index, in `lindex` order
    - **extern** : if True, the rec value has external representation, else internal

    *Returns*

    - **list** : key returned by each index for its rec value'''
    keys = []
    # pairing stops at the shorter of lindex and rec
    for field, value in zip(self.lindex, rec):
        keys.append(field.valtokey(value, extern=extern))
    return keys

convert a record list (value or val for each idx) to a key list (key for each index).

Parameters

  • rec : list of value or val for each index
  • extern : if True, the rec value has external representation, else internal

Returns

  • list of int : record key for each index