python.observation.dataset_structure
Created on Sun Oct 2 22:24:59 2022
@author: philippe@loco-labs.io
The `python.observation.dataset_structure` module contains the `DatasetStructure` class (methods of `python.observation.dataset.Dataset`).
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 2 22:24:59 2022

@author: philippe@loco-labs.io

The `python.observation.dataset_structure` module contains the `DatasetStructure` class
(`python.observation.dataset.Dataset` methods).
"""

# %% declarations
from copy import copy

from observation.esconstante import ES
from observation.field import Field
from observation.util import util
from observation.dataset_interface import DatasetError
from observation.fields import Sfield


class DatasetStructure:
    '''this class includes Dataset methods :

    *selecting - infos methods*

    - `DatasetStructure.couplingmatrix`
    - `DatasetStructure.idxrecord`
    - `DatasetStructure.indexinfos`
    - `DatasetStructure.indicator`
    - `DatasetStructure.iscanonorder`
    - `DatasetStructure.isinrecord`
    - `DatasetStructure.keytoval`
    - `DatasetStructure.loc`
    - `DatasetStructure.nindex`
    - `DatasetStructure.record`
    - `DatasetStructure.recidx`
    - `DatasetStructure.recvar`
    - `DatasetStructure.tree`
    - `DatasetStructure.valtokey`

    *add - update methods*

    - `DatasetStructure.add`
    - `DatasetStructure.addindex`
    - `DatasetStructure.append`
    - `DatasetStructure.delindex`
    - `DatasetStructure.delrecord`
    - `DatasetStructure.orindex`
    - `DatasetStructure.renameindex`
    - `DatasetStructure.setvar`
    - `DatasetStructure.setname`
    - `DatasetStructure.updateindex`

    *structure management - methods*

    - `DatasetStructure.applyfilter`
    - `DatasetStructure.coupling`
    - `DatasetStructure.full`
    - `DatasetStructure.getduplicates`
    - `DatasetStructure.mix`
    - `DatasetStructure.merging`
    - `DatasetStructure.reindex`
    - `DatasetStructure.reorder`
    - `DatasetStructure.setfilter`
    - `DatasetStructure.sort`
    - `DatasetStructure.swapindex`
    - `DatasetStructure.setcanonorder`
    - `DatasetStructure.tostdcodec`
    '''
    # %% methods

    def add(self, other, name=False, solve=True):
        ''' Add other's values to self's values for each index

        *Parameters*

        - **other** : Dataset object to add to self object
        - **name** : Boolean (default False) - Add values with same index name (True) or
        same index row (False)
        - **solve** : Boolean (default True) - If True, replace None other's codec value
        with self codec value.

        *Returns* : self

        *Raises* : DatasetError if the number of indexes differs or, when
        name is True, if the index names differ'''
        if self.lenindex != other.lenindex:
            raise DatasetError('length are not identical')
        if name and sorted(self.lname) != sorted(other.lname):
            raise DatasetError('name are not identical')
        for i in range(self.lenindex):
            if name:
                # pair the indexes by name rather than by position
                self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])],
                                   solve=solve)
            else:
                self.lindex[i].add(other.lindex[i], solve=solve)
        return self

    def addindex(self, index, first=False, merge=False, update=False):
        '''add a new index.

        *Parameters*

        - **index** : Field - index to add (can be index Ntv representation)
        - **first** : If True insert index at the first row, else at the end
        - **merge** : create a new index if merge is False
        - **update** : if True, update actual values if index name is present (and merge is True)

        *Returns* : none

        *Raises* : DatasetError if self is not empty and the length of the
        index differs from the length of self'''
        idx = self.field.ntv(index)
        idxname = self.lname
        if len(idx) != len(self) and len(self) > 0:
            raise DatasetError('sizes are different')
        if idx.name not in idxname:
            if first:
                self.lindex.insert(0, idx)
            else:
                self.lindex.append(idx)
        elif not merge:  # idx.name is already in idxname
            # suffix the name until it no longer clashes with an existing one
            while idx.name in idxname:
                idx.name += '(2)'
            if first:
                self.lindex.insert(0, idx)
            else:
                self.lindex.append(idx)
        elif update:  # merge is True and idx.name is already in idxname
            self.lindex[idxname.index(idx.name)].setlistvalue(idx.values)

    def append(self, record, unique=False):
        '''add a new record.

        *Parameters*

        - **record** :  list of new index values to add to Dataset
        - **unique** :  boolean (default False) - Append isn't done if unique
        is True and record present

        *Returns* : list - key record (None if unique is True and the record
        is already present)

        *Raises* : DatasetError if the record length differs from lenindex'''
        if self.lenindex != len(record):
            raise DatasetError('len(record) not consistent')
        record = self.field.l_to_i(record)
        # test 'unique' first: the presence check scans every record and is
        # useless when duplicates are allowed
        if unique and self.isinrecord(self.idxrecord(record), False):
            return None
        return [self.lindex[i].append(record[i]) for i in range(self.lenindex)]

    def applyfilter(self, reverse=False, filtname=ES.filter, delfilter=True, inplace=True):
        '''delete records with defined filter value.
        Filter is deleted after record filtering.

        *Parameters*

        - **reverse** :  boolean (default False) - delete record with filter's
        value is reverse
        - **filtname** : string (default ES.filter) - Name of the filter Field added
        - **delfilter** :  boolean (default True) - If True, delete filter's Field
        - **inplace** : boolean (default True) - if True, filter is apply to self,

        *Returns* : self or new Dataset (None if filtname is not an index name)'''
        if filtname not in self.lname:
            return None
        if inplace:
            ilis = self
        else:
            ilis = copy(self)
        ifilt = ilis.lname.index(filtname)
        # the sort direction is inverted for 'Sfield' fields so that the
        # records to delete end up at the tail in both cases
        # (presumably a codec ordering difference - TODO confirm)
        if self.field.__name__ == 'Sfield':
            ilis.sort([ifilt], reverse=not reverse, func=None)
        else:
            ilis.sort([ifilt], reverse=reverse, func=None)
        lisind = ilis.lindex[ifilt].recordfromvalue(reverse)
        if lisind:
            # records are sorted on the filter: truncate from the first row
            # holding the value to delete
            minind = min(lisind)
            for idx in ilis.lindex:
                del idx.keys[minind:]
        # NOTE(review): 'delfilter' is only honoured when inplace is False,
        # where it removes the filter Field from self (the result never keeps
        # the filter Field) - confirm this is the intended contract.
        if inplace:
            self.delindex(filtname)
        else:
            ilis.delindex(filtname)
            if delfilter:
                self.delindex(filtname)
        ilis.reindex()
        return ilis

    def couplingmatrix(self, default=False, filename=None, att='ratecpl'):
        '''return a matrix with coupling infos between each idx.
        One info can be stored in a file (csv format).

        *Parameters*

        - **default** : comparison with default codec
        - **filename** : string (default None) - name of the file to write the matrix
        - **att** : string - name of the info to store in the file

        The three parameters are currently not used : the matrix is read
        from self.analysis.

        *Returns* : array of array of dict'''
        return self.analysis.getmatrix()

    def coupling(self, derived=True, param='rateder', level=0.1):
        '''Transform idx with low rate in coupled or derived indexes (codec extension).

        *Parameters*

        - **param** : string (default 'rateder') - coupling measurement
        ('rateder', 'diffdistparent', 'ratecpl', 'distance')
        - **level** : float (default 0.1) - param threshold to apply coupling.
        - **derived** : boolean (default : True). If True, indexes are derived,
        else coupled.

        *Returns* : None'''
        infos = self.indexinfos()
        # name of the 'parent' info associated to each measurement
        parent = {'rateder': 'distparent', 'diffdistparent': 'distparent',
                  'ratecpl': 'minparent', 'distance': 'minparent'}
        parentparam = parent[param]
        # child[i] : list of the indexes whose parent is i (None if no child)
        child = [None] * len(infos)
        for idx in range(len(infos)):
            iparent = infos[idx][parentparam]
            if iparent != -1:
                if child[iparent] is None:
                    child[iparent] = []
                child[iparent].append(idx)
        for idx in range(len(infos)):
            self._couplingidx(idx, child, derived, param,
                              parentparam, level, infos)

    def _couplingidx(self, idx, child, derived, param, parentparam, level, infos):
        ''' Field coupling (children of the Field are processed first)'''
        inf = infos[idx]
        if inf['cat'] in ('coupled', 'unique') or inf[parentparam] == -1\
                or inf[param] >= level or (derived and inf['cat'] == 'derived'):
            return
        if child[idx]:
            for childidx in child[idx]:
                self._couplingidx(childidx, child, derived,
                                  param, parentparam, level, infos)
        self.lindex[inf[parentparam]].coupling(self.lindex[idx], derived=derived,
                                               duplicate=False)
        return

    def delrecord(self, record, extern=True):
        '''remove a record.

        *Parameters*

        - **record** :  list - index values to remove to Dataset
        - **extern** : if True, compare record values to external representation
        of self.value, else, internal

        *Returns* : row deleted (None if a value of the record has no key)'''
        self.reindex()
        reckeys = self.valtokey(record, extern=extern)
        if None in reckeys:
            return None
        row = self.tiindex.index(reckeys)
        for idx in self:
            del idx[row]
        return row

    def delindex(self, delname=None, savename=None):
        '''remove an Field or a list of Field.

        *Parameters*

        - **delname** : string or list of string - name of index to remove
        - **savename** : string or list of string - name of index to keep

        If only savename is given, every index not in savename is removed.
        If both are given, the indexes of delname that are not in savename
        are removed.

        *Returns* : none '''
        if not delname and not savename:
            return
        if isinstance(delname, str):
            delname = [delname]
        if isinstance(savename, str):
            savename = [savename]
        if delname and savename:
            delname = [name for name in delname if name not in savename]
        elif not delname:
            # 'elif' : an explicit delname emptied by savename must not fall
            # back to "remove everything except savename"
            delname = [name for name in self.lname if name not in savename]
        for idxname in delname:
            if idxname in self.lname:
                self.lindex.pop(self.lname.index(idxname))

    def _fullindex(self, ind, keysadd, indexname, varname, leng, fillvalue, fillextern):
        '''extend the keys of the index 'ind' up to 'leng' records (used by
        `DatasetStructure.full`).

        The extension depends on the index : constant key for 'unique'
        indexes, keysadd for the indexes of indexname, a new fill value for
        variables and indexes without parent, and a coupling with the
        (already extended) parent for the other indexes.'''
        if not varname:
            varname = []
        idx = self.lindex[ind]
        lenadd = len(keysadd[0])
        if len(idx) == leng:
            return
        inf = self.indexinfos()
        if inf[ind]['cat'] == 'unique':
            idx.set_keys(idx.keys + [0] * lenadd)
        elif self.lname[ind] in indexname:
            idx.set_keys(idx.keys + keysadd[indexname.index(self.lname[ind])])
        elif inf[ind]['parent'] == -1 or self.lname[ind] in varname:
            fillval = fillvalue
            if fillextern:
                fillval = self.field.s_to_i(fillvalue)
            # the added records point to a new codec entry : the fill value
            idx.set_keys(idx.keys + [len(idx.codec)] * lenadd)
            idx.set_codec(idx.codec + [fillval])
        else:
            parent = inf[ind]['parent']
            # the parent has to be extended before the coupling
            if len(self.lindex[parent]) != leng:
                self._fullindex(parent, keysadd, indexname, varname, leng,
                                fillvalue, fillextern)
            if inf[ind]['cat'] == 'coupled':
                idx.tocoupled(self.lindex[parent], coupling=True)
            else:
                idx.tocoupled(self.lindex[parent], coupling=False)

    def full(self, reindex=False, idxname=None, varname=None, fillvalue='-',
             fillextern=True, inplace=True, complete=True):
        '''tranform a list of indexes in crossed indexes (value extension).

        *Parameters*

        - **idxname** : list of string - name of indexes to transform
        - **varname** : string - name of indexes to use
        - **reindex** : boolean (default False) - if True, set default codec
        before transformation
        - **fillvalue** : object value used for var extension
        - **fillextern** : boolean(default True) - if True, fillvalue is converted
        to internal value
        - **inplace** : boolean (default True) - if True, filter is apply to self,
        - **complete** : boolean (default True) - if True, Field are ordered
        in canonical order

        *Returns* : self or new Dataset'''
        ilis = self if inplace else copy(self)
        if not idxname:
            idxname = ilis.primaryname
        if reindex:
            ilis.reindex()
        keysadd = util.idxfull([ilis.nindex(name) for name in idxname])
        if keysadd:
            lenadd = len(keysadd[0])
            for ind in range(ilis.lenindex):
                # len(ilis) is deliberately read at each iteration, as in the
                # original implementation
                ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd,
                                fillvalue, fillextern)
        if complete:
            ilis.setcanonorder()
        return ilis

    def getduplicates(self, indexname=None, resindex=None, indexview=None):
        '''check duplicate cod in a list of indexes. Result is add in a new
        index or returned.

        *Parameters*

        - **indexname** : list of string (default none) - name of indexes to check
        (if None, all Field)
        - **resindex** : string (default None) - Add a new index named resindex
        with check result (False if duplicate)
        - **indexview** : list of str (default None) - list of fields to return

        *Returns* : tuple of int - rows with duplicate cod (if indexview is
        defined : list of tuple - values of the indexview fields for each
        of these rows)'''
        if not indexname:
            indexname = self.lname
        duplicates = []
        for name in indexname:
            duplicates += self.nindex(name).getduplicates()
        if resindex and isinstance(resindex, str):
            newidx = self.field([True] * len(self), name=resindex)
            for item in duplicates:
                newidx[item] = False
            self.addindex(newidx)
        dupl = tuple(set(duplicates))
        if not indexview:
            return dupl
        return [tuple(self.record(ind, indexview)) for ind in dupl]

    def iscanonorder(self):
        '''return True if primary indexes have canonical ordered keys'''
        primary = self.primary
        canonorder = util.canonorder(
            [len(self.lidx[idx].codec) for idx in primary])
        return canonorder == [self.lidx[idx].keys for idx in primary]

    def isinrecord(self, record, extern=True):
        '''Check if record is present in self.

        *Parameters*

        - **record** : list - value for each Field
        - **extern** : if True, compare record values to external representation
        of self.value, else, internal

        *Returns boolean* : True if found'''
        if extern:
            return record in util.transpose(self.extidxext)
        return record in util.transpose(self.extidx)

    def idxrecord(self, record):
        '''return rec array (without variable) from complete record (with variable)'''
        return [record[row] for row in self.lidxrow]

    def indexinfos(self, keys=None):
        '''return a dict with infos of each index :
            - num, name, cat, diffdistparent, child, parent, distparent,
            crossed, pparent, rateder (struct info)
            - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)

        *Parameters*

        - **keys** : string, list or tuple (default None) - list of attributes
        to returned.
        if 'all' or None, all attributes are returned.
        if 'struct', only structural attributes are returned.

        *Returns* : dict'''
        return self.analysis.getinfos(keys)

    def indicator(self, fullsize=None, size=None):
        '''generate size indicators: ol (object lightness), ul (unicity level),
        gain (sizegain)

        *Parameters*

        - **fullsize** : int (default none) - size with full codec
        - **size** : int (default none) - size with existing codec

        *Returns* : dict ('mean coding size' and 'object lightness' are None
        when the number of unique values equals the number of values)'''
        if not fullsize:
            fullsize = len(self.to_obj(encoded=True, modecodec='full'))
        if not size:
            size = len(self.to_obj(encoded=True))
        nval = len(self) * (self.lenindex + 1)
        sval = fullsize / nval
        ncod = sum(self.indexlen) + self.lenindex
        if nval != ncod:
            scod = (size - ncod * sval) / (nval - ncod)
            olight = scod / sval
        else:
            # no coded value : both indicators are undefined
            scod = None
            olight = None
        return {'total values': nval, 'mean size': round(sval, 3),
                'unique values': ncod,
                'mean coding size': None if scod is None else round(scod, 3),
                'unicity level': round(ncod / nval, 3),
                'optimize level': round(size / fullsize, 3),
                'object lightness': None if olight is None else round(olight, 3),
                'maxgain': round((nval - ncod) / nval, 3),
                'gain': round((fullsize - size) / fullsize, 3)}

    def keytoval(self, listkey, extern=True):
        '''
        convert a keys list (key for each index) to a values list (value for each index).

        *Parameters*

        - **listkey** : key for each index
        - **extern** : boolean (default True) - if True, compare rec to val else to values

        *Returns*

        - **list** : value for each index'''
        return [idx.keytoval(key, extern=extern) for idx, key in zip(self.lindex, listkey)]

    def loc(self, rec, extern=True, row=False):
        '''
        Return record or row corresponding to a list of idx values.

        *Parameters*

        - **rec** : list - value for each idx
        - **extern** : boolean (default True) - if True, compare rec to val,
        else to values
        - **row** : Boolean (default False) - if True, return list of row,
        else list of records

        *Returns*

        - **list** : rows or records matching rec. None if the length of rec
        is neither lenindex nor lenidx, or if the search fails'''
        locrow = None
        try:
            if len(rec) == self.lenindex:
                locrow = list(set.intersection(*[set(self.lindex[i].loc(rec[i], extern))
                                                 for i in range(self.lenindex)]))
            elif len(rec) == self.lenidx:
                locrow = list(set.intersection(*[set(self.lidx[i].loc(rec[i], extern))
                                                 for i in range(self.lenidx)]))
        except Exception:
            # best effort : a failed search is reported as "not found"
            # (KeyboardInterrupt and SystemExit are no longer swallowed)
            pass
        if locrow is None:
            return None
        if row:
            return locrow
        return [self.record(locr, extern=extern) for locr in locrow]

    def mix(self, other, fillvalue=None):
        '''add other Field not included in self and add other's values

        *Parameters*

        - **other** : Dataset object to mix with self
        - **fillvalue** : object (default None) - value used for the Field
        missing in self or in other

        *Returns* : new Dataset (copies of self and other are used)'''
        sname = set(self.lname)
        oname = set(other.lname)
        newself = copy(self)
        copother = copy(other)
        for nam in oname - sname:
            newself.addindex({nam: [fillvalue] * len(newself)})
        for nam in sname - oname:
            copother.addindex({nam: [fillvalue] * len(copother)})
        return newself.add(copother, name=True, solve=False)

    def merging(self, listname=None):
        ''' add a new Field build with Field define in listname.
        Values of the new Field are set of values in listname Field'''
        self.addindex(Field.merging([self.nindex(name) for name in listname]))

    def nindex(self, name):
        ''' index with name equal to attribute name (None if not found)'''
        if name in self.lname:
            return self.lindex[self.lname.index(name)]
        return None

    def orindex(self, other, first=False, merge=False, update=False):
        ''' Add other's index to self's index (with same length)

        *Parameters*

        - **other** : self class - object to add
        - **first** : Boolean (default False) - If True insert indexes
        at the first row, else at the end
        - **merge** : Boolean (default False) - create a new index
        if merge is False
        - **update** : Boolean (default False) - if True, update actual
        values if index name is present (and merge is True)

        *Returns* : self

        *Raises* : DatasetError if self and other are both non-empty and
        have different lengths'''
        if len(self) != 0 and len(self) != len(other) and len(other) != 0:
            raise DatasetError("the sizes are not equal")
        otherc = copy(other)
        for idx in otherc.lindex:
            self.addindex(idx, first=first, merge=merge, update=update)
        return self

    def record(self, row, indexname=None, extern=True):
        '''return the record at the row

        *Parameters*

        - **row** : int - row of the record
        - **extern** : boolean (default True) - if True, return val record else
        value record
        - **indexname** : list of str (default None) - list of fields to return

        *Returns*

        - **list** : val record or value record'''
        if indexname is None:
            indexname = self.lname
        if extern:
            record = [idx.val[row] for idx in self.lindex]
        else:
            record = [idx.values[row] for idx in self.lindex]
        return [record[self.lname.index(name)] for name in indexname]

    def recidx(self, row, extern=True):
        '''return the list of idx val or values at the row

        *Parameters*

        - **row** : int - row of the record
        - **extern** : boolean (default True) - if True, return val rec else value rec

        *Returns*

        - **list** : val or value for idx'''
        if extern:
            return [idx.values[row].to_obj() for idx in self.lidx]
        return [idx.values[row] for idx in self.lidx]

    def recvar(self, row, extern=True):
        '''return the list of var val or values at the row

        *Parameters*

        - **row** : int - row of the record
        - **extern** : boolean (default True) - if True, return val rec else value rec

        *Returns*

        - **list** : val or value for var'''
        if extern:
            return [idx.values[row].to_obj() for idx in self.lvar]
        return [idx.values[row] for idx in self.lvar]

    def reindex(self):
        '''Calculate a new default codec for each index (Return self)'''
        for idx in self.lindex:
            idx.reindex()
        return self

    def renameindex(self, oldname, newname):
        '''replace an index name 'oldname' by a new one 'newname'. '''
        for i in range(self.lenindex):
            if self.lname[i] == oldname:
                self.lindex[i].setname(newname)
        # the list of variable names has to follow the renaming
        for i in range(len(self.lvarname)):
            if self.lvarname[i] == oldname:
                self.lvarname[i] = newname

    def reorder(self, recorder=None):
        '''Reorder records in the order define by 'recorder'
        (nothing is done if recorder is not a permutation of the rows)'''
        if recorder is None or set(recorder) != set(range(len(self))):
            return None
        for idx in self.lindex:
            idx.set_keys([idx.keys[i] for i in recorder])
        return None

    def setcanonorder(self, reindex=False):
        '''Set the canonical index order : primary - secondary/unique - variable.
        Set the canonical keys order : ordered keys in the first columns.

        *Parameters*
        - **reindex** : boolean (default False) - if True, set default codec after
        transformation

        *Return* : self'''
        # build a new list : '+=' on self.primaryname could modify in place
        # the list returned by the attribute
        order = [*self.primaryname, *self.secondaryname,
                 *self.lvarname, *self.lunicname]
        self.swapindex(order)
        self.sort(reindex=reindex)
        self.analysis.actualize()
        return self

    def setfilter(self, filt=None, first=False, filtname=ES.filter, unique=False):
        '''Add a filter index with boolean values

        - **filt** : list of boolean - values of the filter idx to add
        - **first** : boolean (default False) - If True insert index at the first row,
        else at the end
        - **filtname** : string (default ES.filter) - Name of the filter Field added
        - **unique** : boolean (default False) - If True, the existing Field
        whose name begins with ES.filter are removed before adding the filter

        *Returns* : self

        *Raises* : DatasetError if filt contains other values than booleans'''
        if not filt:
            filt = [True] * len(self)
        idx = self.field(filt, name=filtname)
        idx.reindex()
        if idx.cod not in ([True, False], [False, True], [True], [False]):
            raise DatasetError('filt is not consistent')
        if unique:
            # iterate on a copy : delindex changes the list of names
            for name in list(self.lname):
                if name.startswith(ES.filter):
                    # remove the matching Field itself (e.g. a renamed
                    # duplicate), not only the one named ES.filter
                    self.delindex(name)
        self.addindex(idx, first=first)
        return self

    def setname(self, listname=None):
        '''Update Field name by the name in listname
        (names are applied in index order, extra names are ignored)'''
        listname = listname or []
        for i in range(min(self.lenindex, len(listname))):
            self.lindex[i].name = listname[i]
        self.analysis.actualize()

    def sort(self, order=None, reverse=False, func=str, reindex=True):
        '''Sort data following the index order and apply the ascending or descending
        sort function to values.

        *Parameters*

        - **order** : list (default None)- new order of index to apply. If None or [],
        the sort function is applied to the existing order of indexes.
        - **reverse** : boolean (default False)- descending if True, ascending if False
        - **func**    : function (default str) - parameter key used in the sorted function
        - **reindex** : boolean (default True) - if True, apply a new codec order (key = func)

        *Returns* : self'''
        if not order:
            order = list(range(self.lenindex))
        # indexes not listed in order are added in ascending position order
        listed = set(order)
        orderfull = list(order) + [i for i in range(self.lenindex)
                                   if i not in listed]
        if reindex:
            for i in order:
                self.lindex[i].reindex(codec=sorted(
                    self.lindex[i].codec, key=func))
        # records (tuples of keys) are sorted, then split again by index
        newidx = util.transpose(sorted(util.transpose(
            [self.lindex[orderfull[i]].keys for i in range(self.lenindex)]),
            reverse=reverse))
        for i in range(self.lenindex):
            self.lindex[orderfull[i]].set_keys(newidx[i])
        return self

    def swapindex(self, order):
        '''
        Change the order of the index .

        *Parameters*

        - **order** : list of int or list of name - new order of index to apply.

        *Returns* : self

        *Raises* : DatasetError if the length of order differs from lenindex'''
        if self.lenindex != len(order):
            raise DatasetError('length of order and Dataset different')
        if not order or isinstance(order[0], int):
            self.lindex = [self.lindex[ind] for ind in order]
        elif isinstance(order[0], str):
            self.lindex = [self.nindex(name) for name in order]
        return self

    def tostdcodec(self, inplace=False, full=True):
        '''Transform all codec in full or default codec.

        *Parameters*

        - **inplace** : boolean  (default False) - if True apply transformation
        to self, else to a new Dataset
        - **full** : boolean (default True)- full codec if True, default if False


        *Return Dataset* : self or new Dataset'''
        lindex = [idx.tostdcodec(inplace=False, full=full)
                  for idx in self.lindex]
        if inplace:
            self.lindex = lindex
            return self
        return self.__class__(lindex, self.lvarname)

    def tree(self, mode='derived', width=5, lname=20, string=True):
        '''return a string with a tree of derived Field.

         *Parameters*

        - **lname** : integer (default 20) - length of the names
        - **width** : integer (default 5) - length of the lines
        - **mode** : string (default 'derived') - kind of tree :
            'derived' : derived tree
            'distance': min distance tree
            'diff': min dist rate tree
        - **string** : boolean (default True) - forwarded to self.analysis.tree
        '''
        return self.analysis.tree(width=width, lname=lname, mode=mode, string=string)

    def updateindex(self, listvalue, index, extern=True):
        '''update values of an index.

        *Parameters*

        - **listvalue** : list - index values to replace
        - **index** : integer - index row to update
        - **extern** : if True, the listvalue has external representation, else internal

        *Returns* : none '''
        self.lindex[index].setlistvalue(listvalue, extern=extern)

    def valtokey(self, rec, extern=True):
        '''convert a record list (value or val for each idx) to a key list
        (key for each index).

        *Parameters*

        - **rec** : list of value or val for each index
        - **extern** : if True, the rec value has external representation, else internal

        *Returns*

        - **list of int** : record key for each index'''
        return [idx.valtokey(val, extern=extern) for idx, val in zip(self.lindex, rec)]
22class DatasetStructure: 23 '''this class includes Dataset methods : 24 25 *selecting - infos methods* 26 27 - `DatasetStructure.couplingmatrix` 28 - `DatasetStructure.idxrecord` 29 - `DatasetStructure.indexinfos` 30 - `DatasetStructure.indicator` 31 - `DatasetStructure.iscanonorder` 32 - `DatasetStructure.isinrecord` 33 - `DatasetStructure.keytoval` 34 - `DatasetStructure.loc` 35 - `DatasetStructure.nindex` 36 - `DatasetStructure.record` 37 - `DatasetStructure.recidx` 38 - `DatasetStructure.recvar` 39 - `DatasetStructure.tree` 40 - `DatasetStructure.valtokey` 41 42 *add - update methods* 43 44 - `DatasetStructure.add` 45 - `DatasetStructure.addindex` 46 - `DatasetStructure.append` 47 - `DatasetStructure.delindex` 48 - `DatasetStructure.delrecord` 49 - `DatasetStructure.orindex` 50 - `DatasetStructure.renameindex` 51 - `DatasetStructure.setvar` 52 - `DatasetStructure.setname` 53 - `DatasetStructure.updateindex` 54 55 *structure management - methods* 56 57 - `DatasetStructure.applyfilter` 58 - `DatasetStructure.coupling` 59 - `DatasetStructure.full` 60 - `DatasetStructure.getduplicates` 61 - `DatasetStructure.mix` 62 - `DatasetStructure.merging` 63 - `DatasetStructure.reindex` 64 - `DatasetStructure.reorder` 65 - `DatasetStructure.setfilter` 66 - `DatasetStructure.sort` 67 - `DatasetStructure.swapindex` 68 - `DatasetStructure.setcanonorder` 69 - `DatasetStructure.tostdcodec` 70 ''' 71 # %% methods 72 73 def add(self, other, name=False, solve=True): 74 ''' Add other's values to self's values for each index 75 76 *Parameters* 77 78 - **other** : Dataset object to add to self object 79 - **name** : Boolean (default False) - Add values with same index name (True) or 80 same index row (False) 81 - **solve** : Boolean (default True) - If True, replace None other's codec value 82 with self codec value. 
83 84 *Returns* : self ''' 85 if self.lenindex != other.lenindex: 86 raise DatasetError('length are not identical') 87 if name and sorted(self.lname) != sorted(other.lname): 88 raise DatasetError('name are not identical') 89 for i in range(self.lenindex): 90 if name: 91 self.lindex[i].add(other.lindex[other.lname.index(self.lname[i])], 92 solve=solve) 93 else: 94 self.lindex[i].add(other.lindex[i], solve=solve) 95 return self 96 97 def addindex(self, index, first=False, merge=False, update=False): 98 '''add a new index. 99 100 *Parameters* 101 102 - **index** : Field - index to add (can be index Ntv representation) 103 - **first** : If True insert index at the first row, else at the end 104 - **merge** : create a new index if merge is False 105 - **update** : if True, update actual values if index name is present (and merge is True) 106 107 *Returns* : none ''' 108 idx = self.field.ntv(index) 109 idxname = self.lname 110 if len(idx) != len(self) and len(self) > 0: 111 raise DatasetError('sizes are different') 112 if not idx.name in idxname: 113 if first: 114 self.lindex.insert(0, idx) 115 else: 116 self.lindex.append(idx) 117 elif not merge: # si idx.name in idxname 118 while idx.name in idxname: 119 idx.name += '(2)' 120 if first: 121 self.lindex.insert(0, idx) 122 else: 123 self.lindex.append(idx) 124 elif update: # si merge et si idx.name in idxname 125 self.lindex[idxname.index(idx.name)].setlistvalue(idx.values) 126 127 def append(self, record, unique=False): 128 '''add a new record. 
129 130 *Parameters* 131 132 - **record** : list of new index values to add to Dataset 133 - **unique** : boolean (default False) - Append isn't done if unique 134 is True and record present 135 136 *Returns* : list - key record''' 137 if self.lenindex != len(record): 138 raise DatasetError('len(record) not consistent') 139 record = self.field.l_to_i(record) 140 if self.isinrecord(self.idxrecord(record), False) and unique: 141 return None 142 return [self.lindex[i].append(record[i]) for i in range(self.lenindex)] 143 144 def applyfilter(self, reverse=False, filtname=ES.filter, delfilter=True, inplace=True): 145 '''delete records with defined filter value. 146 Filter is deleted after record filtering. 147 148 *Parameters* 149 150 - **reverse** : boolean (default False) - delete record with filter's 151 value is reverse 152 - **filtname** : string (default ES.filter) - Name of the filter Field added 153 - **delfilter** : boolean (default True) - If True, delete filter's Field 154 - **inplace** : boolean (default True) - if True, filter is apply to self, 155 156 *Returns* : self or new Dataset''' 157 if not filtname in self.lname: 158 return None 159 if inplace: 160 ilis = self 161 else: 162 ilis = copy(self) 163 ifilt = ilis.lname.index(filtname) 164 if self.field.__name__ == 'Sfield': 165 ilis.sort([ifilt], reverse= not reverse, func=None) 166 else: 167 ilis.sort([ifilt], reverse=reverse, func=None) 168 lisind = ilis.lindex[ifilt].recordfromvalue(reverse) 169 if lisind: 170 minind = min(lisind) 171 for idx in ilis.lindex: 172 del idx.keys[minind:] 173 if inplace: 174 self.delindex(filtname) 175 else: 176 ilis.delindex(filtname) 177 if delfilter: 178 self.delindex(filtname) 179 ilis.reindex() 180 return ilis 181 182 def couplingmatrix(self, default=False, filename=None, att='ratecpl'): 183 '''return a matrix with coupling infos between each idx. 184 One info can be stored in a file (csv format). 
185 186 *Parameters* 187 188 - **default** : comparison with default codec 189 - **filename** : string (default None) - name of the file to write the matrix 190 - **att** : string - name of the info to store in the file 191 192 *Returns* : array of array of dict''' 193 return self.analysis.getmatrix() 194 195 def coupling(self, derived=True, param='rateder', level=0.1): 196 '''Transform idx with low rate in coupled or derived indexes (codec extension). 197 198 *Parameters* 199 200 - **param** : string (default 'rateder') - coupling measurement 201 ('rateder', 'diffdistparent', 'ratecpl', 'distance') 202 - **level** : float (default 0.1) - param threshold to apply coupling. 203 - **derived** : boolean (default : True). If True, indexes are derived, 204 else coupled. 205 206 *Returns* : None''' 207 infos = self.indexinfos() 208 parent = {'rateder': 'distparent', 'diffdistparent': 'distparent', 209 'ratecpl': 'minparent', 'distance': 'minparent'} 210 child = [None] * len(infos) 211 for idx in range(len(infos)): 212 iparent = infos[idx][parent[param]] 213 if iparent != -1: 214 if child[iparent] is None: 215 child[iparent] = [] 216 child[iparent].append(idx) 217 for idx in range(len(infos)): 218 self._couplingidx(idx, child, derived, param, 219 parent[param], level, infos) 220 221 def _couplingidx(self, idx, child, derived, param, parentparam, level, infos): 222 ''' Field coupling (included childrens of the Field)''' 223 inf = infos[idx] 224 if inf['cat'] in ('coupled', 'unique') or inf[parentparam] == -1\ 225 or inf[param] >= level or (derived and inf['cat'] == 'derived'): 226 return 227 if child[idx]: 228 for childidx in child[idx]: 229 self._couplingidx(childidx, child, derived, 230 param, parentparam, level, infos) 231 self.lindex[inf[parentparam]].coupling(self.lindex[idx], derived=derived, 232 duplicate=False) 233 return 234 235 def delrecord(self, record, extern=True): 236 '''remove a record. 
237 238 *Parameters* 239 240 - **record** : list - index values to remove to Dataset 241 - **extern** : if True, compare record values to external representation 242 of self.value, else, internal 243 244 *Returns* : row deleted''' 245 self.reindex() 246 reckeys = self.valtokey(record, extern=extern) 247 if None in reckeys: 248 return None 249 row = self.tiindex.index(reckeys) 250 for idx in self: 251 del idx[row] 252 return row 253 254 def delindex(self, delname=None, savename=None): 255 '''remove an Field or a list of Field. 256 257 *Parameters* 258 259 - **delname** : string or list of string - name of index to remove 260 - **savename** : string or list of string - name of index to keep 261 262 *Returns* : none ''' 263 if not delname and not savename : 264 return 265 if isinstance(delname, str): 266 delname = [delname] 267 if isinstance(savename, str): 268 savename = [savename] 269 if delname and savename: 270 delname = [name for name in delname if not name in savename] 271 if not delname: 272 delname = [name for name in self.lname if not name in savename] 273 for idxname in delname: 274 if idxname in self.lname: 275 self.lindex.pop(self.lname.index(idxname)) 276 277 def _fullindex(self, ind, keysadd, indexname, varname, leng, fillvalue, fillextern): 278 if not varname: 279 varname = [] 280 idx = self.lindex[ind] 281 lenadd = len(keysadd[0]) 282 if len(idx) == leng: 283 return 284 inf = self.indexinfos() 285 if inf[ind]['cat'] == 'unique': 286 idx.set_keys(idx.keys + [0] * lenadd) 287 elif self.lname[ind] in indexname: 288 idx.set_keys(idx.keys + keysadd[indexname.index(self.lname[ind])]) 289 elif inf[ind]['parent'] == -1 or self.lname[ind] in varname: 290 fillval = fillvalue 291 if fillextern: 292 fillval = self.field.s_to_i(fillvalue) 293 idx.set_keys(idx.keys + [len(idx.codec)] * len(keysadd[0])) 294 idx.set_codec(idx.codec + [fillval]) 295 else: 296 parent = inf[ind]['parent'] 297 if len(self.lindex[parent]) != leng: 298 self._fullindex(parent, keysadd, 
indexname, varname, leng, 299 fillvalue, fillextern) 300 if inf[ind]['cat'] == 'coupled': 301 idx.tocoupled(self.lindex[parent], coupling=True) 302 else: 303 idx.tocoupled(self.lindex[parent], coupling=False) 304 305 def full(self, reindex=False, idxname=None, varname=None, fillvalue='-', 306 fillextern=True, inplace=True, complete=True): 307 '''tranform a list of indexes in crossed indexes (value extension). 308 309 *Parameters* 310 311 - **idxname** : list of string - name of indexes to transform 312 - **varname** : string - name of indexes to use 313 - **reindex** : boolean (default False) - if True, set default codec 314 before transformation 315 - **fillvalue** : object value used for var extension 316 - **fillextern** : boolean(default True) - if True, fillvalue is converted 317 to internal value 318 - **inplace** : boolean (default True) - if True, filter is apply to self, 319 - **complete** : boolean (default True) - if True, Field are ordered 320 in canonical order 321 322 *Returns* : self or new Dataset''' 323 ilis = self if inplace else copy(self) 324 if not idxname: 325 idxname = ilis.primaryname 326 if reindex: 327 ilis.reindex() 328 keysadd = util.idxfull([ilis.nindex(name) for name in idxname]) 329 if keysadd and len(keysadd) != 0: 330 lenadd = len(keysadd[0]) 331 for ind in range(ilis.lenindex): 332 ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd, 333 fillvalue, fillextern) 334 '''if not keysadd or len(keysadd) == 0: 335 return ilis 336 lenadd = len(keysadd[0]) 337 for ind in range(ilis.lenindex): 338 ilis._fullindex(ind, keysadd, idxname, varname, len(ilis) + lenadd, 339 fillvalue, fillextern) ''' 340 if complete: 341 ilis.setcanonorder() 342 return ilis 343 344 def getduplicates(self, indexname=None, resindex=None, indexview=None): 345 '''check duplicate cod in a list of indexes. Result is add in a new 346 index or returned. 
347 348 *Parameters* 349 350 - **indexname** : list of string (default none) - name of indexes to check 351 (if None, all Field) 352 - **resindex** : string (default None) - Add a new index named resindex 353 with check result (False if duplicate) 354 - **indexview** : list of str (default None) - list of fields to return 355 356 *Returns* : list of int - list of rows with duplicate cod ''' 357 if not indexname: 358 indexname = self.lname 359 duplicates = [] 360 for name in indexname: 361 duplicates += self.nindex(name).getduplicates() 362 if resindex and isinstance(resindex, str): 363 newidx = self.field([True] * len(self), name=resindex) 364 for item in duplicates: 365 newidx[item] = False 366 self.addindex(newidx) 367 dupl = tuple(set(duplicates)) 368 if not indexview: 369 return dupl 370 return [tuple(self.record(ind, indexview)) for ind in dupl] 371 372 def iscanonorder(self): 373 '''return True if primary indexes have canonical ordered keys''' 374 primary = self.primary 375 canonorder = util.canonorder( 376 [len(self.lidx[idx].codec) for idx in primary]) 377 return canonorder == [self.lidx[idx].keys for idx in primary] 378 379 def isinrecord(self, record, extern=True): 380 '''Check if record is present in self. 
381 382 *Parameters* 383 384 - **record** : list - value for each Field 385 - **extern** : if True, compare record values to external representation 386 of self.value, else, internal 387 388 *Returns boolean* : True if found''' 389 if extern: 390 return record in util.transpose(self.extidxext) 391 return record in util.transpose(self.extidx) 392 393 def idxrecord(self, record): 394 '''return rec array (without variable) from complete record (with variable)''' 395 return [record[self.lidxrow[i]] for i in range(len(self.lidxrow))] 396 397 def indexinfos(self, keys=None): 398 '''return a dict with infos of each index : 399 - num, name, cat, diffdistparent, child, parent, distparent, 400 crossed, pparent, rateder (struct info) 401 - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info) 402 403 *Parameters* 404 405 - **keys** : string, list or tuple (default None) - list of attributes 406 to returned. 407 if 'all' or None, all attributes are returned. 408 if 'struct', only structural attributes are returned. 
409 410 *Returns* : dict''' 411 return self.analysis.getinfos(keys) 412 413 def indicator(self, fullsize=None, size=None): 414 '''generate size indicators: ol (object lightness), ul (unicity level), 415 gain (sizegain) 416 417 *Parameters* 418 419 - **fullsize** : int (default none) - size with full codec 420 - **size** : int (default none) - size with existing codec 421 - **indexinfos** : list (default None) - indexinfos data 422 423 *Returns* : dict''' 424 if not fullsize: 425 fullsize = len(self.to_obj(encoded=True, modecodec='full')) 426 if not size: 427 size = len(self.to_obj(encoded=True)) 428 nval = len(self) * (self.lenindex + 1) 429 sval = fullsize / nval 430 ncod = sum(self.indexlen) + self.lenindex 431 if nval != ncod: 432 scod = (size - ncod * sval) / (nval - ncod) 433 olight = scod / sval 434 else: 435 olight = None 436 return {'total values': nval, 'mean size': round(sval, 3), 437 'unique values': ncod, 'mean coding size': round(scod, 3), 438 'unicity level': round(ncod / nval, 3), 439 'optimize level': round(size / fullsize, 3), 440 'object lightness': round(olight, 3), 441 'maxgain': round((nval - ncod) / nval, 3), 442 'gain': round((fullsize - size) / fullsize, 3)} 443 444 def keytoval(self, listkey, extern=True): 445 ''' 446 convert a keys list (key for each index) to a values list (value for each index). 447 448 *Parameters* 449 450 - **listkey** : key for each index 451 - **extern** : boolean (default True) - if True, compare rec to val else to values 452 453 *Returns* 454 455 - **list** : value for each index''' 456 return [idx.keytoval(key, extern=extern) for idx, key in zip(self.lindex, listkey)] 457 458 def loc(self, rec, extern=True, row=False): 459 ''' 460 Return record or row corresponding to a list of idx values. 
461 462 *Parameters* 463 464 - **rec** : list - value for each idx 465 - **extern** : boolean (default True) - if True, compare rec to val, 466 else to values 467 - **row** : Boolean (default False) - if True, return list of row, 468 else list of records 469 470 *Returns* 471 472 - **object** : variable value or None if not found''' 473 locrow = None 474 try: 475 if len(rec) == self.lenindex: 476 locrow = list(set.intersection(*[set(self.lindex[i].loc(rec[i], extern)) 477 for i in range(self.lenindex)])) 478 elif len(rec) == self.lenidx: 479 locrow = list(set.intersection(*[set(self.lidx[i].loc(rec[i], extern)) 480 for i in range(self.lenidx)])) 481 except: 482 pass 483 if locrow is None: 484 return None 485 if row: 486 return locrow 487 return [self.record(locr, extern=extern) for locr in locrow] 488 489 def mix(self, other, fillvalue=None): 490 '''add other Field not included in self and add other's values''' 491 sname = set(self.lname) 492 oname = set(other.lname) 493 newself = copy(self) 494 copother = copy(other) 495 for nam in oname - sname: 496 newself.addindex({nam: [fillvalue] * len(newself)}) 497 for nam in sname - oname: 498 copother.addindex({nam: [fillvalue] * len(copother)}) 499 return newself.add(copother, name=True, solve=False) 500 501 def merging(self, listname=None): 502 ''' add a new Field build with Field define in listname. 
503 Values of the new Field are set of values in listname Field''' 504 self.addindex(Field.merging([self.nindex(name) for name in listname])) 505 506 def nindex(self, name): 507 ''' index with name equal to attribute name''' 508 if name in self.lname: 509 return self.lindex[self.lname.index(name)] 510 return None 511 512 def orindex(self, other, first=False, merge=False, update=False): 513 ''' Add other's index to self's index (with same length) 514 515 *Parameters* 516 517 - **other** : self class - object to add 518 - **first** : Boolean (default False) - If True insert indexes 519 at the first row, else at the end 520 - **merge** : Boolean (default False) - create a new index 521 if merge is False 522 - **update** : Boolean (default False) - if True, update actual 523 values if index name is present (and merge is True) 524 525 *Returns* : none ''' 526 if len(self) != 0 and len(self) != len(other) and len(other) != 0: 527 raise DatasetError("the sizes are not equal") 528 otherc = copy(other) 529 for idx in otherc.lindex: 530 self.addindex(idx, first=first, merge=merge, update=update) 531 return self 532 533 def record(self, row, indexname=None, extern=True): 534 '''return the record at the row 535 536 *Parameters* 537 538 - **row** : int - row of the record 539 - **extern** : boolean (default True) - if True, return val record else 540 value record 541 - **indexname** : list of str (default None) - list of fields to return 542 *Returns* 543 544 - **list** : val record or value record''' 545 if indexname is None: 546 indexname = self.lname 547 if extern: 548 record = [idx.val[row] for idx in self.lindex] 549 #record = [idx.values[row].to_obj() for idx in self.lindex] 550 #record = [idx.valrow(row) for idx in self.lindex] 551 else: 552 record = [idx.values[row] for idx in self.lindex] 553 return [record[self.lname.index(name)] for name in indexname] 554 555 def recidx(self, row, extern=True): 556 '''return the list of idx val or values at the row 557 558 
*Parameters* 559 560 - **row** : int - row of the record 561 - **extern** : boolean (default True) - if True, return val rec else value rec 562 563 *Returns* 564 565 - **list** : val or value for idx''' 566 if extern: 567 return [idx.values[row].to_obj() for idx in self.lidx] 568 #return [idx.valrow(row) for idx in self.lidx] 569 return [idx.values[row] for idx in self.lidx] 570 571 def recvar(self, row, extern=True): 572 '''return the list of var val or values at the row 573 574 *Parameters* 575 576 - **row** : int - row of the record 577 - **extern** : boolean (default True) - if True, return val rec else value rec 578 579 *Returns* 580 581 - **list** : val or value for var''' 582 if extern: 583 return [idx.values[row].to_obj() for idx in self.lvar] 584 #return [idx.valrow(row) for idx in self.lvar] 585 return [idx.values[row] for idx in self.lvar] 586 587 def reindex(self): 588 '''Calculate a new default codec for each index (Return self)''' 589 for idx in self.lindex: 590 idx.reindex() 591 return self 592 593 def renameindex(self, oldname, newname): 594 '''replace an index name 'oldname' by a new one 'newname'. ''' 595 for i in range(self.lenindex): 596 if self.lname[i] == oldname: 597 self.lindex[i].setname(newname) 598 for i in range(len(self.lvarname)): 599 if self.lvarname[i] == oldname: 600 self.lvarname[i] = newname 601 602 def reorder(self, recorder=None): 603 '''Reorder records in the order define by 'recorder' ''' 604 if recorder is None or set(recorder) != set(range(len(self))): 605 return None 606 for idx in self.lindex: 607 idx.set_keys([idx.keys[i] for i in recorder]) 608 return None 609 610 def setcanonorder(self, reindex=False): 611 '''Set the canonical index order : primary - secondary/unique - variable. 612 Set the canonical keys order : ordered keys in the first columns. 
613 614 *Parameters* 615 - **reindex** : boolean (default False) - if True, set default codec after 616 transformation 617 618 *Return* : self''' 619 order = self.primaryname 620 order += self.secondaryname 621 order += self.lvarname 622 order += self.lunicname 623 self.swapindex(order) 624 self.sort(reindex=reindex) 625 self.analysis.actualize() 626 return self 627 628 def setfilter(self, filt=None, first=False, filtname=ES.filter, unique=False): 629 '''Add a filter index with boolean values 630 631 - **filt** : list of boolean - values of the filter idx to add 632 - **first** : boolean (default False) - If True insert index at the first row, 633 else at the end 634 - **filtname** : string (default ES.filter) - Name of the filter Field added 635 636 *Returns* : self''' 637 if not filt: 638 filt = [True] * len(self) 639 idx = self.field(filt, name=filtname) 640 idx.reindex() 641 if not idx.cod in ([True, False], [False, True], [True], [False]): 642 raise DatasetError('filt is not consistent') 643 if unique: 644 for name in self.lname: 645 if name[:len(ES.filter)] == ES.filter: 646 self.delindex(ES.filter) 647 self.addindex(idx, first=first) 648 return self 649 650 def setname(self, listname=None): 651 '''Update Field name by the name in listname''' 652 for i in range(min(self.lenindex, len(listname))): 653 self.lindex[i].name = listname[i] 654 self.analysis.actualize() 655 656 def sort(self, order=None, reverse=False, func=str, reindex=True): 657 '''Sort data following the index order and apply the ascending or descending 658 sort function to values. 659 660 *Parameters* 661 662 - **order** : list (default None)- new order of index to apply. If None or [], 663 the sort function is applied to the existing order of indexes. 
664 - **reverse** : boolean (default False)- ascending if True, descending if False 665 - **func** : function (default str) - parameter key used in the sorted function 666 - **reindex** : boolean (default True) - if True, apply a new codec order (key = func) 667 668 *Returns* : self''' 669 if not order: 670 order = list(range(self.lenindex)) 671 orderfull = order + list(set(range(self.lenindex)) - set(order)) 672 if reindex: 673 for i in order: 674 self.lindex[i].reindex(codec=sorted( 675 self.lindex[i].codec, key=func)) 676 newidx = util.transpose(sorted(util.transpose( 677 [self.lindex[orderfull[i]].keys for i in range(self.lenindex)]), 678 reverse=reverse)) 679 for i in range(self.lenindex): 680 self.lindex[orderfull[i]].set_keys(newidx[i]) 681 return self 682 683 def swapindex(self, order): 684 ''' 685 Change the order of the index . 686 687 *Parameters* 688 689 - **order** : list of int or list of name - new order of index to apply. 690 691 *Returns* : self ''' 692 if self.lenindex != len(order): 693 raise DatasetError('length of order and Dataset different') 694 if not order or isinstance(order[0], int): 695 self.lindex = [self.lindex[ind] for ind in order] 696 elif isinstance(order[0], str): 697 self.lindex = [self.nindex(name) for name in order] 698 return self 699 700 def tostdcodec(self, inplace=False, full=True): 701 '''Transform all codec in full or default codec. 702 703 *Parameters* 704 705 - **inplace** : boolean (default False) - if True apply transformation 706 to self, else to a new Dataset 707 - **full** : boolean (default True)- full codec if True, default if False 708 709 710 *Return Dataset* : self or new Dataset''' 711 lindex = [idx.tostdcodec(inplace=False, full=full) 712 for idx in self.lindex] 713 if inplace: 714 self.lindex = lindex 715 return self 716 return self.__class__(lindex, self.lvarname) 717 718 def tree(self, mode='derived', width=5, lname=20, string=True): 719 '''return a string with a tree of derived Field. 
720 721 *Parameters* 722 723 - **lname** : integer (default 20) - length of the names 724 - **width** : integer (default 5) - length of the lines 725 - **mode** : string (default 'derived') - kind of tree : 726 'derived' : derived tree 727 'distance': min distance tree 728 'diff': min dist rate tree 729 ''' 730 return self.analysis.tree(width=width, lname=lname, mode=mode, string=string) 731 732 def updateindex(self, listvalue, index, extern=True): 733 '''update values of an index. 734 735 *Parameters* 736 737 - **listvalue** : list - index values to replace 738 - **index** : integer - index row to update 739 - **extern** : if True, the listvalue has external representation, else internal 740 741 *Returns* : none ''' 742 self.lindex[index].setlistvalue(listvalue, extern=extern) 743 744 def valtokey(self, rec, extern=True): 745 '''convert a record list (value or val for each idx) to a key list 746 (key for each index). 747 748 *Parameters* 749 750 - **rec** : list of value or val for each index 751 - **extern** : if True, the rec value has external representation, else internal 752 753 *Returns* 754 755 - **list of int** : record key for each index''' 756 return [idx.valtokey(val, extern=extern) for idx, val in zip(self.lindex, rec)]
this class includes Dataset methods :
selecting - infos methods
DatasetStructure.couplingmatrix
DatasetStructure.idxrecord
DatasetStructure.indexinfos
DatasetStructure.indicator
DatasetStructure.iscanonorder
DatasetStructure.isinrecord
DatasetStructure.keytoval
DatasetStructure.loc
DatasetStructure.nindex
DatasetStructure.record
DatasetStructure.recidx
DatasetStructure.recvar
DatasetStructure.tree
DatasetStructure.valtokey
add - update methods
DatasetStructure.add
DatasetStructure.addindex
DatasetStructure.append
DatasetStructure.delindex
DatasetStructure.delrecord
DatasetStructure.orindex
DatasetStructure.renameindex
DatasetStructure.setvar
DatasetStructure.setname
DatasetStructure.updateindex
structure management - methods
DatasetStructure.applyfilter
DatasetStructure.coupling
DatasetStructure.full
DatasetStructure.getduplicates
DatasetStructure.mix
DatasetStructure.merging
DatasetStructure.reindex
DatasetStructure.reorder
DatasetStructure.setfilter
DatasetStructure.sort
DatasetStructure.swapindex
DatasetStructure.setcanonorder
DatasetStructure.tostdcodec
def add(self, other, name=False, solve=True):
    '''Add other's values to self's values for each index.

    *Parameters*

    - **other** : Dataset object to add to self object
    - **name** : Boolean (default False) - if True, each index of self is
    paired with the index of other having the same name, else with the
    index at the same position
    - **solve** : Boolean (default True) - forwarded unchanged to the `add`
    method of every index

    *Returns* : self

    *Raises* : DatasetError if the number of indexes differs or, when name
    is True, if the sets of names differ'''
    if self.lenindex != other.lenindex:
        raise DatasetError('length are not identical')
    if not name:
        # positional pairing: index i of other goes into index i of self
        for mine, theirs in zip(self.lindex, other.lindex):
            mine.add(theirs, solve=solve)
        return self
    if sorted(self.lname) != sorted(other.lname):
        raise DatasetError('name are not identical')
    # name pairing: look up the position of each name of self in other
    for mine, label in zip(self.lindex, self.lname):
        mine.add(other.lindex[other.lname.index(label)], solve=solve)
    return self
Add other's values to self's values for each index
Parameters
- other : Dataset object to add to self object
- name : Boolean (default False) - Add values with same index name (True) or same index row (False)
- solve : Boolean (default True) - If True, replace None other's codec value with self codec value.
Returns : self
def addindex(self, index, first=False, merge=False, update=False):
    '''Add a new index.

    *Parameters*

    - **index** : Field - index to add (passed through `self.field.ntv`)
    - **first** : if True insert the index in first position, else at the end
    - **merge** : if False, an index whose name is already present is added
    as a new index with a '(2)' suffix
    - **update** : if True (and merge is True), the values of the existing
    index with the same name are replaced

    *Returns* : none

    *Raises* : DatasetError if self is not empty and lengths differ'''
    new = self.field.ntv(index)
    names = self.lname
    if len(new) != len(self) and len(self) > 0:
        raise DatasetError('sizes are different')
    if new.name in names and merge:
        # name already present and merge requested: never add a new index,
        # only overwrite the existing values when update is set
        if update:
            self.lindex[names.index(new.name)].setlistvalue(new.values)
        return
    # not merging: suffix the name until it no longer collides
    # (no-op when the name is not present)
    while new.name in names:
        new.name += '(2)'
    if first:
        self.lindex.insert(0, new)
    else:
        self.lindex.append(new)
add a new index.
Parameters
- index : Field - index to add (can be index Ntv representation)
- first : If True insert index at the first row, else at the end
- merge : create a new index if merge is False
- update : if True, update actual values if index name is present (and merge is True)
Returns : none
def append(self, record, unique=False):
    '''Add a new record.

    *Parameters*

    - **record** : list of new index values to add to Dataset
    - **unique** : boolean (default False) - Append isn't done if unique
    is True and record present

    *Returns* : list of the values returned by the `append` of each index
    (documented upstream as the key record), or None when the record is
    skipped because unique is True and it is already present

    *Raises* : DatasetError if record does not hold one value per index'''
    if self.lenindex != len(record):
        raise DatasetError('len(record) not consistent')
    record = self.field.l_to_i(record)
    # Test the cheap flag first: the presence check scans the whole
    # Dataset and is only needed when uniqueness is requested.
    if unique and self.isinrecord(self.idxrecord(record), False):
        return None
    return [idx.append(val) for idx, val in zip(self.lindex, record)]
add a new record.
Parameters
- record : list of new index values to add to Dataset
- unique : boolean (default False) - Append isn't done if unique is True and record present
Returns : list - key record
def applyfilter(self, reverse=False, filtname=ES.filter, delfilter=True, inplace=True):
    '''Delete records with defined filter value.

    *Parameters*

    - **reverse** : boolean (default False) - filter value of the records
    to delete
    - **filtname** : string (default ES.filter) - Name of the filter Field
    - **delfilter** : boolean (default True) - If True, the filter Field is
    deleted from the returned Dataset after record filtering
    - **inplace** : boolean (default True) - if True, the filter is applied
    to self, else to a copy (self is then left with its filter Field)

    *Returns* : self or new Dataset (None if filtname is not a Field name)'''
    if filtname not in self.lname:
        return None
    ilis = self if inplace else copy(self)
    ifilt = ilis.lname.index(filtname)
    # The sort direction is inverted when the field class is named 'Sfield'.
    descending = not reverse if self.field.__name__ == 'Sfield' else reverse
    ilis.sort([ifilt], reverse=descending, func=None)
    # presumably the rows whose filter value equals `reverse` - confirm
    # against Field.recordfromvalue
    lisind = ilis.lindex[ifilt].recordfromvalue(reverse)
    if lisind:
        # after the sort, everything from the first such row is truncated
        minind = min(lisind)
        for idx in ilis.lindex:
            del idx.keys[minind:]
    # Only the Dataset being returned loses its filter, and only on request
    # (previously the filter was always removed and, when not inplace, it
    # was also removed from the untouched original).
    if delfilter:
        ilis.delindex(filtname)
    ilis.reindex()
    return ilis
delete records with defined filter value. Filter is deleted after record filtering.
Parameters
- reverse : boolean (default False) - records whose filter value equals reverse are deleted
- filtname : string (default ES.filter) - Name of the filter Field added
- delfilter : boolean (default True) - If True, delete filter's Field
- inplace : boolean (default True) - if True, the filter is applied to self, else to a copy
Returns : self or new Dataset
def couplingmatrix(self, default=False, filename=None, att='ratecpl'):
    '''Return the matrix of coupling infos between each idx, as provided
    by `self.analysis.getmatrix()`.

    *Parameters*

    - **default** : comparison with default codec (not used by this method)
    - **filename** : string (default None) - name of the file to write the
    matrix (not used by this method)
    - **att** : string - name of the info to store in the file (not used by
    this method)

    *Returns* : array of array of dict'''
    matrix = self.analysis.getmatrix()
    return matrix
return a matrix with coupling infos between each idx. One info can be stored in a file (csv format).
Parameters
- default : comparison with default codec
- filename : string (default None) - name of the file to write the matrix
- att : string - name of the info to store in the file
Returns : array of array of dict
def coupling(self, derived=True, param='rateder', level=0.1):
    '''Transform idx with low rate in coupled or derived indexes (codec extension).

    *Parameters*

    - **param** : string (default 'rateder') - coupling measurement
    ('rateder', 'diffdistparent', 'ratecpl', 'distance')
    - **level** : float (default 0.1) - param threshold to apply coupling.
    - **derived** : boolean (default : True). If True, indexes are derived,
    else coupled.

    *Returns* : None'''
    infos = self.indexinfos()
    # name of the info entry holding the parent index for each measurement
    parentkeys = {'rateder': 'distparent', 'diffdistparent': 'distparent',
                  'ratecpl': 'minparent', 'distance': 'minparent'}
    # children[p] lists the indexes whose parent is p (None when childless)
    children = [None] * len(infos)
    for num in range(len(infos)):
        iparent = infos[num][parentkeys[param]]
        if iparent == -1:
            continue
        if children[iparent] is None:
            children[iparent] = []
        children[iparent].append(num)
    for num in range(len(infos)):
        self._couplingidx(num, children, derived, param,
                          parentkeys[param], level, infos)
Transform idx with low rate in coupled or derived indexes (codec extension).
Parameters
- param : string (default 'rateder') - coupling measurement ('rateder', 'diffdistparent', 'ratecpl', 'distance')
- level : float (default 0.1) - param threshold to apply coupling.
- derived : boolean (default : True). If True, indexes are derived, else coupled.
Returns : None
def delrecord(self, record, extern=True):
    '''Remove a record.

    *Parameters*

    - **record** : list - index values to remove from Dataset
    - **extern** : if True, compare record values to external representation
    of self.value, else, internal

    *Returns* : row deleted, or None if the record is not found (a value is
    unknown to its index, or no row holds this combination of values)'''
    self.reindex()
    reckeys = self.valtokey(record, extern=extern)
    if None in reckeys:
        return None
    try:
        row = self.tiindex.index(reckeys)
    except ValueError:
        # every value exists in its index but not together on one row:
        # report "not found" the same way as for an unknown value
        return None
    for idx in self:
        del idx[row]
    return row
remove a record.
Parameters
- record : list - index values to remove from Dataset
- extern : if True, compare record values to external representation of self.value, else, internal
Returns : row deleted
def delindex(self, delname=None, savename=None):
    '''Remove a Field or a list of Fields.

    *Parameters*

    - **delname** : string or list of string - name of index to remove
    - **savename** : string or list of string - name of index to keep

    When both are given, the names of delname that are not in savename are
    removed. When only savename is given, every Field not in savename is
    removed. Unknown names are ignored.

    *Returns* : none '''
    if not delname and not savename:
        return
    if isinstance(delname, str):
        delname = [delname]
    if isinstance(savename, str):
        savename = [savename]
    if delname and savename:
        # savename protects the names it lists; if that leaves nothing to
        # delete, nothing is deleted (it must not fall through to the
        # "delete everything except savename" case below)
        delname = [name for name in delname if name not in savename]
    elif not delname:
        delname = [name for name in self.lname if name not in savename]
    for idxname in delname:
        names = self.lname
        if idxname in names:
            self.lindex.pop(names.index(idxname))
remove a Field or a list of Fields.
Parameters
- delname : string or list of string - name of index to remove
- savename : string or list of string - name of index to keep
Returns : none
def full(self, reindex=False, idxname=None, varname=None, fillvalue='-',
         fillextern=True, inplace=True, complete=True):
    '''Transform a list of indexes into crossed indexes (value extension).

    *Parameters*

    - **idxname** : list of string - name of indexes to transform
    (default : `primaryname` of the Dataset)
    - **varname** : string - name of indexes to use
    - **reindex** : boolean (default False) - if True, set default codec
    before transformation
    - **fillvalue** : object value used for var extension
    - **fillextern** : boolean(default True) - if True, fillvalue is converted
    to internal value
    - **inplace** : boolean (default True) - if True, the transformation is
    applied to self, else to a copy
    - **complete** : boolean (default True) - if True, Field are ordered
    in canonical order

    *Returns* : self or new Dataset'''
    if inplace:
        target = self
    else:
        target = copy(self)
    if not idxname:
        idxname = target.primaryname
    if reindex:
        target.reindex()
    crossed = [target.nindex(nam) for nam in idxname]
    keysadd = util.idxfull(crossed)
    if keysadd and len(keysadd) != 0:
        lenadd = len(keysadd[0])
        for ind in range(target.lenindex):
            # the target length is recomputed from len(target) on every
            # iteration, exactly as before
            target._fullindex(ind, keysadd, idxname, varname,
                              len(target) + lenadd, fillvalue, fillextern)
    if complete:
        target.setcanonorder()
    return target
transform a list of indexes into crossed indexes (value extension).
Parameters
- idxname : list of string - name of indexes to transform
- varname : string - name of indexes to use
- reindex : boolean (default False) - if True, set default codec before transformation
- fillvalue : object value used for var extension
- fillextern : boolean(default True) - if True, fillvalue is converted to internal value
- inplace : boolean (default True) - if True, the transformation is applied to self, else to a copy
- complete : boolean (default True) - if True, Field are ordered in canonical order
Returns : self or new Dataset
def getduplicates(self, indexname=None, resindex=None, indexview=None):
    '''Check duplicate cod in a list of indexes. The result is added in a
    new index and/or returned.

    *Parameters*

    - **indexname** : list of string (default none) - name of indexes to check
    (if None, all Field)
    - **resindex** : string (default None) - Add a new index named resindex
    with check result (False if duplicate)
    - **indexview** : list of str (default None) - list of fields to return

    *Returns* : tuple of int - rows with duplicate cod (no indexview), else
    list of tuple - the indexview values of these rows'''
    names = indexname if indexname else self.lname
    # rows reported by each checked index, concatenated (may repeat)
    duplicates = [row for nam in names
                  for row in self.nindex(nam).getduplicates()]
    if resindex and isinstance(resindex, str):
        flags = self.field([True] * len(self), name=resindex)
        for row in duplicates:
            flags[row] = False
        self.addindex(flags)
    unique_rows = tuple(set(duplicates))
    if indexview:
        return [tuple(self.record(row, indexview)) for row in unique_rows]
    return unique_rows
check duplicate cod in a list of indexes. The result is added in a new index or returned.
Parameters
- indexname : list of string (default none) - name of indexes to check (if None, all Field)
- resindex : string (default None) - Add a new index named resindex with check result (False if duplicate)
- indexview : list of str (default None) - list of fields to return
Returns : list of int - list of rows with duplicate cod
def iscanonorder(self):
    '''Return True if the keys of the primary indexes are exactly the keys
    given by `util.canonorder` for the lengths of their codecs.'''
    primary = self.primary
    codeclengths = [len(self.lidx[num].codec) for num in primary]
    expected = util.canonorder(codeclengths)
    actual = [self.lidx[num].keys for num in primary]
    return expected == actual
return True if primary indexes have canonical ordered keys
def isinrecord(self, record, extern=True):
    '''Check if record is present in self.

    *Parameters*

    - **record** : list - value for each Field
    - **extern** : if True, the record is searched in the transposition of
    `self.extidxext`, else in the transposition of `self.extidx`

    *Returns boolean* : True if found'''
    columns = self.extidxext if extern else self.extidx
    return record in util.transpose(columns)
Check if record is present in self.
Parameters
- record : list - value for each Field
- extern : if True, compare record values to external representation of self.value, else, internal
Returns boolean : True if found
def idxrecord(self, record):
    '''return the rec array (without variable) extracted from a complete
    record (with variable): the values located at the rows of `self.lidxrow`'''
    return [record[position] for position in self.lidxrow]
return rec array (without variable) from complete record (with variable)
def indexinfos(self, keys=None):
    '''return a dict with infos of each index (delegated to
    `self.analysis.getinfos`) :
    - num, name, cat, diffdistparent, child, parent, distparent,
    crossed, pparent, rateder (struct info)
    - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)

    *Parameters*

    - **keys** : string, list or tuple (default None) - list of attributes
    to return.
    if 'all' or None, all attributes are returned.
    if 'struct', only structural attributes are returned.

    *Returns* : dict'''
    analysis = self.analysis
    return analysis.getinfos(keys)
return a dict with infos of each index : - num, name, cat, diffdistparent, child, parent, distparent, crossed, pparent, rateder (struct info) - lencodec, mincodec, maxcodec, typecodec, ratecodec (base info)
Parameters
- keys : string, list or tuple (default None) - list of attributes to return. If 'all' or None, all attributes are returned. If 'struct', only structural attributes are returned.
Returns : dict
def indicator(self, fullsize=None, size=None):
    '''generate size indicators: ol (object lightness), ul (unicity level),
    gain (sizegain)

    *Parameters*

    - **fullsize** : int (default None) - size with full codec. If None or 0,
    it is the length of `self.to_obj(encoded=True, modecodec='full')`
    - **size** : int (default None) - size with existing codec. If None or 0,
    it is the length of `self.to_obj(encoded=True)`

    *Returns* : dict with keys 'total values', 'mean size', 'unique values',
    'mean coding size', 'unicity level', 'optimize level',
    'object lightness', 'maxgain', 'gain'. 'mean coding size' and
    'object lightness' are None when the number of unique values equals the
    total number of values (they are then undefined).

    Note: an empty Dataset (no total values) still raises ZeroDivisionError.'''
    if not fullsize:
        fullsize = len(self.to_obj(encoded=True, modecodec='full'))
    if not size:
        size = len(self.to_obj(encoded=True))
    nval = len(self) * (self.lenindex + 1)
    sval = fullsize / nval
    ncod = sum(self.indexlen) + self.lenindex
    if nval != ncod:
        scod = (size - ncod * sval) / (nval - ncod)
        olight = scod / sval
    else:
        # nval - ncod is zero: the coding size ratio cannot be computed
        scod = None
        olight = None

    def rounded(value):
        '''round to 3 digits, None stays None'''
        return None if value is None else round(value, 3)

    return {'total values': nval, 'mean size': rounded(sval),
            'unique values': ncod, 'mean coding size': rounded(scod),
            'unicity level': rounded(ncod / nval),
            'optimize level': rounded(size / fullsize),
            'object lightness': rounded(olight),
            'maxgain': rounded((nval - ncod) / nval),
            'gain': rounded((fullsize - size) / fullsize)}
generate size indicators: ol (object lightness), ul (unicity level), gain (sizegain)
Parameters
- fullsize : int (default none) - size with full codec
- size : int (default none) - size with existing codec
- indexinfos : list (default None) - indexinfos data (note: this parameter is not part of the method signature shown above)
Returns : dict
def keytoval(self, listkey, extern=True):
    '''
    Translate a list of keys (one key per index) into a list of values
    (one value per index).

    *Parameters*

    - **listkey** : key for each index (paired in order with `self.lindex`)
    - **extern** : boolean (default True) - forwarded to the `keytoval`
    method of each index

    *Returns*

    - **list** : value for each index'''
    values = []
    for field, key in zip(self.lindex, listkey):
        values.append(field.keytoval(key, extern=extern))
    return values
convert a keys list (key for each index) to a values list (value for each index).
Parameters
- listkey : key for each index
- extern : boolean (default True) - if True, compare rec to val else to values
Returns
- list : value for each index
def loc(self, rec, extern=True, row=False):
    '''
    Return the records or the rows matching a list of idx values.

    *Parameters*

    - **rec** : list - value for each index (length `self.lenindex`) or for
    each idx (length `self.lenidx`)
    - **extern** : boolean (default True) - if True, compare rec to val,
    else to values
    - **row** : Boolean (default False) - if True, return list of row,
    else list of records

    *Returns*

    - **list** : rows (if row is True) or records matching rec.
    None if the length of rec fits neither `self.lenindex` nor
    `self.lenidx`, or if the search fails.'''
    locrow = None
    try:
        if len(rec) == self.lenindex:
            fields = self.lindex
        elif len(rec) == self.lenidx:
            fields = self.lidx
        else:
            fields = None
        if fields is not None:
            # rows shared by every field for its own value
            locrow = list(set.intersection(
                *[set(fields[i].loc(rec[i], extern))
                  for i in range(len(rec))]))
    except Exception:
        # best effort: a failed search means 'not found'. Exception (and not
        # a bare except) lets KeyboardInterrupt and SystemExit go through.
        locrow = None
    if locrow is None:
        return None
    if row:
        return locrow
    return [self.record(locr, extern=extern) for locr in locrow]
Return record or row corresponding to a list of idx values.
Parameters
- rec : list - value for each idx
- extern : boolean (default True) - if True, compare rec to val, else to values
- row : Boolean (default False) - if True, return list of row, else list of records
Returns
- list : rows (if row is True) or records matching rec, or None if not found
def mix(self, other, fillvalue=None):
    '''Return the addition of a copy of self and a copy of other, after
    each copy has received the Field it lacks (filled with fillvalue).

    *Parameters*

    - **other** : object to mix with self
    - **fillvalue** : value (default None) - value used for the rows of the
    added Field

    *Returns* : result of `add` (name=True, solve=False) applied to the
    completed copies'''
    own_names = set(self.lname)
    other_names = set(other.lname)
    merged = copy(self)
    addition = copy(other)

    def pad(dataset, names):
        '''add to dataset a constant Field for each name'''
        for name in names:
            dataset.addindex({name: [fillvalue] * len(dataset)})

    pad(merged, other_names - own_names)
    pad(addition, own_names - other_names)
    return merged.add(addition, name=True, solve=False)
Add to self the Fields of other that are not included in self, then add other's values.
def merging(self, listname=None):
    '''Add a new Field built by `Field.merging` from the Field named in
    listname.

    *Parameters*

    - **listname** : list of str - names of the Field to merge. The default
    None is not usable as is (the names are iterated).

    *Returns* : none'''
    fields = [self.nindex(name) for name in listname]
    merged = Field.merging(fields)
    self.addindex(merged)
Add a new Field built from the Fields named in listname. The values of the new Field are sets of the values of the listname Fields.
def nindex(self, name):
    '''return the index whose name is equal to the attribute name
    (None if the name is not in `self.lname`)'''
    try:
        position = self.lname.index(name)
    except ValueError:
        # unknown name
        return None
    return self.lindex[position]
index with name equal to attribute name
def orindex(self, other, first=False, merge=False, update=False):
    '''Add each index of other to self (both must have the same length,
    unless one of them is empty).

    *Parameters*

    - **other** : self class - object to add
    - **first** : Boolean (default False) - forwarded to `addindex`
    (If True insert indexes at the first row, else at the end)
    - **merge** : Boolean (default False) - forwarded to `addindex`
    (create a new index if merge is False)
    - **update** : Boolean (default False) - forwarded to `addindex`
    (if True, update actual values if index name is present and merge
    is True)

    *Returns* : self

    *Raises* : DatasetError if both lengths are non-zero and different'''
    compatible = (len(self) == 0 or len(self) == len(other)
                  or len(other) == 0)
    if not compatible:
        raise DatasetError("the sizes are not equal")
    # the indexes are taken from a copy of other
    for field in copy(other).lindex:
        self.addindex(field, first=first, merge=merge, update=update)
    return self
Add other's index to self's index (with same length)
Parameters
- other : self class - object to add
- first : Boolean (default False) - If True insert indexes at the first row, else at the end
- merge : Boolean (default False) - create a new index if merge is False
- update : Boolean (default False) - if True, update actual values if index name is present (and merge is True)
Returns : self
def record(self, row, indexname=None, extern=True):
    '''return the record located at a row

    *Parameters*

    - **row** : int - row of the record
    - **indexname** : list of str (default None) - names of the fields to
    return (if None, every name of `self.lname`)
    - **extern** : boolean (default True) - if True, the `val` of each index
    is read, else its `values`

    *Returns*

    - **list** : val record or value record, in the order of indexname'''
    names = self.lname if indexname is None else indexname
    attribute = 'val' if extern else 'values'
    full_record = [getattr(field, attribute)[row] for field in self.lindex]
    return [full_record[self.lname.index(name)] for name in names]
return the record at the row
Parameters
- row : int - row of the record
- extern : boolean (default True) - if True, return val record else value record
- indexname : list of str (default None) - list of fields to return
Returns
- list : val record or value record
def recidx(self, row, extern=True):
    '''return the list of the idx val or values located at a row

    *Parameters*

    - **row** : int - row of the record
    - **extern** : boolean (default True) - if True, each value is converted
    with its `to_obj` method, else the values are returned as they are

    *Returns*

    - **list** : val or value for each idx (`self.lidx`)'''
    values = [field.values[row] for field in self.lidx]
    if not extern:
        return values
    return [value.to_obj() for value in values]
return the list of idx val or values at the row
Parameters
- row : int - row of the record
- extern : boolean (default True) - if True, return val rec else value rec
Returns
- list : val or value for idx
def recvar(self, row, extern=True):
    '''return the list of the var val or values located at a row

    *Parameters*

    - **row** : int - row of the record
    - **extern** : boolean (default True) - if True, each value is converted
    with its `to_obj` method, else the values are returned as they are

    *Returns*

    - **list** : val or value for each var (`self.lvar`)'''
    values = [field.values[row] for field in self.lvar]
    if not extern:
        return values
    return [value.to_obj() for value in values]
return the list of var val or values at the row
Parameters
- row : int - row of the record
- extern : boolean (default True) - if True, return val rec else value rec
Returns
- list : val or value for var
def reindex(self):
    '''Apply `reindex` (without argument) to each index of `self.lindex`,
    i.e. calculate a new default codec for each index.

    *Returns* : self'''
    for field in self.lindex:
        field.reindex()
    return self
Calculate a new default codec for each index (Return self)
def renameindex(self, oldname, newname):
    '''Replace the index name 'oldname' by the new one 'newname', for the
    indexes (`setname`) as well as in `self.lvarname`.

    *Returns* : none'''
    for position in range(self.lenindex):
        if self.lname[position] != oldname:
            continue
        self.lindex[position].setname(newname)
    for position, varname in enumerate(self.lvarname):
        if varname == oldname:
            self.lvarname[position] = newname
replace an index name 'oldname' by a new one 'newname'.
def reorder(self, recorder=None):
    '''Reorder the records in the order defined by 'recorder'.

    *Parameters*

    - **recorder** : iterable of int (default None) - new order of the rows.
    It has to be a permutation of `range(len(self))` (each row exactly
    once), else nothing is done.

    *Returns* : none'''
    if recorder is None:
        return None
    # materialized once: the order is checked, then reused for each index
    recorder = list(recorder)
    nrows = len(self)
    # the length check rejects duplicated rows, which a set comparison alone
    # would accept (the key lists would then change length)
    if len(recorder) != nrows or set(recorder) != set(range(nrows)):
        return None
    for idx in self.lindex:
        keys = idx.keys
        idx.set_keys([keys[i] for i in recorder])
    return None
Reorder records in the order defined by 'recorder'
def setcanonorder(self, reindex=False):
    '''Set the canonical index order : primary - secondary - variable - unique.
    Set the canonical keys order : ordered keys in the first columns.

    *Parameters*

    - **reindex** : boolean (default False) - forwarded to `sort` (if True,
    set default codec after transformation)

    *Return* : self'''
    # a new list is built: extending in place the list returned by
    # self.primaryname could alter the data it comes from
    order = [*self.primaryname, *self.secondaryname,
             *self.lvarname, *self.lunicname]
    self.swapindex(order)
    self.sort(reindex=reindex)
    self.analysis.actualize()
    return self
Set the canonical index order : primary - secondary - variable - unique. Set the canonical keys order : ordered keys in the first columns.
Parameters
- reindex : boolean (default False) - if True, set default codec after transformation
Return : self
def setfilter(self, filt=None, first=False, filtname=ES.filter, unique=False):
    '''Add a filter index with boolean values

    *Parameters*

    - **filt** : list of boolean (default None) - values of the filter idx
    to add. If None or empty, every row is set to True.
    - **first** : boolean (default False) - If True insert index at the
    first row, else at the end
    - **filtname** : string (default ES.filter) - Name of the filter Field
    added
    - **unique** : boolean (default False) - if True, the existing Field
    whose name begins with ES.filter are deleted before the new one is added

    *Returns* : self

    *Raises* : DatasetError if the cod of the filter holds something else
    than True and/or False'''
    if not filt:
        filt = [True] * len(self)
    idx = self.field(filt, name=filtname)
    idx.reindex()
    if idx.cod not in ([True, False], [False, True], [True], [False]):
        raise DatasetError('filt is not consistent')
    if unique:
        # iterate on a snapshot of the names: Field are deleted in the loop
        for name in list(self.lname):
            if name[:len(ES.filter)] == ES.filter:
                # delete the Field that matched, not always ES.filter
                self.delindex(name)
    self.addindex(idx, first=first)
    return self
Add a filter index with boolean values
- filt : list of boolean - values of the filter idx to add
- first : boolean (default False) - If True insert index at the first row, else at the end
- filtname : string (default ES.filter) - Name of the filter Field added
Returns : self
def setname(self, listname=None):
    '''Update the Field names with the names of listname, then actualize
    `self.analysis`.

    *Parameters*

    - **listname** : list of str (default None) - new names, applied in order
    to the first Field. Extra names are ignored, Field without a new name
    are unchanged. If None, no name is changed.

    *Returns* : none'''
    if listname is None:
        # the default value means 'nothing to rename' (len(None) would raise)
        listname = []
    for i in range(min(self.lenindex, len(listname))):
        self.lindex[i].name = listname[i]
    self.analysis.actualize()
Update Field name by the name in listname
def sort(self, order=None, reverse=False, func=str, reindex=True):
    '''Sort data following the index order and apply the ascending or
    descending sort function to values.

    *Parameters*

    - **order** : list or tuple of int (default None) - new order of index to
    apply. If None or empty, the sort function is applied to the existing
    order of indexes. The indexes not included in order are added after,
    in ascending position.
    - **reverse** : boolean (default False) - descending if True, ascending
    if False (parameter `reverse` of the `sorted` function)
    - **func** : function (default str) - parameter key used in the sorted
    function (codec order)
    - **reindex** : boolean (default True) - if True, apply a new codec order
    (key = func) to the indexes of order

    *Returns* : self'''
    nfields = self.lenindex
    # list(): a tuple is accepted as well (it is concatenated with a list)
    order = list(order) if order else list(range(nfields))
    # sorted(): deterministic position for the indexes not listed in order
    orderfull = order + sorted(set(range(nfields)) - set(order))
    if reindex:
        for i in order:
            field = self.lindex[i]
            field.reindex(codec=sorted(field.codec, key=func))
    # the records (one key per index, in the orderfull order) are sorted,
    # then each index gets back its own column of keys
    keys_by_field = [self.lindex[orderfull[i]].keys for i in range(nfields)]
    newidx = util.transpose(
        sorted(util.transpose(keys_by_field), reverse=reverse))
    for i in range(nfields):
        self.lindex[orderfull[i]].set_keys(newidx[i])
    return self
Sort data following the index order and apply the ascending or descending sort function to values.
Parameters
- order : list (default None)- new order of index to apply. If None or [], the sort function is applied to the existing order of indexes.
- reverse : boolean (default False) - descending if True, ascending if False
- func : function (default str) - parameter key used in the sorted function
- reindex : boolean (default True) - if True, apply a new codec order (key = func)
Returns : self
def swapindex(self, order):
    '''
    Change the order of the indexes.

    *Parameters*

    - **order** : list of int or list of name - new order of index to apply
    (the kind of list is given by its first item).

    *Returns* : self

    *Raises* : DatasetError if the length of order is not `self.lenindex`'''
    if len(order) != self.lenindex:
        raise DatasetError('length of order and Dataset different')
    by_position = not order or isinstance(order[0], int)
    if by_position:
        current = self.lindex
        self.lindex = [current[position] for position in order]
    elif isinstance(order[0], str):
        self.lindex = list(map(self.nindex, order))
    return self
Change the order of the index .
Parameters
- order : list of int or list of name - new order of index to apply.
Returns : self
def tostdcodec(self, inplace=False, full=True):
    '''Transform every codec into a full or a default codec.

    *Parameters*

    - **inplace** : boolean (default False) - if True apply transformation
    to self, else to a new Dataset
    - **full** : boolean (default True)- full codec if True, default if False

    *Return Dataset* : self (inplace) or a new object of the same class
    built with the transformed indexes and `self.lvarname`'''
    converted = []
    for field in self.lindex:
        # each index returns a transformed copy (inplace=False)
        converted.append(field.tostdcodec(inplace=False, full=full))
    if not inplace:
        return self.__class__(converted, self.lvarname)
    self.lindex = converted
    return self
Transform all codec in full or default codec.
Parameters
- inplace : boolean (default False) - if True apply transformation to self, else to a new Dataset
- full : boolean (default True)- full codec if True, default if False
Return Dataset : self or new Dataset
def tree(self, mode='derived', width=5, lname=20, string=True):
    '''return a tree of derived Field (delegated to `self.analysis.tree`).

    *Parameters*

    - **lname** : integer (default 20) - length of the names
    - **width** : integer (default 5) - length of the lines
    - **mode** : string (default 'derived') - kind of tree :
        'derived' : derived tree
        'distance': min distance tree
        'diff': min dist rate tree
    - **string** : boolean (default True) - forwarded as it is to
    `self.analysis.tree`
    '''
    options = {'width': width, 'lname': lname, 'mode': mode, 'string': string}
    return self.analysis.tree(**options)
return a string with a tree of derived Field.
Parameters
- lname : integer (default 20) - length of the names
- width : integer (default 5) - length of the lines
- mode : string (default 'derived') - kind of tree : 'derived' : derived tree 'distance': min distance tree 'diff': min dist rate tree
def updateindex(self, listvalue, index, extern=True):
    '''Replace the values of an index (`setlistvalue` of this index).

    *Parameters*

    - **listvalue** : list - index values to replace
    - **index** : integer - index row to update
    - **extern** : if True, the listvalue has external representation,
    else internal

    *Returns* : none '''
    target = self.lindex[index]
    target.setlistvalue(listvalue, extern=extern)
update values of an index.
Parameters
- listvalue : list - index values to replace
- index : integer - index row to update
- extern : if True, the listvalue has external representation, else internal
Returns : none
def valtokey(self, rec, extern=True):
    '''Translate a record list (value or val for each index) into a key
    list (key for each index).

    *Parameters*

    - **rec** : list of value or val for each index (paired in order with
    `self.lindex`)
    - **extern** : if True, the rec value has external representation,
    else internal

    *Returns*

    - **list** : record key for each index (result of the `valtokey`
    method of each index)'''
    keys = []
    for field, value in zip(self.lindex, rec):
        keys.append(field.valtokey(value, extern=extern))
    return keys
convert a record list (value or val for each idx) to a key list (key for each index).
Parameters
- rec : list of value or val for each index
- extern : if True, the rec value has external representation, else internal
Returns
- list of int : record key for each index