tab-dataset.tab_dataset.cfield
The cfield module is part of the tab-dataset package.
It contains the classes Cfield and Cutil for Field entities.
For more information, see the user guide or the github repository.
1# -*- coding: utf-8 -*- 2""" 3The `cfield` module is part of the `tab-dataset` package. 4 5It contains the classes `Cfield`, `Cutil` for Field entities. 6 7For more information, see the 8[user guide](https://loco-philippe.github.io/tab-dataset/docs/user_guide.html) 9or the [github repository](https://github.com/loco-philippe/tab-dataset). 10""" 11 12from copy import copy 13from collections import defaultdict, Counter 14from itertools import product 15 16from json_ntv.ntv import Ntv 17from json_ntv.ntv_util import NtvUtil 18 19from tab_analysis.analysis import AnaRelation, AnaField 20 21 22@staticmethod 23def root(leng): 24 '''return the root Field''' 25 return Cfield(Cutil.identity(leng), 'root') 26 27 28def identity(*args, **kwargs): 29 '''return the same value as args or kwargs''' 30 if len(args) > 0: 31 return args[0] 32 if len(kwargs) > 0: 33 return kwargs[list(kwargs.keys())[0]] 34 return None 35 36 37class Cutil: 38 ''' common functions for Field and Dataset class''' 39 40 @staticmethod 41 def identity(leng): 42 '''return the root_field values''' 43 return list(range(leng)) 44 45 @staticmethod 46 def canonorder(lenidx): 47 '''return a list of crossed keys from a list of number of values''' 48 listrange = [range(lidx) for lidx in lenidx] 49 return Cutil.transpose(Cutil.list(list(product(*listrange)))) 50 51 @staticmethod 52 def default(values): 53 '''return default codec and keys from a list of values''' 54 codec = list(dict.fromkeys(values)) 55 dic = {codec[i]: i for i in range(len(codec))} 56 keys = [dic[val] for val in values] 57 return (codec, keys) 58 59 @staticmethod 60 def dist(key1, key2, distr=False): 61 '''return default coupling codec between two keys list and optionaly if 62 the relationship is distributed''' 63 if not key1 or not key2: 64 return 0 65 k1k2 = [tuple((v1, v2)) for v1, v2 in zip(key1, key2)] 66 dist = len(Cutil.tocodec(k1k2)) 67 if not distr: 68 return dist 69 distrib = False 70 if dist == (max(key1) + 1) * (max(key2) + 1): 71 
distrib = max(Counter(k1k2).values()) == len(key1) // dist 72 # distrib = min(sum(map(lambda x: (x + i) % (max(a) + 1), a)) == sum(a) for i in range(1, max(a)+1)) 73 return [dist, distrib] 74 75 @staticmethod 76 def encode_coef(lis): 77 '''Generate a repetition coefficient for periodic list''' 78 if len(lis) < 2: 79 return 0 80 coef = 1 81 while coef != len(lis): 82 if lis[coef-1] != lis[coef]: 83 break 84 coef += 1 85 if (not len(lis) % (coef * (max(lis) + 1)) and 86 lis == Cutil.keysfromcoef(coef, max(lis) + 1, len(lis))): 87 return coef 88 return 0 89 90 @staticmethod 91 def funclist(value, func, *args, **kwargs): 92 '''return the function func applied to the object value with parameters args and kwargs''' 93 if func in (None, []): 94 return value 95 lis = [] 96 if not (isinstance(value, list) or value.__class__.__name__ in ['Cfield', 'Cdataset']): 97 listval = [value] 98 else: 99 listval = value 100 for val in listval: 101 try: 102 lis.append(val.func(*args, **kwargs)) 103 except: 104 try: 105 lis.append(func(val, *args, **kwargs)) 106 except: 107 try: 108 lis.append(listval.func(val, *args, **kwargs)) 109 except: 110 try: 111 lis.append(func(listval, val, *args, **kwargs)) 112 except: 113 raise FieldError("unable to apply func") 114 if len(lis) == 1: 115 return lis[0] 116 return lis 117 118 @staticmethod 119 def idxfull(setidx): 120 '''return additional keys for each index in the setidx list to have crossed setidx''' 121 setcodec = [set(idx.keys) for idx in setidx] 122 lenfull = Cutil.mul([len(codec) for codec in setcodec]) 123 if lenfull <= len(setidx[0]): 124 return [] 125 complet = Counter(list(product(*setcodec))) 126 complet.subtract( 127 Counter(Cutil.tuple(Cutil.transpose([idx.keys for idx in setidx])))) 128 keysadd = Cutil.transpose(Cutil.list(list(complet.elements()))) 129 if not keysadd: 130 return [] 131 return keysadd 132 133 @staticmethod 134 def idxlink(ref, lis): 135 ''' return a dict for each different tuple (ref value, lis value)''' 136 return 
dict(set(zip(ref, lis))) 137 #lis = set(util.tuple(util.transpose([ref, l2]))) 138 # if not len(lis) == len(set(ref)): 139 # return {} 140 # return dict(lis) 141 142 @staticmethod 143 def is_not_equal(value, tovalue=None, **kwargs): 144 ''' return True if value and tovalue are not equal''' 145 return value.__class__.__name__ != tovalue.__class__.__name__ or \ 146 value != tovalue 147 148 @staticmethod 149 def keysfromcoef(coef, period, leng=None): 150 ''' return a list of keys with periodic structure''' 151 if not leng: 152 leng = coef * period 153 return None if not (coef and period) else [(ind % (coef * period)) // coef 154 for ind in range(leng)] 155 156 @staticmethod 157 def keysfromderkeys(parentkeys, derkeys): 158 '''return keys from parent keys and derkeys 159 160 *Parameters* 161 162 - **parentkeys** : list of keys from parent 163 - **derkeys** : list of derived keys 164 165 *Returns* : list of keys''' 166 return [derkeys[pkey] for pkey in parentkeys] 167 168 @staticmethod 169 def list(tuplelists): 170 '''transform a list of tuples in a list of lists''' 171 return list(map(list, tuplelists)) 172 173 @staticmethod 174 def mul(values): 175 '''return the product of values in a list or tuple (math.prod)''' 176 mul = 1 177 for val in values: 178 mul *= val 179 return mul 180 181 @staticmethod 182 def reindex(oldkeys, oldcodec, newcodec): 183 '''new keys with new order of codec''' 184 dic = {newcodec[i]: i for i in range(len(newcodec))} 185 return [dic[oldcodec[key]] for key in oldkeys] 186 187 @staticmethod 188 def reorder(values, sort=None): 189 '''return a new values list following the order define by sort''' 190 if not sort: 191 return values 192 return [values[ind] for ind in sort] 193 194 @staticmethod 195 def resetidx(values): 196 '''return codec and keys from a list of values''' 197 codec = Cutil.tocodec(values) 198 return (codec, Cutil.tokeys(values, codec)) 199 200 @staticmethod 201 def tocodec(values, keys=None): 202 '''extract a list of unique 
values''' 203 if not keys: 204 # return list(set(values)) 205 return list(dict.fromkeys(values)) 206 #ind, codec = zip(*sorted(set(zip(keys, values)))) 207 return list(list(zip(*sorted(set(zip(keys, values)))))[1]) 208 209 @staticmethod 210 def tokeys(values, codec=None): 211 ''' return a list of keys from a list of values''' 212 if not codec: 213 codec = Cutil.tocodec(values) 214 dic = {codec[i]: i for i in range(len(codec))} # !!!!long 215 keys = [dic[val] for val in values] # hyper long 216 return keys 217 218 @staticmethod 219 def transpose(idxlist): 220 '''exchange row/column in a list of list''' 221 # if not isinstance(idxlist, list): 222 # raise FieldError('index not transposable') 223 # if not idxlist: 224 # return [] 225 return list(map(list, zip(*idxlist))) 226 # return [list(elmt) for elmt in zip(*idxlist)] 227 #size = min([len(ix) for ix in idxlist]) 228 # return [[ix[ind] for ix in idxlist] for ind in range(size)] 229 230 @staticmethod 231 def tuple(idx): 232 '''transform a list of list in a list of tuple''' 233 return list(map(tuple, idx)) 234 # return [val if not isinstance(val, list) else tuple(val) for val in idx] 235 236 @staticmethod 237 def tupled(lis): 238 '''transform a list of list in a tuple of tuple''' 239 #return tuple(val if not isinstance(val, list) else Sfield._tupled(val) for val in lis) 240 return tuple(map(Cutil.tupled, lis)) if isinstance(lis, list) else lis 241 242 @staticmethod 243 def listed(lis): 244 '''transform a tuple of tuple in a list of list''' 245 #return [val if not isinstance(val, tuple) else Cutil.listed(val) for val in lis] 246 return list(map(Cutil.listed, lis)) if isinstance(lis, tuple) else lis 247 248class Cfield: 249 # %% intro 250 ''' 251 A `Cfield` is a representation of an Field list . 
252 253 *Attributes (for dynamic attributes see @property methods)* : 254 255 - **name** : name of the Field 256 - **_codec** : list of values for each key 257 - **_keys** : list of code values 258 259 The methods defined in this class are : 260 261 *constructor (@classmethod)* 262 263 - `Cfield.bol` 264 - `Cfield.from_ntv` 265 - `Cfield.ntv` 266 - `Cfield.like` 267 268 *conversion static methods* 269 270 - `Cfield.ntv_to_val` (@classmethod) 271 - `Cfield.n_to_i` (@staticmethod) 272 273 *dynamic value (getters @property)* 274 275 - `Cfield.hashf` 276 - `Cfield.to_analysis` 277 - `Cfield.values` 278 - `Cfield.codec` 279 - `Cfield.infos` 280 - `Cfield.keys` 281 282 *add - update methods* 283 284 - `Cfield.add` 285 - `Cfield.append` 286 - `Cfield.setcodecvalue` 287 - `Cfield.setcodeclist` 288 - `Cfield.setname` 289 - `Cfield.set_keys` 290 - `Cfield.set_codec` 291 - `Cfield.setkeys` 292 - `Cfield.setlistvalue` 293 - `Cfield.setvalue` 294 295 *transform methods* 296 297 - `Cfield.coupling` 298 - `Cfield.check_relation` (@staticmethod) 299 - `Cfield.extendkeys` 300 - `Cfield.full` 301 - `Cfield.reindex` 302 - `Cfield.reorder` 303 - `Cfield.sort` 304 - `Cfield.tocoupled` 305 - `Cfield.tostdcodec` 306 307 *getters methods* 308 309 - `Cfield.couplinginfos` 310 - `Cfield.derkeys` 311 - `Cfield.getduplicates` 312 - `Cfield.iscrossed` 313 - `Cfield.iscoupled` 314 - `Cfield.isderived` 315 - `Cfield.islinked` 316 - `Cfield.isvalue` 317 - `Cfield.iskeysfromderkeys` 318 - `Cfield.keytoval` 319 - `Cfield.loc` 320 - `Cfield.recordfromkeys` 321 - `Cfield.recordfromvalue` 322 - `Cfield.valtokey` 323 ''' 324 325 def __init__(self, codec=None, name=None, keys=None, default=False, reindex=False): 326 '''Two modes: 327 - a single attributes : Cfield object to copy 328 - multiple attributes : set codec, name and keys attributes''' 329 if not codec and not keys: 330 self._codec = [] 331 self._keys = [] 332 elif isinstance(codec, Cfield): 333 self._keys = codec._keys 334 self._codec = 
codec._codec 335 self.name = codec.name 336 return 337 elif not default: 338 self._keys = keys if keys else Cutil.identity(len(codec)) 339 self._codec = codec if codec else Cutil.identity(len(keys)) 340 else: 341 self._codec, self._keys = Cutil.default(codec) 342 self.name = name if name else 'field' 343 if reindex: 344 self.reindex() 345 return 346 347 def __repr__(self): 348 '''return classname and number of value''' 349 return self.__class__.__name__ + '[' + str(len(self)) + ']' 350 351 def __str__(self): 352 '''return json string format''' 353 return str({self.name: self.values}) 354 355 def __eq__(self, other): 356 ''' equal if class and values are equal''' 357 return self.__class__ .__name__ == other.__class__.__name__ and \ 358 self.values == other.values 359 360 def __len__(self): 361 ''' len of values''' 362 return len(self._keys) 363 364 def __contains__(self, item): 365 ''' item of values''' 366 return item in self.values 367 368 def __getitem__(self, ind): 369 ''' return value item (value conversion)''' 370 if isinstance(ind, tuple): 371 return [copy(self.values[i]) for i in ind] 372 # return self.values[ind] 373 return copy(self.values[ind]) 374 375 def __setitem__(self, ind, item): 376 ''' modify values item''' 377 if isinstance(ind, slice): 378 start, stop, step = ind.start or 0, ind.stop or len(self), ind.step or 1 379 idxt = list(iter(range(start, stop, step))) 380 if len(idxt) != len(item): 381 raise FieldError("item length not consistent") 382 self.setlistvalue(item, idxt) 383 elif ind < 0 or ind >= len(self): 384 raise FieldError("out of bounds") 385 else: 386 self.setvalue(ind, item) 387 388 def __delitem__(self, ind): 389 '''remove a record (value and key).''' 390 self._keys.pop(ind) 391 self.reindex() 392 393 def __hash__(self): 394 '''return hash(values)''' 395 return hash(tuple(self.values)) 396 397 def _hashe(self): 398 '''return hash(values)''' 399 return hash(tuple(self.values)) 400 401 def __add__(self, other): 402 ''' Add other's 
values to self's values in a new Field''' 403 newiindex = self.__copy__() 404 newiindex.__iadd__(other) 405 return newiindex 406 407 def __iadd__(self, other): 408 ''' Add other's values to self's values''' 409 return self.add(other, solve=False) 410 411 def __copy__(self): 412 ''' Copy all the data ''' 413 return self.__class__(self) 414 415 # %% property 416 @property 417 def hashf(self): 418 '''return hash(codec infos and keys)''' 419 return hash(tuple((len(self.codec), len(set(self.codec)), len(self), 420 self.name, tuple(self._keys)))) 421 422 @property 423 def to_analysis(self): 424 '''return data for AnaField module''' 425 return {'maxcodec': len(self), 'lencodec': len(self.codec), 'id': self.name, 426 'mincodec': len(set(self.codec)), 'hashf': self.hashf} 427 428 @property 429 def codec(self): 430 '''return codec ''' 431 return self._codec 432 433 @property 434 def infos(self): 435 '''return dict with lencodec, typecodec, ratecodec, mincodec, maxcodec''' 436 return AnaField(self.to_analysis).to_dict(full=True) 437 438 @property 439 def keys(self): 440 '''return keys ''' 441 return self._keys 442 443 @property 444 def values(self): 445 '''return values (see data model)''' 446 return [self._codec[key] for key in self._keys] 447 448 # %% class methods 449 @classmethod 450 def from_ntv(cls, ntv_value=None, extkeys=None, reindex=True, decode_str=False, 451 add_type=True, lengkeys=None): 452 '''Generate an Field Object from a Ntv field object''' 453 if isinstance(ntv_value, cls): 454 return copy(ntv_value) 455 if ntv_value is None: 456 return cls() 457 ntv = Ntv.obj(ntv_value, decode_str=decode_str) 458 #ntv = NtvList(ntv_value) 459 name, typ, codec, parent, keys, coef, leng = NtvUtil.decode_ntv_tab( 460 ntv, cls.ntv_to_val) 461 if parent and not extkeys: 462 return None 463 if coef: 464 keys = Cutil.keysfromcoef(coef, leng//coef, lengkeys) 465 elif extkeys and parent: 466 keys = Cutil.keysfromderkeys(extkeys, keys) 467 elif extkeys and not parent: 468 keys = 
extkeys 469 keys = list(range(len(codec))) if keys is None else keys 470 name = ntv.json_name(string=True) if add_type else name 471 return cls(codec=codec, name=name, keys=keys, reindex=reindex) 472 473 @classmethod 474 def bol(cls, leng, notdef=None, name=None, default=True): 475 ''' 476 Field constructor (boolean value). 477 478 *Parameters* 479 480 - **leng** : integer - length of the Field 481 - **notdef** : list (default None) - list of records without default value 482 - **default** : boolean (default True) - default value 483 - **name** : string (default None) - name of Field''' 484 values = [default] * leng 485 if notdef: 486 for item in notdef: 487 values[item] = not default 488 return cls.ntv({name: values}) 489 490 @classmethod 491 def like(cls, codec, parent, name=None, reindex=False): 492 '''Generate an Field Object from specific codec and keys from another field. 493 494 *Parameters* 495 496 - **codec** : list of objects 497 - **name** : string (default None) - name of index (see data model) 498 - **parent** : Field, parent of the new Field 499 - **reindex** : boolean (default True) - if True, default codec is apply 500 501 *Returns* : Field ''' 502 if isinstance(codec, Cfield): 503 return copy(codec) 504 return cls(codec=codec, name=name, keys=parent.keys, reindex=reindex) 505 506 @classmethod 507 def ntv(cls, ntv_value=None, extkeys=None, reindex=True, decode_str=False): 508 '''Generate an Field Object from a Ntv field object''' 509 return cls.from_ntv(ntv_value, extkeys=extkeys, reindex=reindex, decode_str=decode_str) 510 511 @classmethod 512 def ntv_to_val(cls, ntv): 513 '''conversion in decode_ntv_val method''' 514 return cls.n_to_i(ntv.val) 515 516 # %% static methods 517 @staticmethod 518 def n_to_i(ntv_lis): 519 ''' converting a NtvList value to an internal value''' 520 if isinstance(ntv_lis, list) and len(ntv_lis) == 0: 521 return [] 522 if isinstance(ntv_lis, list) and ntv_lis[0].__class__.__name__ in ('NtvSingle', 'NtvList'): 523 return 
[Cfield.n_to_i(ntv.to_obj()) for ntv in ntv_lis] 524 return ntv_lis 525 526 @staticmethod 527 def check_relation(parent, child, typecoupl, value=True): 528 '''get the inconsistent records for a relationship 529 530 *Parameters* 531 532 - **field** : child field involved in the relation 533 - **parent**: parent field involved in the relation 534 - **typecoupl**: str - relationship to check ('derived' or 'coupled') 535 - **value**: boolean (default True) - if True return a dict with inconsistent 536 values of the fields, else a tuple with index of records) 537 538 *Returns* : 539 540 - dict with inconsistent values of the fields 541 - or a tuple with index of records''' 542 match typecoupl: 543 case 'derived': 544 errors = parent.coupling(child, reindex=True) 545 case 'coupled': 546 errors = copy(parent).coupling(child, derived=False, reindex=True) 547 case _: 548 raise FieldError(typecoupl + "is not a valid relationship") 549 if not value: 550 return errors 551 return {'row': list(errors), child.name: child[errors], 552 parent.name: parent[errors]} 553 554 # %% instance methods 555 def add(self, other, solve=True): 556 ''' Add other's values to self's values 557 558 *Parameters* 559 560 - **other** : Field object to add to self object 561 - **solve** : Boolean (default True) - If True, replace None other's codec value 562 with self codec value. 
563 564 *Returns* : self ''' 565 if solve: 566 solved = copy(other) 567 for i in range(len(solved.codec)): 568 if solved.codec[i] is None and i in range(len(self.codec)): 569 solved._codec[i] = self.codec[i] 570 values = self.values + solved.values 571 else: 572 values = self.values + other.values 573 codec = Cutil.tocodec(values) 574 if set(codec) != set(self._codec): 575 self._codec = codec 576 self._keys = Cutil.tokeys(values, self._codec) 577 return self 578 579 def append(self, value, unique=True): 580 '''add a new value 581 582 *Parameters* 583 584 - **value** : new object value 585 - **unique** : boolean (default True) - If False, duplication codec if value is present 586 587 *Returns* : key of value ''' 588 #value = Ntv.obj(value) 589 #value = self.s_to_i(value) 590 if value in self._codec and unique: 591 key = self._codec.index(value) 592 else: 593 key = len(self._codec) 594 self._codec.append(value) 595 self._keys.append(key) 596 return key 597 598 def coupling(self, idx, derived=True, duplicate=True, reindex=False): 599 ''' 600 Transform indexes in coupled or derived indexes (codec extension). 601 If derived option is True, self._codec is extended and idx codec not, 602 else, both are coupled and both codec are extended. 603 604 *Parameters* 605 606 - **idx** : single Field or list of Field to be coupled or derived. 607 - **derived** : boolean (default : True) - if True result is derived, 608 if False coupled 609 - **duplicate** : boolean (default: True) - if True, return duplicate records 610 (only for self index) 611 - **reindex** : boolean (default : False). If True self.index is reindexed 612 with default codec. But if not derived, idx indexes MUST to be reindexed. 
613 614 *Returns* : tuple with duplicate records (errors) if 'duplicate', None else''' 615 duplic = tuple() 616 if not isinstance(idx, list): 617 index = [idx] 618 else: 619 index = idx 620 idxzip = self.__class__(list(zip(*([self.keys] + [ix.keys for ix in index]))), 621 reindex=True) 622 self.tocoupled(idxzip) 623 if not derived: 624 for ind in index: 625 ind.tocoupled(idxzip) 626 duplic += ind.getduplicates(reindex) 627 if duplicate and not duplic: 628 return self.getduplicates(reindex) 629 if duplicate and duplic: 630 return tuple(sorted(list(set(duplic + self.getduplicates(reindex))))) 631 if reindex: 632 self.reindex() 633 return None 634 635 def couplinginfos(self, other): 636 '''return a dict with the coupling info between other (distance, ratecpl, 637 rateder, dist, disttomin, disttomax, distmin, distmax, diff, typecoupl) 638 639 *Parameters* 640 641 - **other** : other index to compare 642 643 *Returns* : dict''' 644 if min(len(self), len(other)) == 0: 645 null = Cfield() 646 return AnaRelation([AnaField(null.to_analysis), AnaField(null.to_analysis)], 647 Cutil.dist(null.keys, null.keys, True) 648 ).to_dict(distances=True, misc=True) 649 return AnaRelation([AnaField(self.to_analysis), AnaField(other.to_analysis)], 650 Cutil.dist(self.keys, other.keys, True) 651 ).to_dict(distances=True, misc=True) 652 653 def derkeys(self, parent): 654 '''return keys derived from parent keys 655 656 *Parameters* 657 658 - **parent** : Field - parent 659 660 *Returns* : list of keys''' 661 derkey = [-1] * len(parent.codec) 662 for i in range(len(self)): 663 derkey[parent.keys[i]] = self.keys[i] 664 if min(derkey) < 0: 665 raise FieldError("parent is not a derive Field") 666 return derkey 667 668 def extendkeys(self, keys): 669 '''add keys to the Field 670 671 *Parameters* 672 673 - **keys** : list of int (value lower or equal than actual keys) 674 675 *Returns* : None ''' 676 if min(keys) < 0 or max(keys) > len(self._codec) - 1: 677 raise FieldError('keys not consistent 
with codec') 678 self._keys += keys 679 680 @staticmethod 681 def full(listidx): 682 '''tranform a list of indexes in crossed indexes (value extension). 683 684 *Parameters* 685 686 - **listidx** : list of Field to transform 687 688 *Returns* : tuple of records added ''' 689 idx1 = listidx[0] 690 for idx in listidx: 691 if len(idx) != len(idx): 692 return None 693 leninit = len(idx1) 694 keysadd = Cutil.idxfull(listidx) 695 for idx, keys in zip(listidx, keysadd): 696 idx._keys += keys 697 return tuple(range(leninit, len(idx1))) 698 699 def getduplicates(self, reindex=False): 700 ''' calculate items with duplicate codec 701 702 *Parameters* 703 704 - **reindex** : boolean (default : False). If True index is reindexed with default codec 705 706 *Returns* : tuple of items with duplicate codec''' 707 count = Counter(self._codec) 708 defcodec = list(count - Counter(list(count))) 709 dkeys = defaultdict(list) 710 for key, ind in zip(self._keys, range(len(self))): 711 dkeys[key].append(ind) 712 dcodec = defaultdict(list) 713 for key, ind in zip(self._codec, range(len(self._codec))): 714 dcodec[key].append(ind) 715 duplicates = [] 716 for item in defcodec: 717 for codecitem in dcodec[item]: 718 duplicates += dkeys[codecitem] 719 if reindex: 720 self.reindex() 721 return tuple(duplicates) 722 723 def iscrossed(self, other): 724 '''return True if self is crossed to other''' 725 return self.couplinginfos(other)['rateder'] == 1.0 726 727 def iscoupled(self, other): 728 '''return True if self is coupled to other''' 729 info = self.couplinginfos(other) 730 return info['diff'] == 0 and info['rateder'] == 0.0 731 732 def isderived(self, other, only=False): 733 '''return True if self is derived from other''' 734 info = self.couplinginfos(other) 735 return not (info['diff'] == 0 and only) and info['rateder'] == 0.0 736 737 def iskeysfromderkeys(self, other): 738 '''return True if self._keys is relative from other._keys''' 739 leng = len(other.codec) 740 if leng % len(self._codec) != 
0: 741 return False 742 keys = [(i*len(self._codec))//leng for i in range(leng)] 743 return Cutil.keysfromderkeys(other.keys, keys) == self.keys 744 745 def islinked(self, other): 746 '''return True if self is linked to other''' 747 rate = self.couplinginfos(other)['rateder'] 748 return 0.0 < rate < 1.0 749 750 def isvalue(self, value): 751 ''' return True if value is in index values 752 753 *Parameters* 754 755 - **value** : value to check''' 756 return value in self.values 757 758 def keytoval(self, key): 759 ''' return the value of a key 760 761 *Parameters* 762 763 - **key** : key to convert into values 764 - **extern** : if True, return string representation else, internal value 765 766 *Returns* 767 768 - **int** : first key finded (None else)''' 769 if key < 0 or key >= len(self._codec): 770 return None 771 return self._codec[key] 772 773 def loc(self, value): 774 '''return a list of record number with value 775 776 *Parameters* 777 778 - **value** : value to check 779 780 *Returns* 781 782 - **list of int** : list of record number finded (None else)''' 783 return self.recordfromvalue(value) 784 785 def recordfromvalue(self, value): 786 '''return a list of record number with value 787 788 *Parameters* 789 790 - **value** : value to check 791 - **extern** : if True, compare value to external representation of self.value, 792 else, internal 793 794 *Returns* 795 796 - **list of int** : list of record number finded (None else)''' 797 798 if not value in self._codec: 799 return None 800 listkeys = [cod for cod, val in zip( 801 range(len(self._codec)), self._codec) if val == value] 802 return self.recordfromkeys(listkeys) 803 804 def recordfromkeys(self, listkeys): 805 '''return a list of record number with key in listkeys 806 807 *Parameters* 808 809 - **listkeys** : list of keys to check 810 811 *Returns* 812 813 - **list of int** : list of record number finded (None else)''' 814 815 return [rec for rec, key in zip(range(len(self)), self._keys) if key in 
listkeys] 816 817 def reindex(self, codec=None): 818 '''apply a reordered codec. If None, a new default codec is apply. 819 820 *Parameters* 821 822 - **codec** : list (default None) - reordered codec to apply. 823 824 *Returns* : self''' 825 826 if not codec: 827 codec = Cutil.tocodec(self.values) 828 self._keys = Cutil.reindex(self._keys, self._codec, codec) 829 self._codec = codec 830 return self 831 832 def reorder(self, sort=None, inplace=True): 833 '''Change the Field order with a new order define by sort and reset the codec. 834 835 *Parameters* 836 837 - **sort** : int list (default None)- new record order to apply. If None, no change. 838 - **inplace** : boolean (default True) - if True, new order is apply to self, 839 if False a new Field is created. 840 841 *Returns* 842 843 - **Field** : self if inplace, new Field if not inplace''' 844 values = Cutil.reorder(self.values, sort) 845 codec, keys = Cutil.resetidx(values) 846 if inplace: 847 self._keys = keys 848 self._codec = codec 849 return None 850 return self.__class__(name=self.name, codec=codec, keys=keys) 851 852 def setcodecvalue(self, oldvalue, newvalue): 853 '''update all the oldvalue by newvalue 854 855 *Parameters* 856 857 - **oldvalue** : list of values to replace 858 - **newvalue** : list of new value to apply 859 860 *Returns* : int - last codec rank updated (-1 if None)''' 861 862 rank = -1 863 for i in range(len(self._codec)): 864 if self._codec[i] == oldvalue: 865 self._codec[i] = newvalue 866 rank = i 867 return rank 868 869 def setcodeclist(self, listcodec): 870 '''update codec with listcodec values 871 872 *Parameters* 873 874 - **listcodec** : list of new codec values to apply 875 876 *Returns* : int - last codec rank updated (-1 if None)''' 877 self._codec = listcodec 878 879 def set_keys(self, keys): 880 ''' _keys setters ''' 881 self._keys = keys 882 883 def set_codec(self, codec): 884 ''' _codec setters ''' 885 self._codec = codec 886 887 def setkeys(self, keys, inplace=True): 888 
'''apply new keys (replace codec with extended codec from parent keys) 889 890 *Parameters* 891 892 - **keys** : list of keys to apply 893 - **inplace** : if True, update self data, else create a new Field 894 895 *Returns* : self or new Field''' 896 codec = Cutil.tocodec(self.values, keys) 897 if inplace: 898 self._codec = codec 899 self._keys = keys 900 return self 901 return self.__class__(codec=codec, name=self.name, keys=keys) 902 903 def setname(self, name): 904 '''update the Field name 905 906 *Parameters* 907 908 - **name** : str to set into name 909 910 *Returns* : boolean - True if update''' 911 if isinstance(name, str): 912 self.name = name 913 return True 914 return False 915 916 def setvalue(self, ind, value): 917 '''update a value at the rank ind (and update codec and keys) 918 919 *Parameters* 920 921 - **ind** : rank of the value 922 - **value** : new value 923 924 *Returns* : None''' 925 values = self.values 926 values[ind] = value 927 self._codec, self._keys = Cutil.resetidx(values) 928 929 def setlistvalue(self, listvalue, listind=None): 930 '''update the values (and update codec and keys) 931 932 *Parameters* 933 934 - **listvalue** : list - list of new values 935 - **listind** : list(default None) - list of index 936 937 *Returns* : None''' 938 values = self.values 939 listind = listind if listind else range(len(self)) 940 for i, value_i in zip(listind, listvalue): 941 values[i] = value_i 942 self._codec, self._keys = Cutil.resetidx(values) 943 944 def sort(self, reverse=False, inplace=True, func=str): 945 '''Define sorted index with ordered codec. 946 947 *Parameters* 948 949 - **reverse** : boolean (defaut False) - codec is sorted with reverse order 950 - **inplace** : boolean (default True) - if True, new order is apply to self, 951 if False a new Field is created. 
952 - **func** : function (default str) - key used in the sorted function 953 954 *Return* 955 956 - **Field** : self if inplace, new Field if not inplace''' 957 if inplace: 958 self.reindex(codec=sorted(self._codec, reverse=reverse, key=func)) 959 self._keys.sort() 960 return self 961 oldcodec = self._codec 962 codec = sorted(oldcodec, reverse=reverse, key=str) 963 return self.__class__(name=self.name, codec=codec, 964 keys=sorted(Cutil.reindex(self._keys, oldcodec, codec))) 965 966 def tocoupled(self, other, coupling=True): 967 ''' 968 Transform a derived index in a coupled index (keys extension) and add 969 new values to have the same length as other. 970 971 *Parameters* 972 973 - **other** : index to be coupled. 974 - **coupling** : boolean (default True) - reindex if False 975 976 *Returns* : None''' 977 dic = Cutil.idxlink(other.keys, self._keys) 978 if not dic: 979 raise FieldError("Field is not coupled or derived from other") 980 self._codec = [self._codec[dic[i]] for i in range(len(dic))] 981 self._keys = other.keys 982 if not coupling: 983 self.reindex() 984 985 def tostdcodec(self, inplace=False, full=True): 986 ''' 987 Transform codec in full or in default codec. 
988 989 *Parameters* 990 991 - **inplace** : boolean (default True) - if True, new order is apply to self, 992 - **full** : boolean (default True) - if True reindex with full codec 993 994 *Return* 995 996 - **Field** : self if inplace, new Field if not inplace''' 997 if full: 998 codec = self.values 999 keys = list(range(len(codec))) 1000 else: 1001 codec = Cutil.tocodec(self.values) 1002 keys = Cutil.reindex(self._keys, self._codec, codec) 1003 if inplace: 1004 self._codec = codec 1005 self._keys = keys 1006 return self 1007 return self.__class__(codec=codec, name=self.name, keys=keys) 1008 1009 def valtokey(self, value): 1010 '''convert a value to a key 1011 1012 *Parameters* 1013 1014 - **value** : value to convert 1015 1016 *Returns* 1017 1018 - **int** : first key finded (None else)''' 1019 if value in self._codec: 1020 return self._codec.index(value) 1021 return None 1022 1023 1024class FieldError(Exception): 1025 ''' Field Exception''' 1026 # pass
def root(leng):
    '''return the root Field

    *Parameters*

    - **leng** : int - number of records of the root Field

    *Returns* : Cfield named 'root' built from `Cutil.identity(leng)`'''
    # plain module-level function: a `@staticmethod` decorator has no meaning
    # outside a class body
    return Cfield(Cutil.identity(leng), 'root')
return the root Field
def identity(*args, **kwargs):
    '''return the first positional argument if any, else the value of the
    first keyword argument, else None'''
    if args:
        return args[0]
    # the loop body runs at most once : it returns the first keyword value
    for first_value in kwargs.values():
        return first_value
    return None
return the first positional argument if any, otherwise the value of the first keyword argument (None if neither is given)
class Cutil:
    '''Common helper functions shared by the Field and Dataset classes.

    All helpers are static methods working on plain lists:

    - *values* : list of record values,
    - *codec* : list of the values referenced by keys,
    - *keys* : list of integers, each one being a position in a codec.
    '''

    @staticmethod
    def identity(leng):
        '''return the root field values : the list of integers from 0 to leng - 1'''
        return [*range(leng)]

    @staticmethod
    def canonorder(lenidx):
        '''return the crossed (cartesian product) keys built from a list of
        numbers of values : one list of keys for each number in lenidx'''
        crossed = product(*map(range, lenidx))
        return Cutil.transpose(Cutil.list(crossed))

    @staticmethod
    def default(values):
        '''return a tuple (codec, keys) where codec holds the distinct values
        in order of first appearance and keys the codec position of each value'''
        codec = [*dict.fromkeys(values)]
        rank = {val: pos for pos, val in enumerate(codec)}
        return (codec, [rank[val] for val in values])

    @staticmethod
    def dist(key1, key2, distr=False):
        '''return the number of distinct (key1, key2) pairs and, optionally,
        whether the relationship is distributed.

        *Parameters*

        - **key1**, **key2** : lists of keys compared record by record
        - **distr** : boolean (default False) - if True the distribution flag
        is also returned

        *Returns*

        - 0 if one of the lists is empty (whatever distr),
        - the number of distinct pairs if distr is False,
        - a list [number of distinct pairs, distributed flag] if distr is True'''
        if not key1 or not key2:
            return 0
        pairs = list(zip(key1, key2))
        nb_pairs = len(Cutil.tocodec(pairs))
        if not distr:
            return nb_pairs
        # distributed : every possible pair exists and the most frequent pair
        # occurs exactly len(key1) // nb_pairs times
        crossed = nb_pairs == (max(key1) + 1) * (max(key2) + 1)
        balanced = crossed and max(Counter(pairs).values()) == len(key1) // nb_pairs
        return [nb_pairs, balanced]

    @staticmethod
    def encode_coef(lis):
        '''return the repetition coefficient of a periodic list of keys
        (0 if the list has less than two items or is not periodic)'''
        size = len(lis)
        if size < 2:
            return 0
        # coef is the length of the leading run of identical values
        coef = next((pos for pos in range(1, size) if lis[pos - 1] != lis[pos]), size)
        period = max(lis) + 1
        if size % (coef * period) == 0 and lis == Cutil.keysfromcoef(coef, period, size):
            return coef
        return 0

    @staticmethod
    def funclist(value, func, *args, **kwargs):
        '''return the function func applied to the object value with parameters
        args and kwargs.

        If value is a list (or an object whose class is named 'Cfield' or
        'Cdataset') func is applied to each item, else to value itself.
        For each item, four call conventions are tried in turn :
        `item.func(...)`, `func(item, ...)`, `value.func(item, ...)` and
        `func(value, item, ...)`.

        *Returns* : value if func is None or [], the single result if only one
        item is processed, else the list of results.

        *Raises* : FieldError if the four call conventions fail. Exceptions that
        do not derive from Exception (e.g. KeyboardInterrupt) are not caught.'''
        if func in (None, []):
            return value
        is_collection = (isinstance(value, list) or
                         value.__class__.__name__ in ['Cfield', 'Cdataset'])
        listval = value if is_collection else [value]

        def apply_one(val):
            '''return the result of the first call convention that succeeds'''
            calls = (
                lambda: val.func(*args, **kwargs),
                lambda: func(val, *args, **kwargs),
                lambda: listval.func(val, *args, **kwargs),
                lambda: func(listval, val, *args, **kwargs),
            )
            last_error = None
            for call in calls:
                try:
                    return call()
                except Exception as err:  # pylint: disable=broad-except
                    last_error = err
            raise FieldError("unable to apply func") from last_error

        lis = [apply_one(val) for val in listval]
        if len(lis) == 1:
            return lis[0]
        return lis

    @staticmethod
    def idxfull(setidx):
        '''return additional keys for each index in the setidx list to have
        crossed setidx (an empty list if nothing has to be added).

        Each item of setidx must expose a `keys` attribute and a length.'''
        setcodec = [set(idx.keys) for idx in setidx]
        if Cutil.mul(map(len, setcodec)) <= len(setidx[0]):
            return []
        # combinations of keys that are not present in the records
        missing = Counter(product(*setcodec))
        missing.subtract(Counter(zip(*(idx.keys for idx in setidx))))
        return Cutil.transpose(missing.elements()) or []

    @staticmethod
    def idxlink(ref, lis):
        '''return a dict built from the distinct (ref value, lis value) pairs'''
        links = set(zip(ref, lis))
        return dict(links)

    @staticmethod
    def is_not_equal(value, tovalue=None, **kwargs):
        '''return True if the class names of value and tovalue differ, else the
        result of `value != tovalue` (kwargs is accepted and ignored)'''
        if value.__class__.__name__ != tovalue.__class__.__name__:
            return True
        return value != tovalue

    @staticmethod
    def keysfromcoef(coef, period, leng=None):
        '''return a list of keys with a periodic structure : each key is
        repeated coef times and keys cycle over range(period).

        The length is leng (coef * period if leng is not defined).
        None is returned if coef or period is null.'''
        size = leng if leng else coef * period
        if not (coef and period):
            return None
        cycle = coef * period
        return [(rank % cycle) // coef for rank in range(size)]

    @staticmethod
    def keysfromderkeys(parentkeys, derkeys):
        '''return keys from parent keys and derkeys

        *Parameters*

        - **parentkeys** : list of keys from parent
        - **derkeys** : list of derived keys (indexed by parent key)

        *Returns* : list of keys'''
        return [derkeys[pkey] for pkey in parentkeys]

    @staticmethod
    def list(tuplelists):
        '''transform a list of tuples into a list of lists'''
        return [[*row] for row in tuplelists]

    @staticmethod
    def mul(values):
        '''return the product of the values of an iterable (1 if empty)'''
        result = 1
        for factor in values:
            result *= factor
        return result

    @staticmethod
    def reindex(oldkeys, oldcodec, newcodec):
        '''return the new keys obtained when oldcodec is replaced by newcodec
        (same values, other order)'''
        rank = {val: pos for pos, val in enumerate(newcodec)}
        return [rank[oldcodec[key]] for key in oldkeys]

    @staticmethod
    def reorder(values, sort=None):
        '''return a new values list following the order defined by sort
        (values itself if sort is empty or None)'''
        return [values[ind] for ind in sort] if sort else values

    @staticmethod
    def resetidx(values):
        '''return a tuple (codec, keys) built from a list of values'''
        codec = Cutil.tocodec(values)
        return (codec, Cutil.tokeys(values, codec))

    @staticmethod
    def tocodec(values, keys=None):
        '''extract a list of unique values.

        Without keys, the distinct values are returned in order of first
        appearance. With keys, the distinct (key, value) pairs are sorted and
        the value of each pair is returned.'''
        if not keys:
            return list(dict.fromkeys(values))
        ordered = sorted(set(zip(keys, values)))
        columns = list(zip(*ordered))
        return list(columns[1])

    @staticmethod
    def tokeys(values, codec=None):
        '''return the list of keys (positions in codec) of a list of values.
        The default codec is used if codec is empty or None.'''
        if not codec:
            codec = Cutil.tocodec(values)
        rank = {val: pos for pos, val in enumerate(codec)}
        return [rank[val] for val in values]

    @staticmethod
    def transpose(idxlist):
        '''exchange rows and columns in a list of lists (the result is
        truncated to the shortest row)'''
        return [list(column) for column in zip(*idxlist)]

    @staticmethod
    def tuple(idx):
        '''transform a list of lists into a list of tuples'''
        return [(*row,) for row in idx]

    @staticmethod
    def tupled(lis):
        '''transform recursively a list of lists into a tuple of tuples
        (a value that is not a list is returned unchanged)'''
        if not isinstance(lis, list):
            return lis
        return tuple(Cutil.tupled(item) for item in lis)

    @staticmethod
    def listed(lis):
        '''transform recursively a tuple of tuples into a list of lists
        (a value that is not a tuple is returned unchanged)'''
        if not isinstance(lis, tuple):
            return lis
        return [Cutil.listed(item) for item in lis]
common functions for Field and Dataset class
41 @staticmethod 42 def identity(leng): 43 '''return the root_field values''' 44 return list(range(leng))
return the root_field values
46 @staticmethod 47 def canonorder(lenidx): 48 '''return a list of crossed keys from a list of number of values''' 49 listrange = [range(lidx) for lidx in lenidx] 50 return Cutil.transpose(Cutil.list(list(product(*listrange))))
return a list of crossed keys from a list of number of values
52 @staticmethod 53 def default(values): 54 '''return default codec and keys from a list of values''' 55 codec = list(dict.fromkeys(values)) 56 dic = {codec[i]: i for i in range(len(codec))} 57 keys = [dic[val] for val in values] 58 return (codec, keys)
return default codec and keys from a list of values
60 @staticmethod 61 def dist(key1, key2, distr=False): 62 '''return default coupling codec between two keys list and optionaly if 63 the relationship is distributed''' 64 if not key1 or not key2: 65 return 0 66 k1k2 = [tuple((v1, v2)) for v1, v2 in zip(key1, key2)] 67 dist = len(Cutil.tocodec(k1k2)) 68 if not distr: 69 return dist 70 distrib = False 71 if dist == (max(key1) + 1) * (max(key2) + 1): 72 distrib = max(Counter(k1k2).values()) == len(key1) // dist 73 # distrib = min(sum(map(lambda x: (x + i) % (max(a) + 1), a)) == sum(a) for i in range(1, max(a)+1)) 74 return [dist, distrib]
return the number of distinct (key1, key2) pairs between two keys lists and, optionally, whether the relationship is distributed
76 @staticmethod 77 def encode_coef(lis): 78 '''Generate a repetition coefficient for periodic list''' 79 if len(lis) < 2: 80 return 0 81 coef = 1 82 while coef != len(lis): 83 if lis[coef-1] != lis[coef]: 84 break 85 coef += 1 86 if (not len(lis) % (coef * (max(lis) + 1)) and 87 lis == Cutil.keysfromcoef(coef, max(lis) + 1, len(lis))): 88 return coef 89 return 0
Generate a repetition coefficient for periodic list
91 @staticmethod 92 def funclist(value, func, *args, **kwargs): 93 '''return the function func applied to the object value with parameters args and kwargs''' 94 if func in (None, []): 95 return value 96 lis = [] 97 if not (isinstance(value, list) or value.__class__.__name__ in ['Cfield', 'Cdataset']): 98 listval = [value] 99 else: 100 listval = value 101 for val in listval: 102 try: 103 lis.append(val.func(*args, **kwargs)) 104 except: 105 try: 106 lis.append(func(val, *args, **kwargs)) 107 except: 108 try: 109 lis.append(listval.func(val, *args, **kwargs)) 110 except: 111 try: 112 lis.append(func(listval, val, *args, **kwargs)) 113 except: 114 raise FieldError("unable to apply func") 115 if len(lis) == 1: 116 return lis[0] 117 return lis
return the function func applied to the object value with parameters args and kwargs
119 @staticmethod 120 def idxfull(setidx): 121 '''return additional keys for each index in the setidx list to have crossed setidx''' 122 setcodec = [set(idx.keys) for idx in setidx] 123 lenfull = Cutil.mul([len(codec) for codec in setcodec]) 124 if lenfull <= len(setidx[0]): 125 return [] 126 complet = Counter(list(product(*setcodec))) 127 complet.subtract( 128 Counter(Cutil.tuple(Cutil.transpose([idx.keys for idx in setidx])))) 129 keysadd = Cutil.transpose(Cutil.list(list(complet.elements()))) 130 if not keysadd: 131 return [] 132 return keysadd
return additional keys for each index in the setidx list to have crossed setidx
134 @staticmethod 135 def idxlink(ref, lis): 136 ''' return a dict for each different tuple (ref value, lis value)''' 137 return dict(set(zip(ref, lis))) 138 #lis = set(util.tuple(util.transpose([ref, l2]))) 139 # if not len(lis) == len(set(ref)): 140 # return {} 141 # return dict(lis)
return a dict for each different tuple (ref value, lis value)
143 @staticmethod 144 def is_not_equal(value, tovalue=None, **kwargs): 145 ''' return True if value and tovalue are not equal''' 146 return value.__class__.__name__ != tovalue.__class__.__name__ or \ 147 value != tovalue
return True if value and tovalue are not equal
149 @staticmethod 150 def keysfromcoef(coef, period, leng=None): 151 ''' return a list of keys with periodic structure''' 152 if not leng: 153 leng = coef * period 154 return None if not (coef and period) else [(ind % (coef * period)) // coef 155 for ind in range(leng)]
return a list of keys with periodic structure
157 @staticmethod 158 def keysfromderkeys(parentkeys, derkeys): 159 '''return keys from parent keys and derkeys 160 161 *Parameters* 162 163 - **parentkeys** : list of keys from parent 164 - **derkeys** : list of derived keys 165 166 *Returns* : list of keys''' 167 return [derkeys[pkey] for pkey in parentkeys]
return keys from parent keys and derkeys
Parameters
- parentkeys : list of keys from parent
- derkeys : list of derived keys
Returns : list of keys
169 @staticmethod 170 def list(tuplelists): 171 '''transform a list of tuples in a list of lists''' 172 return list(map(list, tuplelists))
transform a list of tuples in a list of lists
174 @staticmethod 175 def mul(values): 176 '''return the product of values in a list or tuple (math.prod)''' 177 mul = 1 178 for val in values: 179 mul *= val 180 return mul
return the product of values in a list or tuple (math.prod)
182 @staticmethod 183 def reindex(oldkeys, oldcodec, newcodec): 184 '''new keys with new order of codec''' 185 dic = {newcodec[i]: i for i in range(len(newcodec))} 186 return [dic[oldcodec[key]] for key in oldkeys]
new keys with new order of codec
188 @staticmethod 189 def reorder(values, sort=None): 190 '''return a new values list following the order define by sort''' 191 if not sort: 192 return values 193 return [values[ind] for ind in sort]
return a new values list following the order defined by sort
195 @staticmethod 196 def resetidx(values): 197 '''return codec and keys from a list of values''' 198 codec = Cutil.tocodec(values) 199 return (codec, Cutil.tokeys(values, codec))
return codec and keys from a list of values
201 @staticmethod 202 def tocodec(values, keys=None): 203 '''extract a list of unique values''' 204 if not keys: 205 # return list(set(values)) 206 return list(dict.fromkeys(values)) 207 #ind, codec = zip(*sorted(set(zip(keys, values)))) 208 return list(list(zip(*sorted(set(zip(keys, values)))))[1])
extract a list of unique values
210 @staticmethod 211 def tokeys(values, codec=None): 212 ''' return a list of keys from a list of values''' 213 if not codec: 214 codec = Cutil.tocodec(values) 215 dic = {codec[i]: i for i in range(len(codec))} # !!!!long 216 keys = [dic[val] for val in values] # hyper long 217 return keys
return a list of keys from a list of values
219 @staticmethod 220 def transpose(idxlist): 221 '''exchange row/column in a list of list''' 222 # if not isinstance(idxlist, list): 223 # raise FieldError('index not transposable') 224 # if not idxlist: 225 # return [] 226 return list(map(list, zip(*idxlist))) 227 # return [list(elmt) for elmt in zip(*idxlist)] 228 #size = min([len(ix) for ix in idxlist]) 229 # return [[ix[ind] for ix in idxlist] for ind in range(size)]
exchange row/column in a list of list
231 @staticmethod 232 def tuple(idx): 233 '''transform a list of list in a list of tuple''' 234 return list(map(tuple, idx)) 235 # return [val if not isinstance(val, list) else tuple(val) for val in idx]
transform a list of list in a list of tuple
237 @staticmethod 238 def tupled(lis): 239 '''transform a list of list in a tuple of tuple''' 240 #return tuple(val if not isinstance(val, list) else Sfield._tupled(val) for val in lis) 241 return tuple(map(Cutil.tupled, lis)) if isinstance(lis, list) else lis
transform a list of list in a tuple of tuple
243 @staticmethod 244 def listed(lis): 245 '''transform a tuple of tuple in a list of list''' 246 #return [val if not isinstance(val, tuple) else Cutil.listed(val) for val in lis] 247 return list(map(Cutil.listed, lis)) if isinstance(lis, tuple) else lis
transform a tuple of tuple in a list of list
249class Cfield: 250 # %% intro 251 ''' 252 A `Cfield` is a representation of an Field list . 253 254 *Attributes (for dynamic attributes see @property methods)* : 255 256 - **name** : name of the Field 257 - **_codec** : list of values for each key 258 - **_keys** : list of code values 259 260 The methods defined in this class are : 261 262 *constructor (@classmethod)* 263 264 - `Cfield.bol` 265 - `Cfield.from_ntv` 266 - `Cfield.ntv` 267 - `Cfield.like` 268 269 *conversion static methods* 270 271 - `Cfield.ntv_to_val` (@classmethod) 272 - `Cfield.n_to_i` (@staticmethod) 273 274 *dynamic value (getters @property)* 275 276 - `Cfield.hashf` 277 - `Cfield.to_analysis` 278 - `Cfield.values` 279 - `Cfield.codec` 280 - `Cfield.infos` 281 - `Cfield.keys` 282 283 *add - update methods* 284 285 - `Cfield.add` 286 - `Cfield.append` 287 - `Cfield.setcodecvalue` 288 - `Cfield.setcodeclist` 289 - `Cfield.setname` 290 - `Cfield.set_keys` 291 - `Cfield.set_codec` 292 - `Cfield.setkeys` 293 - `Cfield.setlistvalue` 294 - `Cfield.setvalue` 295 296 *transform methods* 297 298 - `Cfield.coupling` 299 - `Cfield.check_relation` (@staticmethod) 300 - `Cfield.extendkeys` 301 - `Cfield.full` 302 - `Cfield.reindex` 303 - `Cfield.reorder` 304 - `Cfield.sort` 305 - `Cfield.tocoupled` 306 - `Cfield.tostdcodec` 307 308 *getters methods* 309 310 - `Cfield.couplinginfos` 311 - `Cfield.derkeys` 312 - `Cfield.getduplicates` 313 - `Cfield.iscrossed` 314 - `Cfield.iscoupled` 315 - `Cfield.isderived` 316 - `Cfield.islinked` 317 - `Cfield.isvalue` 318 - `Cfield.iskeysfromderkeys` 319 - `Cfield.keytoval` 320 - `Cfield.loc` 321 - `Cfield.recordfromkeys` 322 - `Cfield.recordfromvalue` 323 - `Cfield.valtokey` 324 ''' 325 326 def __init__(self, codec=None, name=None, keys=None, default=False, reindex=False): 327 '''Two modes: 328 - a single attributes : Cfield object to copy 329 - multiple attributes : set codec, name and keys attributes''' 330 if not codec and not keys: 331 self._codec = [] 332 self._keys 
= [] 333 elif isinstance(codec, Cfield): 334 self._keys = codec._keys 335 self._codec = codec._codec 336 self.name = codec.name 337 return 338 elif not default: 339 self._keys = keys if keys else Cutil.identity(len(codec)) 340 self._codec = codec if codec else Cutil.identity(len(keys)) 341 else: 342 self._codec, self._keys = Cutil.default(codec) 343 self.name = name if name else 'field' 344 if reindex: 345 self.reindex() 346 return 347 348 def __repr__(self): 349 '''return classname and number of value''' 350 return self.__class__.__name__ + '[' + str(len(self)) + ']' 351 352 def __str__(self): 353 '''return json string format''' 354 return str({self.name: self.values}) 355 356 def __eq__(self, other): 357 ''' equal if class and values are equal''' 358 return self.__class__ .__name__ == other.__class__.__name__ and \ 359 self.values == other.values 360 361 def __len__(self): 362 ''' len of values''' 363 return len(self._keys) 364 365 def __contains__(self, item): 366 ''' item of values''' 367 return item in self.values 368 369 def __getitem__(self, ind): 370 ''' return value item (value conversion)''' 371 if isinstance(ind, tuple): 372 return [copy(self.values[i]) for i in ind] 373 # return self.values[ind] 374 return copy(self.values[ind]) 375 376 def __setitem__(self, ind, item): 377 ''' modify values item''' 378 if isinstance(ind, slice): 379 start, stop, step = ind.start or 0, ind.stop or len(self), ind.step or 1 380 idxt = list(iter(range(start, stop, step))) 381 if len(idxt) != len(item): 382 raise FieldError("item length not consistent") 383 self.setlistvalue(item, idxt) 384 elif ind < 0 or ind >= len(self): 385 raise FieldError("out of bounds") 386 else: 387 self.setvalue(ind, item) 388 389 def __delitem__(self, ind): 390 '''remove a record (value and key).''' 391 self._keys.pop(ind) 392 self.reindex() 393 394 def __hash__(self): 395 '''return hash(values)''' 396 return hash(tuple(self.values)) 397 398 def _hashe(self): 399 '''return hash(values)''' 400 
return hash(tuple(self.values)) 401 402 def __add__(self, other): 403 ''' Add other's values to self's values in a new Field''' 404 newiindex = self.__copy__() 405 newiindex.__iadd__(other) 406 return newiindex 407 408 def __iadd__(self, other): 409 ''' Add other's values to self's values''' 410 return self.add(other, solve=False) 411 412 def __copy__(self): 413 ''' Copy all the data ''' 414 return self.__class__(self) 415 416 # %% property 417 @property 418 def hashf(self): 419 '''return hash(codec infos and keys)''' 420 return hash(tuple((len(self.codec), len(set(self.codec)), len(self), 421 self.name, tuple(self._keys)))) 422 423 @property 424 def to_analysis(self): 425 '''return data for AnaField module''' 426 return {'maxcodec': len(self), 'lencodec': len(self.codec), 'id': self.name, 427 'mincodec': len(set(self.codec)), 'hashf': self.hashf} 428 429 @property 430 def codec(self): 431 '''return codec ''' 432 return self._codec 433 434 @property 435 def infos(self): 436 '''return dict with lencodec, typecodec, ratecodec, mincodec, maxcodec''' 437 return AnaField(self.to_analysis).to_dict(full=True) 438 439 @property 440 def keys(self): 441 '''return keys ''' 442 return self._keys 443 444 @property 445 def values(self): 446 '''return values (see data model)''' 447 return [self._codec[key] for key in self._keys] 448 449 # %% class methods 450 @classmethod 451 def from_ntv(cls, ntv_value=None, extkeys=None, reindex=True, decode_str=False, 452 add_type=True, lengkeys=None): 453 '''Generate an Field Object from a Ntv field object''' 454 if isinstance(ntv_value, cls): 455 return copy(ntv_value) 456 if ntv_value is None: 457 return cls() 458 ntv = Ntv.obj(ntv_value, decode_str=decode_str) 459 #ntv = NtvList(ntv_value) 460 name, typ, codec, parent, keys, coef, leng = NtvUtil.decode_ntv_tab( 461 ntv, cls.ntv_to_val) 462 if parent and not extkeys: 463 return None 464 if coef: 465 keys = Cutil.keysfromcoef(coef, leng//coef, lengkeys) 466 elif extkeys and parent: 467 keys 
= Cutil.keysfromderkeys(extkeys, keys) 468 elif extkeys and not parent: 469 keys = extkeys 470 keys = list(range(len(codec))) if keys is None else keys 471 name = ntv.json_name(string=True) if add_type else name 472 return cls(codec=codec, name=name, keys=keys, reindex=reindex) 473 474 @classmethod 475 def bol(cls, leng, notdef=None, name=None, default=True): 476 ''' 477 Field constructor (boolean value). 478 479 *Parameters* 480 481 - **leng** : integer - length of the Field 482 - **notdef** : list (default None) - list of records without default value 483 - **default** : boolean (default True) - default value 484 - **name** : string (default None) - name of Field''' 485 values = [default] * leng 486 if notdef: 487 for item in notdef: 488 values[item] = not default 489 return cls.ntv({name: values}) 490 491 @classmethod 492 def like(cls, codec, parent, name=None, reindex=False): 493 '''Generate an Field Object from specific codec and keys from another field. 494 495 *Parameters* 496 497 - **codec** : list of objects 498 - **name** : string (default None) - name of index (see data model) 499 - **parent** : Field, parent of the new Field 500 - **reindex** : boolean (default True) - if True, default codec is apply 501 502 *Returns* : Field ''' 503 if isinstance(codec, Cfield): 504 return copy(codec) 505 return cls(codec=codec, name=name, keys=parent.keys, reindex=reindex) 506 507 @classmethod 508 def ntv(cls, ntv_value=None, extkeys=None, reindex=True, decode_str=False): 509 '''Generate an Field Object from a Ntv field object''' 510 return cls.from_ntv(ntv_value, extkeys=extkeys, reindex=reindex, decode_str=decode_str) 511 512 @classmethod 513 def ntv_to_val(cls, ntv): 514 '''conversion in decode_ntv_val method''' 515 return cls.n_to_i(ntv.val) 516 517 # %% static methods 518 @staticmethod 519 def n_to_i(ntv_lis): 520 ''' converting a NtvList value to an internal value''' 521 if isinstance(ntv_lis, list) and len(ntv_lis) == 0: 522 return [] 523 if isinstance(ntv_lis, 
list) and ntv_lis[0].__class__.__name__ in ('NtvSingle', 'NtvList'): 524 return [Cfield.n_to_i(ntv.to_obj()) for ntv in ntv_lis] 525 return ntv_lis 526 527 @staticmethod 528 def check_relation(parent, child, typecoupl, value=True): 529 '''get the inconsistent records for a relationship 530 531 *Parameters* 532 533 - **field** : child field involved in the relation 534 - **parent**: parent field involved in the relation 535 - **typecoupl**: str - relationship to check ('derived' or 'coupled') 536 - **value**: boolean (default True) - if True return a dict with inconsistent 537 values of the fields, else a tuple with index of records) 538 539 *Returns* : 540 541 - dict with inconsistent values of the fields 542 - or a tuple with index of records''' 543 match typecoupl: 544 case 'derived': 545 errors = parent.coupling(child, reindex=True) 546 case 'coupled': 547 errors = copy(parent).coupling(child, derived=False, reindex=True) 548 case _: 549 raise FieldError(typecoupl + "is not a valid relationship") 550 if not value: 551 return errors 552 return {'row': list(errors), child.name: child[errors], 553 parent.name: parent[errors]} 554 555 # %% instance methods 556 def add(self, other, solve=True): 557 ''' Add other's values to self's values 558 559 *Parameters* 560 561 - **other** : Field object to add to self object 562 - **solve** : Boolean (default True) - If True, replace None other's codec value 563 with self codec value. 
564 565 *Returns* : self ''' 566 if solve: 567 solved = copy(other) 568 for i in range(len(solved.codec)): 569 if solved.codec[i] is None and i in range(len(self.codec)): 570 solved._codec[i] = self.codec[i] 571 values = self.values + solved.values 572 else: 573 values = self.values + other.values 574 codec = Cutil.tocodec(values) 575 if set(codec) != set(self._codec): 576 self._codec = codec 577 self._keys = Cutil.tokeys(values, self._codec) 578 return self 579 580 def append(self, value, unique=True): 581 '''add a new value 582 583 *Parameters* 584 585 - **value** : new object value 586 - **unique** : boolean (default True) - If False, duplication codec if value is present 587 588 *Returns* : key of value ''' 589 #value = Ntv.obj(value) 590 #value = self.s_to_i(value) 591 if value in self._codec and unique: 592 key = self._codec.index(value) 593 else: 594 key = len(self._codec) 595 self._codec.append(value) 596 self._keys.append(key) 597 return key 598 599 def coupling(self, idx, derived=True, duplicate=True, reindex=False): 600 ''' 601 Transform indexes in coupled or derived indexes (codec extension). 602 If derived option is True, self._codec is extended and idx codec not, 603 else, both are coupled and both codec are extended. 604 605 *Parameters* 606 607 - **idx** : single Field or list of Field to be coupled or derived. 608 - **derived** : boolean (default : True) - if True result is derived, 609 if False coupled 610 - **duplicate** : boolean (default: True) - if True, return duplicate records 611 (only for self index) 612 - **reindex** : boolean (default : False). If True self.index is reindexed 613 with default codec. But if not derived, idx indexes MUST to be reindexed. 
614 615 *Returns* : tuple with duplicate records (errors) if 'duplicate', None else''' 616 duplic = tuple() 617 if not isinstance(idx, list): 618 index = [idx] 619 else: 620 index = idx 621 idxzip = self.__class__(list(zip(*([self.keys] + [ix.keys for ix in index]))), 622 reindex=True) 623 self.tocoupled(idxzip) 624 if not derived: 625 for ind in index: 626 ind.tocoupled(idxzip) 627 duplic += ind.getduplicates(reindex) 628 if duplicate and not duplic: 629 return self.getduplicates(reindex) 630 if duplicate and duplic: 631 return tuple(sorted(list(set(duplic + self.getduplicates(reindex))))) 632 if reindex: 633 self.reindex() 634 return None 635 636 def couplinginfos(self, other): 637 '''return a dict with the coupling info between other (distance, ratecpl, 638 rateder, dist, disttomin, disttomax, distmin, distmax, diff, typecoupl) 639 640 *Parameters* 641 642 - **other** : other index to compare 643 644 *Returns* : dict''' 645 if min(len(self), len(other)) == 0: 646 null = Cfield() 647 return AnaRelation([AnaField(null.to_analysis), AnaField(null.to_analysis)], 648 Cutil.dist(null.keys, null.keys, True) 649 ).to_dict(distances=True, misc=True) 650 return AnaRelation([AnaField(self.to_analysis), AnaField(other.to_analysis)], 651 Cutil.dist(self.keys, other.keys, True) 652 ).to_dict(distances=True, misc=True) 653 654 def derkeys(self, parent): 655 '''return keys derived from parent keys 656 657 *Parameters* 658 659 - **parent** : Field - parent 660 661 *Returns* : list of keys''' 662 derkey = [-1] * len(parent.codec) 663 for i in range(len(self)): 664 derkey[parent.keys[i]] = self.keys[i] 665 if min(derkey) < 0: 666 raise FieldError("parent is not a derive Field") 667 return derkey 668 669 def extendkeys(self, keys): 670 '''add keys to the Field 671 672 *Parameters* 673 674 - **keys** : list of int (value lower or equal than actual keys) 675 676 *Returns* : None ''' 677 if min(keys) < 0 or max(keys) > len(self._codec) - 1: 678 raise FieldError('keys not consistent 
with codec') 679 self._keys += keys 680 681 @staticmethod 682 def full(listidx): 683 '''tranform a list of indexes in crossed indexes (value extension). 684 685 *Parameters* 686 687 - **listidx** : list of Field to transform 688 689 *Returns* : tuple of records added ''' 690 idx1 = listidx[0] 691 for idx in listidx: 692 if len(idx) != len(idx): 693 return None 694 leninit = len(idx1) 695 keysadd = Cutil.idxfull(listidx) 696 for idx, keys in zip(listidx, keysadd): 697 idx._keys += keys 698 return tuple(range(leninit, len(idx1))) 699 700 def getduplicates(self, reindex=False): 701 ''' calculate items with duplicate codec 702 703 *Parameters* 704 705 - **reindex** : boolean (default : False). If True index is reindexed with default codec 706 707 *Returns* : tuple of items with duplicate codec''' 708 count = Counter(self._codec) 709 defcodec = list(count - Counter(list(count))) 710 dkeys = defaultdict(list) 711 for key, ind in zip(self._keys, range(len(self))): 712 dkeys[key].append(ind) 713 dcodec = defaultdict(list) 714 for key, ind in zip(self._codec, range(len(self._codec))): 715 dcodec[key].append(ind) 716 duplicates = [] 717 for item in defcodec: 718 for codecitem in dcodec[item]: 719 duplicates += dkeys[codecitem] 720 if reindex: 721 self.reindex() 722 return tuple(duplicates) 723 724 def iscrossed(self, other): 725 '''return True if self is crossed to other''' 726 return self.couplinginfos(other)['rateder'] == 1.0 727 728 def iscoupled(self, other): 729 '''return True if self is coupled to other''' 730 info = self.couplinginfos(other) 731 return info['diff'] == 0 and info['rateder'] == 0.0 732 733 def isderived(self, other, only=False): 734 '''return True if self is derived from other''' 735 info = self.couplinginfos(other) 736 return not (info['diff'] == 0 and only) and info['rateder'] == 0.0 737 738 def iskeysfromderkeys(self, other): 739 '''return True if self._keys is relative from other._keys''' 740 leng = len(other.codec) 741 if leng % len(self._codec) != 
0: 742 return False 743 keys = [(i*len(self._codec))//leng for i in range(leng)] 744 return Cutil.keysfromderkeys(other.keys, keys) == self.keys 745 746 def islinked(self, other): 747 '''return True if self is linked to other''' 748 rate = self.couplinginfos(other)['rateder'] 749 return 0.0 < rate < 1.0 750 751 def isvalue(self, value): 752 ''' return True if value is in index values 753 754 *Parameters* 755 756 - **value** : value to check''' 757 return value in self.values 758 759 def keytoval(self, key): 760 ''' return the value of a key 761 762 *Parameters* 763 764 - **key** : key to convert into values 765 - **extern** : if True, return string representation else, internal value 766 767 *Returns* 768 769 - **int** : first key finded (None else)''' 770 if key < 0 or key >= len(self._codec): 771 return None 772 return self._codec[key] 773 774 def loc(self, value): 775 '''return a list of record number with value 776 777 *Parameters* 778 779 - **value** : value to check 780 781 *Returns* 782 783 - **list of int** : list of record number finded (None else)''' 784 return self.recordfromvalue(value) 785 786 def recordfromvalue(self, value): 787 '''return a list of record number with value 788 789 *Parameters* 790 791 - **value** : value to check 792 - **extern** : if True, compare value to external representation of self.value, 793 else, internal 794 795 *Returns* 796 797 - **list of int** : list of record number finded (None else)''' 798 799 if not value in self._codec: 800 return None 801 listkeys = [cod for cod, val in zip( 802 range(len(self._codec)), self._codec) if val == value] 803 return self.recordfromkeys(listkeys) 804 805 def recordfromkeys(self, listkeys): 806 '''return a list of record number with key in listkeys 807 808 *Parameters* 809 810 - **listkeys** : list of keys to check 811 812 *Returns* 813 814 - **list of int** : list of record number finded (None else)''' 815 816 return [rec for rec, key in zip(range(len(self)), self._keys) if key in 
listkeys] 817 818 def reindex(self, codec=None): 819 '''apply a reordered codec. If None, a new default codec is apply. 820 821 *Parameters* 822 823 - **codec** : list (default None) - reordered codec to apply. 824 825 *Returns* : self''' 826 827 if not codec: 828 codec = Cutil.tocodec(self.values) 829 self._keys = Cutil.reindex(self._keys, self._codec, codec) 830 self._codec = codec 831 return self 832 833 def reorder(self, sort=None, inplace=True): 834 '''Change the Field order with a new order define by sort and reset the codec. 835 836 *Parameters* 837 838 - **sort** : int list (default None)- new record order to apply. If None, no change. 839 - **inplace** : boolean (default True) - if True, new order is apply to self, 840 if False a new Field is created. 841 842 *Returns* 843 844 - **Field** : self if inplace, new Field if not inplace''' 845 values = Cutil.reorder(self.values, sort) 846 codec, keys = Cutil.resetidx(values) 847 if inplace: 848 self._keys = keys 849 self._codec = codec 850 return None 851 return self.__class__(name=self.name, codec=codec, keys=keys) 852 853 def setcodecvalue(self, oldvalue, newvalue): 854 '''update all the oldvalue by newvalue 855 856 *Parameters* 857 858 - **oldvalue** : list of values to replace 859 - **newvalue** : list of new value to apply 860 861 *Returns* : int - last codec rank updated (-1 if None)''' 862 863 rank = -1 864 for i in range(len(self._codec)): 865 if self._codec[i] == oldvalue: 866 self._codec[i] = newvalue 867 rank = i 868 return rank 869 870 def setcodeclist(self, listcodec): 871 '''update codec with listcodec values 872 873 *Parameters* 874 875 - **listcodec** : list of new codec values to apply 876 877 *Returns* : int - last codec rank updated (-1 if None)''' 878 self._codec = listcodec 879 880 def set_keys(self, keys): 881 ''' _keys setters ''' 882 self._keys = keys 883 884 def set_codec(self, codec): 885 ''' _codec setters ''' 886 self._codec = codec 887 888 def setkeys(self, keys, inplace=True): 889 
'''apply new keys (replace codec with extended codec from parent keys) 890 891 *Parameters* 892 893 - **keys** : list of keys to apply 894 - **inplace** : if True, update self data, else create a new Field 895 896 *Returns* : self or new Field''' 897 codec = Cutil.tocodec(self.values, keys) 898 if inplace: 899 self._codec = codec 900 self._keys = keys 901 return self 902 return self.__class__(codec=codec, name=self.name, keys=keys) 903 904 def setname(self, name): 905 '''update the Field name 906 907 *Parameters* 908 909 - **name** : str to set into name 910 911 *Returns* : boolean - True if update''' 912 if isinstance(name, str): 913 self.name = name 914 return True 915 return False 916 917 def setvalue(self, ind, value): 918 '''update a value at the rank ind (and update codec and keys) 919 920 *Parameters* 921 922 - **ind** : rank of the value 923 - **value** : new value 924 925 *Returns* : None''' 926 values = self.values 927 values[ind] = value 928 self._codec, self._keys = Cutil.resetidx(values) 929 930 def setlistvalue(self, listvalue, listind=None): 931 '''update the values (and update codec and keys) 932 933 *Parameters* 934 935 - **listvalue** : list - list of new values 936 - **listind** : list(default None) - list of index 937 938 *Returns* : None''' 939 values = self.values 940 listind = listind if listind else range(len(self)) 941 for i, value_i in zip(listind, listvalue): 942 values[i] = value_i 943 self._codec, self._keys = Cutil.resetidx(values) 944 945 def sort(self, reverse=False, inplace=True, func=str): 946 '''Define sorted index with ordered codec. 947 948 *Parameters* 949 950 - **reverse** : boolean (defaut False) - codec is sorted with reverse order 951 - **inplace** : boolean (default True) - if True, new order is apply to self, 952 if False a new Field is created. 
953 - **func** : function (default str) - key used in the sorted function 954 955 *Return* 956 957 - **Field** : self if inplace, new Field if not inplace''' 958 if inplace: 959 self.reindex(codec=sorted(self._codec, reverse=reverse, key=func)) 960 self._keys.sort() 961 return self 962 oldcodec = self._codec 963 codec = sorted(oldcodec, reverse=reverse, key=str) 964 return self.__class__(name=self.name, codec=codec, 965 keys=sorted(Cutil.reindex(self._keys, oldcodec, codec))) 966 967 def tocoupled(self, other, coupling=True): 968 ''' 969 Transform a derived index in a coupled index (keys extension) and add 970 new values to have the same length as other. 971 972 *Parameters* 973 974 - **other** : index to be coupled. 975 - **coupling** : boolean (default True) - reindex if False 976 977 *Returns* : None''' 978 dic = Cutil.idxlink(other.keys, self._keys) 979 if not dic: 980 raise FieldError("Field is not coupled or derived from other") 981 self._codec = [self._codec[dic[i]] for i in range(len(dic))] 982 self._keys = other.keys 983 if not coupling: 984 self.reindex() 985 986 def tostdcodec(self, inplace=False, full=True): 987 ''' 988 Transform codec in full or in default codec. 
989 990 *Parameters* 991 992 - **inplace** : boolean (default True) - if True, new order is apply to self, 993 - **full** : boolean (default True) - if True reindex with full codec 994 995 *Return* 996 997 - **Field** : self if inplace, new Field if not inplace''' 998 if full: 999 codec = self.values 1000 keys = list(range(len(codec))) 1001 else: 1002 codec = Cutil.tocodec(self.values) 1003 keys = Cutil.reindex(self._keys, self._codec, codec) 1004 if inplace: 1005 self._codec = codec 1006 self._keys = keys 1007 return self 1008 return self.__class__(codec=codec, name=self.name, keys=keys) 1009 1010 def valtokey(self, value): 1011 '''convert a value to a key 1012 1013 *Parameters* 1014 1015 - **value** : value to convert 1016 1017 *Returns* 1018 1019 - **int** : first key finded (None else)''' 1020 if value in self._codec: 1021 return self._codec.index(value) 1022 return None
A Cfield is a representation of a Field list.
Attributes (for dynamic attributes see @property methods) :
- name : name of the Field
- _codec : list of values for each key
- _keys : list of code values
The methods defined in this class are :
constructor (@classmethod)
conversion static methods
Cfield.ntv_to_val (@classmethod), Cfield.n_to_i (@staticmethod)
dynamic value (getters @property)
add - update methods
Cfield.add, Cfield.append, Cfield.setcodecvalue, Cfield.setcodeclist, Cfield.setname, Cfield.set_keys, Cfield.set_codec, Cfield.setkeys, Cfield.setlistvalue, Cfield.setvalue
transform methods
Cfield.coupling, Cfield.check_relation (@staticmethod), Cfield.extendkeys, Cfield.full, Cfield.reindex, Cfield.reorder, Cfield.sort, Cfield.tocoupled, Cfield.tostdcodec
getters methods
def __init__(self, codec=None, name=None, keys=None, default=False, reindex=False):
    '''Build a Cfield, either from another Cfield or from explicit attributes.

    Two modes:
    - a single attribute : `codec` is a Cfield whose keys, codec and name are reused,
    - multiple attributes : codec, name and keys are set from the arguments.

    *Parameters*

    - **codec** : list or Cfield (default None) - codec values or Cfield to copy
    - **name** : string (default None) - name of the Field ('field' if empty)
    - **keys** : list (default None) - keys of the Field
    - **default** : boolean (default False) - if True, codec and keys are both
    computed by `Cutil.default(codec)`
    - **reindex** : boolean (default False) - if True, `reindex` is applied
    (not in copy mode)'''
    if not codec and not keys:
        self._codec, self._keys = [], []
    elif isinstance(codec, Cfield):
        # copy mode: the key and codec lists are shared with the source Cfield
        self._keys, self._codec, self.name = codec._keys, codec._codec, codec.name
        return
    elif default:
        self._codec, self._keys = Cutil.default(codec)
    else:
        # the missing attribute is replaced by an identity list sized on the other one
        self._keys = keys or Cutil.identity(len(codec))
        self._codec = codec or Cutil.identity(len(keys))
    self.name = name or 'field'
    if reindex:
        self.reindex()
Two modes:
- a single attributes : Cfield object to copy
- multiple attributes : set codec, name and keys attributes
@classmethod
def from_ntv(cls, ntv_value=None, extkeys=None, reindex=True, decode_str=False,
             add_type=True, lengkeys=None):
    '''Generate a Field object from a Ntv field object.

    *Parameters*

    - **ntv_value** : Ntv object, Ntv value or Field (default None) - a Field
    is returned as a copy, None gives an empty Field
    - **extkeys** : list (default None) - external keys used when no coef is decoded
    - **reindex** : boolean (default True) - forwarded to the constructor
    - **decode_str** : boolean (default False) - forwarded to `Ntv.obj`
    - **add_type** : boolean (default True) - if True, the name is taken from
    `ntv.json_name(string=True)` instead of the decoded name
    - **lengkeys** : integer (default None) - forwarded to `Cutil.keysfromcoef`

    *Returns* : Field, or None if a parent is decoded and `extkeys` is empty'''
    if isinstance(ntv_value, cls):
        return copy(ntv_value)
    if ntv_value is None:
        return cls()
    ntv = Ntv.obj(ntv_value, decode_str=decode_str)
    decoded = NtvUtil.decode_ntv_tab(ntv, cls.ntv_to_val)
    name, _, codec, parent, keys, coef, leng = decoded
    if parent and not extkeys:
        return None
    if coef:
        keys = Cutil.keysfromcoef(coef, leng // coef, lengkeys)
    elif extkeys:
        # with a parent, the decoded keys are relative to the external keys
        keys = Cutil.keysfromderkeys(extkeys, keys) if parent else extkeys
    if keys is None:
        keys = list(range(len(codec)))
    if add_type:
        name = ntv.json_name(string=True)
    return cls(codec=codec, name=name, keys=keys, reindex=reindex)
Generate a Field object from a Ntv field object
@classmethod
def bol(cls, leng, notdef=None, name=None, default=True):
    '''
    Field constructor (boolean value).

    *Parameters*

    - **leng** : integer - length of the Field
    - **notdef** : list (default None) - ranks of the records which take the
    opposite of the default value
    - **default** : boolean (default True) - default value
    - **name** : string (default None) - name of Field

    *Returns* : Field built by `cls.ntv` from `{name: values}`'''
    values = [default for _ in range(leng)]
    opposite = not default
    for rank in notdef or ():
        values[rank] = opposite
    return cls.ntv({name: values})
Field constructor (boolean value).
Parameters
- leng : integer - length of the Field
- notdef : list (default None) - list of records without default value
- default : boolean (default True) - default value
- name : string (default None) - name of Field
@classmethod
def like(cls, codec, parent, name=None, reindex=False):
    '''Generate a Field object from a specific codec and the keys of another Field.

    *Parameters*

    - **codec** : list of objects (a Cfield is returned as a copy)
    - **parent** : Field, parent of the new Field (its keys are reused)
    - **name** : string (default None) - name of index (see data model)
    - **reindex** : boolean (default False) - forwarded to the constructor

    *Returns* : Field '''
    if not isinstance(codec, Cfield):
        return cls(codec=codec, name=name, keys=parent.keys, reindex=reindex)
    return copy(codec)
Generate a Field object from a specific codec and the keys of another field.
Parameters
- codec : list of objects
- name : string (default None) - name of index (see data model)
- parent : Field, parent of the new Field
- reindex : boolean (default False) - if True, default codec is applied
Returns : Field
@classmethod
def ntv(cls, ntv_value=None, extkeys=None, reindex=True, decode_str=False):
    '''Generate a Field object from a Ntv field object.

    Shortcut for `from_ntv` with the same `ntv_value`, `extkeys`, `reindex`
    and `decode_str` arguments (other `from_ntv` options keep their defaults).'''
    options = {'extkeys': extkeys, 'reindex': reindex, 'decode_str': decode_str}
    return cls.from_ntv(ntv_value, **options)
Generate a Field object from a Ntv field object
@classmethod
def ntv_to_val(cls, ntv):
    '''conversion used in the decode_ntv_val method : return the internal
    value (see `n_to_i`) of the `val` attribute of ntv'''
    raw_value = ntv.val
    return cls.n_to_i(raw_value)
conversion in decode_ntv_val method
518 @staticmethod 519 def n_to_i(ntv_lis): 520 ''' converting a NtvList value to an internal value''' 521 if isinstance(ntv_lis, list) and len(ntv_lis) == 0: 522 return [] 523 if isinstance(ntv_lis, list) and ntv_lis[0].__class__.__name__ in ('NtvSingle', 'NtvList'): 524 return [Cfield.n_to_i(ntv.to_obj()) for ntv in ntv_lis] 525 return ntv_lis
converting a NtvList value to an internal value
527 @staticmethod 528 def check_relation(parent, child, typecoupl, value=True): 529 '''get the inconsistent records for a relationship 530 531 *Parameters* 532 533 - **field** : child field involved in the relation 534 - **parent**: parent field involved in the relation 535 - **typecoupl**: str - relationship to check ('derived' or 'coupled') 536 - **value**: boolean (default True) - if True return a dict with inconsistent 537 values of the fields, else a tuple with index of records) 538 539 *Returns* : 540 541 - dict with inconsistent values of the fields 542 - or a tuple with index of records''' 543 match typecoupl: 544 case 'derived': 545 errors = parent.coupling(child, reindex=True) 546 case 'coupled': 547 errors = copy(parent).coupling(child, derived=False, reindex=True) 548 case _: 549 raise FieldError(typecoupl + "is not a valid relationship") 550 if not value: 551 return errors 552 return {'row': list(errors), child.name: child[errors], 553 parent.name: parent[errors]}
get the inconsistent records for a relationship
Parameters
- field : child field involved in the relation
- parent: parent field involved in the relation
- typecoupl: str - relationship to check ('derived' or 'coupled')
- value: boolean (default True) - if True return a dict with inconsistent values of the fields, else a tuple with index of records)
Returns :
- dict with inconsistent values of the fields
- or a tuple with index of records
def add(self, other, solve=True):
    ''' Add other's values to self's values

    *Parameters*

    - **other** : Field object to add to self object (not modified)
    - **solve** : Boolean (default True) - If True, replace None other's codec value
    with self codec value (same rank).

    *Returns* : self '''
    if solve:
        solved = copy(other)
        # the copy may share its codec list with other : use a private list
        # so that the replacement of None values never alters other
        solved._codec = list(solved._codec)
        own_codec = self.codec
        for i in range(len(solved.codec)):
            if solved.codec[i] is None and i < len(own_codec):
                solved._codec[i] = own_codec[i]
        values = self.values + solved.values
    else:
        values = self.values + other.values
    codec = Cutil.tocodec(values)
    # the current codec is kept when it holds the same set of values
    if set(codec) != set(self._codec):
        self._codec = codec
    self._keys = Cutil.tokeys(values, self._codec)
    return self
Add other's values to self's values
Parameters
- other : Field object to add to self object
- solve : Boolean (default True) - If True, replace None other's codec value with self codec value.
Returns : self
def append(self, value, unique=True):
    '''add a new value

    *Parameters*

    - **value** : new object value
    - **unique** : boolean (default True) - If False, the value is always
    added to the codec, even if it is already present

    *Returns* : key of value '''
    codec = self._codec
    known = value in codec and unique
    key = codec.index(value) if known else len(codec)
    if not known:
        codec.append(value)
    self._keys.append(key)
    return key
add a new value
Parameters
- value : new object value
- unique : boolean (default True) - If False, duplication codec if value is present
Returns : key of value
def coupling(self, idx, derived=True, duplicate=True, reindex=False):
    '''
    Transform indexes in coupled or derived indexes (codec extension).
    If derived option is True, self._codec is extended and idx codec not,
    else, both are coupled and both codec are extended.

    *Parameters*

    - **idx** : single Field or list of Field to be coupled or derived.
    - **derived** : boolean (default : True) - if True result is derived,
    if False coupled
    - **duplicate** : boolean (default: True) - if True, return duplicate records
    - **reindex** : boolean (default : False). Forwarded to `getduplicates`;
    if duplicate is False and reindex is True, self is reindexed.

    *Returns* : tuple with duplicate records (errors) if 'duplicate', None else'''
    fields = idx if isinstance(idx, list) else [idx]
    # temporary Field whose codec is the list of crossed keys of all the fields
    crossed = zip(self.keys, *(fld.keys for fld in fields))
    idxzip = self.__class__(list(crossed), reindex=True)
    self.tocoupled(idxzip)
    duplic = ()
    if not derived:
        for fld in fields:
            fld.tocoupled(idxzip)
            duplic += fld.getduplicates(reindex)
    if not duplicate:
        if reindex:
            self.reindex()
        return None
    if duplic:
        return tuple(sorted(set(duplic + self.getduplicates(reindex))))
    return self.getduplicates(reindex)
Transform indexes in coupled or derived indexes (codec extension). If derived option is True, self._codec is extended and idx codec not, else, both are coupled and both codec are extended.
Parameters
- idx : single Field or list of Field to be coupled or derived.
- derived : boolean (default : True) - if True result is derived, if False coupled
- duplicate : boolean (default: True) - if True, return duplicate records (only for self index)
- reindex : boolean (default : False). If True self.index is reindexed with default codec. But if not derived, idx indexes MUST to be reindexed.
Returns : tuple with duplicate records (errors) if 'duplicate', None else
def couplinginfos(self, other):
    '''return a dict with the coupling info between other (distance, ratecpl,
    rateder, dist, disttomin, disttomax, distmin, distmax, diff, typecoupl)

    If self or other is empty, the info is computed between two empty Cfield.

    *Parameters*

    - **other** : other index to compare

    *Returns* : dict'''
    if min(len(self), len(other)) == 0:
        null = Cfield()
        first, second = null, null
    else:
        first, second = self, other
    fields = [AnaField(first.to_analysis), AnaField(second.to_analysis)]
    relation = AnaRelation(fields, Cutil.dist(first.keys, second.keys, True))
    return relation.to_dict(distances=True, misc=True)
return a dict with the coupling info between other (distance, ratecpl, rateder, dist, disttomin, disttomax, distmin, distmax, diff, typecoupl)
Parameters
- other : other index to compare
Returns : dict
def derkeys(self, parent):
    '''return keys derived from parent keys

    The result has one item for each parent codec value : the key of self
    found in the records where the parent key is used.

    *Parameters*

    - **parent** : Field - parent

    *Returns* : list of keys (empty list if the parent codec is empty)

    *Raises* : FieldError if a parent codec rank is not used by any record'''
    derkey = [-1] * len(parent.codec)
    for rec in range(len(self)):
        derkey[parent.keys[rec]] = self.keys[rec]
    # any() (instead of min()) accepts an empty list
    if any(key < 0 for key in derkey):
        raise FieldError("parent is not a derive Field")
    return derkey
return keys derived from parent keys
Parameters
- parent : Field - parent
Returns : list of keys
def extendkeys(self, keys):
    '''add keys to the Field

    *Parameters*

    - **keys** : list of int (each value between 0 and the last codec rank).
    An empty list leaves the Field unchanged.

    *Returns* : None

    *Raises* : FieldError if a key is negative or greater than the last codec rank'''
    if not keys:
        # nothing to add (min and max are not defined for an empty list)
        return
    if min(keys) < 0 or max(keys) > len(self._codec) - 1:
        raise FieldError('keys not consistent with codec')
    self._keys += keys
add keys to the Field
Parameters
- keys : list of int (value lower or equal than actual keys)
Returns : None
681 @staticmethod 682 def full(listidx): 683 '''tranform a list of indexes in crossed indexes (value extension). 684 685 *Parameters* 686 687 - **listidx** : list of Field to transform 688 689 *Returns* : tuple of records added ''' 690 idx1 = listidx[0] 691 for idx in listidx: 692 if len(idx) != len(idx): 693 return None 694 leninit = len(idx1) 695 keysadd = Cutil.idxfull(listidx) 696 for idx, keys in zip(listidx, keysadd): 697 idx._keys += keys 698 return tuple(range(leninit, len(idx1)))
Transform a list of indexes into crossed indexes (value extension).
Parameters
- listidx : list of Field to transform
Returns : tuple of records added
def getduplicates(self, reindex=False):
    ''' calculate items with duplicate codec

    *Parameters*

    - **reindex** : boolean (default : False). If True index is reindexed
    (call of `reindex`) after the calculation

    *Returns* : tuple of items (record ranks) whose codec value is present
    more than once in the codec'''
    codec_count = Counter(self._codec)
    repeated = [val for val, nbr in codec_count.items() if nbr > 1]
    # records grouped by key
    records_by_key = defaultdict(list)
    for rec, key in zip(range(len(self)), self._keys):
        records_by_key[key].append(rec)
    # codec ranks grouped by codec value
    ranks_by_value = defaultdict(list)
    for rank, val in enumerate(self._codec):
        ranks_by_value[val].append(rank)
    duplicates = []
    for val in repeated:
        for rank in ranks_by_value[val]:
            duplicates.extend(records_by_key[rank])
    if reindex:
        self.reindex()
    return tuple(duplicates)
calculate items with duplicate codec
Parameters
- reindex : boolean (default : False). If True index is reindexed with default codec
Returns : tuple of items with duplicate codec
def iscrossed(self, other):
    '''return True if self is crossed to other
    (the 'rateder' item of `couplinginfos` equals 1.0)'''
    infos = self.couplinginfos(other)
    return infos['rateder'] == 1.0
return True if self is crossed to other
def iscoupled(self, other):
    '''return True if self is coupled to other
    (in `couplinginfos`, 'diff' equals 0 and 'rateder' equals 0.0)'''
    info = self.couplinginfos(other)
    if info['diff'] == 0:
        return info['rateder'] == 0.0
    return False
return True if self is coupled to other
def isderived(self, other, only=False):
    '''return True if self is derived from other
    (the 'rateder' item of `couplinginfos` equals 0.0)

    *Parameters*

    - **other** : other index to compare
    - **only** : boolean (default False) - if True, the result is False when
    the 'diff' item equals 0'''
    info = self.couplinginfos(other)
    if info['diff'] == 0 and only:
        return False
    return info['rateder'] == 0.0
return True if self is derived from other
def iskeysfromderkeys(self, other):
    '''return True if self._keys is relative from other._keys

    The result is False when the length of other's codec is not a multiple
    of the length of self's codec.'''
    nb_other = len(other.codec)
    nb_own = len(self._codec)
    if nb_other % nb_own:
        return False
    # regular derived keys : each self codec rank is repeated nb_other / nb_own times
    expected = [(rank * nb_own) // nb_other for rank in range(nb_other)]
    return Cutil.keysfromderkeys(other.keys, expected) == self.keys
return True if self._keys is relative from other._keys
def islinked(self, other):
    '''return True if self is linked to other
    (the 'rateder' item of `couplinginfos` is strictly between 0.0 and 1.0)'''
    rate = self.couplinginfos(other)['rateder']
    return rate > 0.0 and rate < 1.0
return True if self is linked to other
def isvalue(self, value):
    ''' return True if value is in index values

    *Parameters*

    - **value** : value to check

    *Returns* : boolean'''
    present = value in self.values
    return present
return True if value is in index values
Parameters
- value : value to check
def keytoval(self, key):
    ''' return the value of a key

    *Parameters*

    - **key** : int - key (codec rank) to convert into value

    *Returns*

    - **value** : codec value at rank key (None if key is negative or
    greater than the last codec rank)'''
    codec = self._codec
    if 0 <= key < len(codec):
        return codec[key]
    return None
return the value of a key
Parameters
- key : key to convert into values
- extern : if True, return string representation else, internal value
Returns
- value : codec value associated with the key (None if the key is out of range)
def loc(self, value):
    '''return a list of record number with value (alias of `recordfromvalue`)

    *Parameters*

    - **value** : value to check

    *Returns*

    - **list of int** : result of `recordfromvalue` for this value'''
    records = self.recordfromvalue(value)
    return records
return a list of record number with value
Parameters
- value : value to check
Returns
- list of int : list of the record numbers found (None if the value is not in the codec)
def recordfromvalue(self, value):
    '''return a list of record number with value

    *Parameters*

    - **value** : value to check (compared with the codec values)

    *Returns*

    - **list of int** : result of `recordfromkeys` for the codec ranks equal
    to value (None if value is not in the codec)'''
    if value not in self._codec:
        return None
    listkeys = [rank for rank, val in enumerate(self._codec) if val == value]
    return self.recordfromkeys(listkeys)
return a list of record number with value
Parameters
- value : value to check
- extern : if True, compare value to external representation of self.value, else, internal
Returns
- list of int : list of record number finded (None else)
def recordfromkeys(self, listkeys):
    '''return a list of record number with key in listkeys

    *Parameters*

    - **listkeys** : list of keys to check

    *Returns*

    - **list of int** : list of the record numbers found (empty list if none)'''
    records = []
    for rec, key in zip(range(len(self)), self._keys):
        if key in listkeys:
            records.append(rec)
    return records
return a list of record number with key in listkeys
Parameters
- listkeys : list of keys to check
Returns
- list of int : list of the record numbers found (empty list if none)
def reindex(self, codec=None):
    '''apply a reordered codec. If None (or empty), a new default codec
    (`Cutil.tocodec` of the values) is applied.

    *Parameters*

    - **codec** : list (default None) - reordered codec to apply.

    *Returns* : self'''
    new_codec = codec if codec else Cutil.tocodec(self.values)
    # keys are converted from the current codec to the new one
    self._keys = Cutil.reindex(self._keys, self._codec, new_codec)
    self._codec = new_codec
    return self
apply a reordered codec. If None, a new default codec is apply.
Parameters
- codec : list (default None) - reordered codec to apply.
Returns : self
def reorder(self, sort=None, inplace=True):
    '''Change the Field order with a new order defined by sort and reset the codec.

    *Parameters*

    - **sort** : int list (default None) - new record order, forwarded to
    `Cutil.reorder`.
    - **inplace** : boolean (default True) - if True, new order is applied to self,
    if False a new Field is created.

    *Returns*

    - **Field** : None if inplace, new Field if not inplace'''
    codec, keys = Cutil.resetidx(Cutil.reorder(self.values, sort))
    if not inplace:
        return self.__class__(name=self.name, codec=codec, keys=keys)
    self._keys, self._codec = keys, codec
    return None
Change the Field order with a new order define by sort and reset the codec.
Parameters
- sort : int list (default None)- new record order to apply. If None, no change.
- inplace : boolean (default True) - if True, new order is apply to self, if False a new Field is created.
Returns
- Field : None if inplace (self is updated), new Field if not inplace
def setcodecvalue(self, oldvalue, newvalue):
    '''replace every codec value equal to oldvalue by newvalue

    *Parameters*

    - **oldvalue** : codec value to replace
    - **newvalue** : new codec value to apply

    *Returns* : int - last codec rank updated (-1 if None)'''
    codec = self._codec
    rank = -1
    for ind, val in enumerate(codec):
        if val == oldvalue:
            codec[ind] = newvalue
            rank = ind
    return rank
update all the oldvalue by newvalue
Parameters
- oldvalue : list of values to replace
- newvalue : list of new value to apply
Returns : int - last codec rank updated (-1 if None)
def setcodeclist(self, listcodec):
    '''replace the codec with listcodec (keys are not modified)

    *Parameters*

    - **listcodec** : list of new codec values to apply

    *Returns* : None'''
    new_codec = listcodec
    self._codec = new_codec
update codec with listcodec values
Parameters
- listcodec : list of new codec values to apply
Returns : None
def setkeys(self, keys, inplace=True):
    '''apply new keys (replace codec with extended codec from parent keys)

    The new codec is `Cutil.tocodec` of the current values and the new keys.

    *Parameters*

    - **keys** : list of keys to apply
    - **inplace** : if True, update self data, else create a new Field

    *Returns* : self or new Field'''
    codec = Cutil.tocodec(self.values, keys)
    if not inplace:
        return self.__class__(codec=codec, name=self.name, keys=keys)
    self._codec, self._keys = codec, keys
    return self
apply new keys (replace codec with extended codec from parent keys)
Parameters
- keys : list of keys to apply
- inplace : if True, update self data, else create a new Field
Returns : self or new Field
def setname(self, name):
    '''update the Field name

    *Parameters*

    - **name** : str to set into name (other types are ignored)

    *Returns* : boolean - True if update'''
    if not isinstance(name, str):
        return False
    self.name = name
    return True
update the Field name
Parameters
- name : str to set into name
Returns : boolean - True if update
def setvalue(self, ind, value):
    '''update a value at the rank ind (and update codec and keys)

    *Parameters*

    - **ind** : rank of the value
    - **value** : new value

    *Returns* : None'''
    updated = self.values
    updated[ind] = value
    # codec and keys are both rebuilt from the updated values
    new_codec, new_keys = Cutil.resetidx(updated)
    self._codec, self._keys = new_codec, new_keys
update a value at the rank ind (and update codec and keys)
Parameters
- ind : rank of the value
- value : new value
Returns : None
def setlistvalue(self, listvalue, listind=None):
    '''update the values (and update codec and keys)

    *Parameters*

    - **listvalue** : list - list of new values
    - **listind** : list (default None) - ranks of the values to update.
    If None, the values are updated from the first rank. An empty list
    updates nothing.

    *Returns* : None'''
    values = self.values
    # only None selects all the ranks (an explicit empty list stays empty)
    if listind is None:
        listind = range(len(self))
    for ind, val in zip(listind, listvalue):
        values[ind] = val
    self._codec, self._keys = Cutil.resetidx(values)
update the values (and update codec and keys)
Parameters
- listvalue : list - list of new values
- listind : list(default None) - list of index
Returns : None
def sort(self, reverse=False, inplace=True, func=str):
    '''Define sorted index with ordered codec.

    *Parameters*

    - **reverse** : boolean (default False) - codec is sorted with reverse order
    - **inplace** : boolean (default True) - if True, new order is applied to self,
    if False a new Field is created.
    - **func** : function (default str) - key used in the sorted function
    (inplace or not)

    *Return*

    - **Field** : self if inplace, new Field if not inplace'''
    if inplace:
        self.reindex(codec=sorted(self._codec, reverse=reverse, key=func))
        self._keys.sort()
        return self
    oldcodec = self._codec
    # same sort key as the inplace branch
    codec = sorted(oldcodec, reverse=reverse, key=func)
    keys = sorted(Cutil.reindex(self._keys, oldcodec, codec))
    return self.__class__(name=self.name, codec=codec, keys=keys)
Define sorted index with ordered codec.
Parameters
- reverse : boolean (defaut False) - codec is sorted with reverse order
- inplace : boolean (default True) - if True, new order is apply to self, if False a new Field is created.
- func : function (default str) - key used in the sorted function
Return
- Field : self if inplace, new Field if not inplace
def tocoupled(self, other, coupling=True):
    '''
    Transform a derived index in a coupled index (keys extension) and add
    new values to have the same length as other.

    *Parameters*

    - **other** : index to be coupled (its keys become the keys of self).
    - **coupling** : boolean (default True) - reindex if False

    *Returns* : None

    *Raises* : FieldError if `Cutil.idxlink` gives no link between the keys'''
    link = Cutil.idxlink(other.keys, self._keys)
    if not link:
        raise FieldError("Field is not coupled or derived from other")
    old_codec = self._codec
    # one codec value for each rank of the link
    self._codec = [old_codec[link[rank]] for rank in range(len(link))]
    self._keys = other.keys
    if coupling:
        return
    self.reindex()
Transform a derived index in a coupled index (keys extension) and add new values to have the same length as other.
Parameters
- other : index to be coupled.
- coupling : boolean (default True) - reindex if False
Returns : None
def tostdcodec(self, inplace=False, full=True):
    '''
    Transform codec in full or in default codec.

    *Parameters*

    - **inplace** : boolean (default False) - if True, new codec is applied to self,
    - **full** : boolean (default True) - if True the codec is the list of
    values (one key for each record), else the codec is `Cutil.tocodec`
    of the values

    *Return*

    - **Field** : self if inplace, new Field if not inplace'''
    if full:
        codec = self.values
        keys = list(range(len(codec)))
    else:
        codec = Cutil.tocodec(self.values)
        keys = Cutil.reindex(self._keys, self._codec, codec)
    if not inplace:
        return self.__class__(codec=codec, name=self.name, keys=keys)
    self._codec, self._keys = codec, keys
    return self
Transform codec in full or in default codec.
Parameters
- inplace : boolean (default False) - if True, the new codec is applied to self,
- full : boolean (default True) - if True reindex with full codec
Return
- Field : self if inplace, new Field if not inplace
def valtokey(self, value):
    '''convert a value to a key

    *Parameters*

    - **value** : value to convert

    *Returns*

    - **int** : first codec rank of the value (None if not in the codec)'''
    codec = self._codec
    return codec.index(value) if value in codec else None
convert a value to a key
Parameters
- value : value to convert
Returns
- int : first key finded (None else)
Field Exception
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback