tab-dataset.tab_dataset.dataset_interface
The dataset_interface module is part of the tab-dataset package.
It contains the DatasetInterface class for Dataset entities.
For more information, see the user guide or the github repository.
# -*- coding: utf-8 -*-
"""
The `dataset_interface` module is part of the `tab-dataset` package.

It contains the `DatasetInterface` class for Dataset entities.

For more information, see the
[user guide](https://loco-philippe.github.io/tab-dataset/docs/user_guide.html)
or the [github repository](https://github.com/loco-philippe/tab-dataset).
"""

import csv
import math
import json
import xarray
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

from json_ntv.ntv import NtvList, NtvJsonEncoder
from tab_dataset.cfield import Cutil
from tab_dataset.cdataset import DatasetError


class DatasetInterface:
    '''this class includes Dataset methods :

    - `DatasetInterface.json`
    - `DatasetInterface.plot`
    - `DatasetInterface.to_ntv`
    - `DatasetInterface.to_csv`
    - `DatasetInterface.to_file`
    - `DatasetInterface.to_xarray`
    - `DatasetInterface.to_dataframe`
    - `DatasetInterface.view`
    - `DatasetInterface.vlist`
    - `DatasetInterface.voxel`
    '''

    def json(self, **kwargs):
        '''
        Return json dict, json string or Cbor binary.

        All keyword arguments are forwarded unchanged to `self.to_obj`.

        *Parameters (kwargs)*

        - **encoded** : boolean (default False) - choice for return format
        (string/bytes if True, dict else)
        - **format** : string (default 'json')- choice for return format (json, cbor)
        - **codif** : dict (default ES.codeb). Numerical value for string in CBOR encoder
        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
        if 'default' each index has keys, if 'optimize' keys are optimized,
        if 'dict' dict format is used, if 'nokeys' keys are absent
        - **name** : boolean (default False) - if False, default index name are not included
        - **geojson** : boolean (default False) - geojson for LocationValue if True

        *Returns* : string or dict'''
        return self.to_obj(**kwargs)

    def plot(self, varname=None, idxname=None, order=None, line=True, size=5,
             marker='o', maxlen=20):
        '''
        Visualize data with line or colormesh.

        *Parameters*

        - **varname** : string (default none) - Name of the variable to use. If None,
        first lvarname is used.
        - **idxname** : list (default None) - names of the fields used as axes. Names
        whose codec holds a single value are dropped. If None or empty, the
        default of `to_xarray` applies.
        - **line** : Boolean (default True) - Choice line or colormesh.
        - **order** : list (default None) - order of the axes (x, y, hue or col).
        [0, 1, 2] if None.
        - **size** : int (default 5) - plot size
        - **marker** : Char (default 'o') - Symbol for each point.
        - **maxlen** : Integer (default 20) - maximum length for string

        *Returns*

        - **dict** : for each dimension, the list of its coordinate values
        (None if the Dataset is not consistent)'''
        if not self.consistent:
            return None
        if idxname:
            idxname = [name for name in idxname
                       if len(self.nindex(name).codec) > 1]
        xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname,
                             lisfunc=None, dtype='str', npdtype='str',
                             maxlen=maxlen, coord=True)
        if not order:
            order = [0, 1, 2]

        def row(pos):
            # name of the integer '_row' coordinate attached to the axis at `pos`
            return xar.dims[order[pos]] + '_row'

        def ticks(pos):
            return list(xar.coords[row(pos)].values)

        ndim = len(xar.dims)
        if ndim == 1:
            xar.plot.line(x=xar.dims[0] + '_row', size=size, marker=marker)
        elif ndim == 2 and line:
            # the ticks follow the axis selected by `order`, as in the other branches
            xar.plot.line(x=row(0), xticks=ticks(0),
                          hue=xar.dims[order[1]], size=size, marker=marker)
        elif ndim == 2 and not line:
            xar.plot(x=row(0), y=row(1), xticks=ticks(0), yticks=ticks(1),
                     size=size)
        elif ndim == 3 and line:
            xar.plot.line(x=row(0), col=xar.dims[order[1]], xticks=ticks(0),
                          hue=xar.dims[order[2]], col_wrap=2, size=size,
                          marker=marker)
        elif ndim == 3 and not line:
            xar.plot(x=row(0), y=row(1), xticks=ticks(0), yticks=ticks(1),
                     col=xar.dims[order[2]], col_wrap=2, size=size)
        plt.show()
        return {dim: list(xar.coords[dim].values) for dim in xar.dims}

    def to_csv(self, filename, optcsv={'quoting': csv.QUOTE_NONNUMERIC}, **kwargs):
        '''
        Generate csv file to display data.

        *Parameters*

        - **filename** : string - file name (with path)
        - **optcsv** : dict (default {'quoting': csv.QUOTE_NONNUMERIC}) - parameters
        for csv.writer. A falsy value (None, {}) means no parameter.

        *Parameters (kwargs)*

        - **name=listcode** : element (default None) - eg location='ns'
            - listcode : string with Code for each index (j: json, n: name, s: simple).
            - name : name of the index
        - **lenres** : Integer (default : 0) - Number of rows (all if 0)
        - **header** : Boolean (default : True) - If True, first line with names
        - **ifunc** : function (default None) - function to apply to indexes
        - **other kwargs** : parameter for ifunc

        *Returns* : size of csv file (sum of the values returned by `writerow`)'''
        # The default dict is only unpacked, never modified, so sharing it
        # between calls is harmless.
        tab = self._to_tab(**kwargs)
        with open(filename, 'w', newline='', encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile, **(optcsv or {}))
            return sum(writer.writerow(lign) for lign in tab)

    def to_dataframe(self, info=False, idx=None, fillvalue='?', fillextern=True,
                     lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs):
        '''
        Complete the Object and generate a Pandas DataFrame with the dimension define by idx.

        *Parameters*

        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
        - **idx** : list (default none) - list of idx to be completed. If None or [],
        self.primaryname is used.
        - **fillvalue** : object (default '?') - value used for the new extval
        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
        - **lisfunc** : function (default none) - list of function to apply to indexes before export
        - **name** : string (default None) - DataArray name. If None, variable name
        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
        - **kwargs** : parameter for lisfunc

        *Returns* : pandas.DataFrame (None if the Dataset is not consistent)'''
        if not self.consistent:
            return None
        # `to_xarray` names this parameter `idxname`; passing it as `idx` would
        # silently drop it into **kwargs and ignore the requested axes.
        xar = self.to_xarray(info=info, idxname=idx, fillvalue=fillvalue,
                             fillextern=fillextern, lisfunc=lisfunc, name=name,
                             numeric=numeric, npdtype=npdtype, **kwargs)
        return xar.to_dataframe(name=name)

    def to_file(self, filename, **kwargs):
        '''Generate file to display data.

        *Parameters (kwargs)*

        - **filename** : string - file name (with path)
        - **kwargs** : see 'to_ntv' parameters ('format' defaults to 'cbor',
        'modecodec' to 'optimize', 'encoded' is always True)

        *Returns* : Integer - file length (bytes) '''
        option = {'format': 'cbor', 'modecodec': 'optimize'} | kwargs | {
            'encoded': True}
        data = self.to_ntv(modecodec=option['modecodec']).to_obj(**option)
        if option['format'] == 'cbor':
            # binary payload: written as is
            size = len(data)
            with open(filename, 'wb') as file:
                file.write(data)
        else:
            # text payload: the size is counted on its UTF-8 encoding
            size = len(bytes(data, 'UTF-8'))
            with open(filename, 'w', newline='', encoding="utf-8") as file:
                file.write(data)
        return size

    def to_ntv(self, modecodec='optimize', def_type='json', name=False):
        '''Return a Ntv tab value (without name) .

        *Parameters (kwargs)*

        - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
        if 'default' each index has keys, if 'optimize' keys are optimized,
        if 'dict' dict format is used, if 'nokeys' keys are absent
        - **def_type** : string (default 'json') - default ntv_type for NtvList or NtvSet
        (not used by this method)
        - **name** : boolean (default False) - if False, default index name are not included


        *Returns* : Ntv object'''
        # one boolean per field: True if the field name has to be exported
        idxname = [name or iname != 'i' + str(i)
                   for i, iname in enumerate(self.lname)]
        if modecodec != 'optimize':
            lis = [index.to_ntv(modecodec=modecodec, name=iname)
                   for index, iname in zip(self.lindex, idxname)]
            return NtvList(lis, self.name)

        lis = []
        for idx, iname, anafld in zip(self.lindex, idxname, self.anafields):
            coef = Cutil.encode_coef(idx.keys)
            parent = anafld.p_derived.view('index')
            if anafld.category == 'unique':
                lis.append(idx.to_ntv(name=iname))
            elif anafld.category == 'coupled':
                idx_coup = idx.setkeys(self.lindex[parent].keys, inplace=False)
                lis.append(idx_coup.to_ntv(parent=parent, name=iname))
            elif coef:
                lis.append(idx.to_ntv(keys=[coef], name=iname))
            elif parent == -1:  # cat='variable' or 'secondary'
                if idx.keys == list(range(len(self))):
                    lis.append(idx.to_ntv(modecodec='full', name=iname))
                else:
                    lis.append(idx.to_ntv(modecodec='default', name=iname))
            elif len(self.lindex[parent].codec) == len(self):  # derived
                lis.append(idx.to_ntv(modecodec='default', name=iname))
            else:  # derived, keys expressed relative to the parent
                keys = idx.derkeys(self.lindex[parent])
                lis.append(idx.to_ntv(keys=keys, parent=parent, name=iname))
        return NtvList(lis, self.name)

    def to_xarray(self, info=False, idxname=None, varname=None, fillvalue='?',
                  fillextern=True, lisfunc=None, name=None, numeric=False,
                  npdtype=None, attrs=None, coord=False, **kwargs):
        '''
        Complete the Object and generate a Xarray DataArray with the dimension define by idx.
        Only the first variable is included.

        *Parameters*

        - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
        - **idxname** : list (default none) - list of chosen primary fields. If None
        or [], self.primaryname is used.
        - **varname** : string (default none) - Name of the variable to use. If None,
        first lvarname is used.
        - **fillvalue** : object (default '?') - value used for the new extval
        - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
        - **lisfunc** : function (default none) - list of function to apply to indexes before export
        - **name** : string (default None) - DataArray name. If None, variable name
        - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
        - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
        - **attrs** : dict (default None) - attributes for the DataArray (not modified)
        - **coord** : boolean (default False) - if True, add derived coords
        - **kwargs** : parameter for lisfunc

        *Returns* : DataArray

        *Raises* : DatasetError if the Dataset is not consistent'''
        option = {'dtype': None} | kwargs
        if not self.consistent:
            raise DatasetError("Dataset not consistent")
        if idxname is None or idxname == []:
            idxname = self.primaryname
        ilf = self.full(idxname=idxname, varname=varname, fillvalue=fillvalue,
                        fillextern=fillextern, inplace=False)
        ilf.setcanonorder()
        if not varname and len(ilf.lvarname) != 0:
            varname = ilf.lvarname[0]
        # ivar is -1 when no variable field is available
        ivar = ilf.lname.index(varname) if varname in ilf.lname else -1

        # normalize lisfunc to one entry per field
        if isinstance(lisfunc, list) and len(lisfunc) == 1:
            lisfunc = lisfunc * ilf.lenindex
        elif isinstance(lisfunc, list) and len(lisfunc) != ilf.lenindex:
            lisfunc = [None] * ilf.lenindex
        elif not isinstance(lisfunc, list):
            # a single function only applies to the variable
            funcvar = lisfunc
            lisfunc = [None] * ilf.lenindex
            if ivar != -1:
                lisfunc[ivar] = funcvar
        lisfuncname = dict(zip(ilf.lname, lisfunc))
        coords = ilf._xcoord(idxname, ivar, lisfuncname, coord, **option)
        dims = idxname
        if numeric:
            # NOTE(review): this assignment has no effect - fillvalue has already
            # been consumed by self.full() above. Confirm the intent.
            fillvalue = math.nan
            npdtype = 'float'
            # set after the coords are built, so it only affects the data
            option['dtype'] = 'float'
        shape = [len(ilf.nindex(nam).codec) for nam in idxname]
        if ivar == -1:
            data = self.field(list(range(len(ilf)))).to_numpy(npdtype='int')\
                .reshape(shape)
        else:
            data = ilf.lindex[ivar]\
                .to_numpy(func=lisfunc[ivar], npdtype=npdtype, **option)\
                .reshape(shape)
        if not name:
            name = ilf.name if ivar == -1 else ilf.lname[ivar]
        # work on a copy: the caller's dict must not receive the added entries
        attrs = dict(attrs) if isinstance(attrs, dict) else {}
        for nam in ilf.lunicname:
            attrs[nam] = ilf.nindex(nam).codec[0]
        if info:
            attrs |= ilf.indexinfos()
        return xarray.DataArray(data, coords, dims, attrs=attrs, name=name)

    def voxel(self, idxname=None, varname=None):
        '''
        Plot not null values in a cube with voxels and return indexes values.

        When fewer than three fields are given, placeholder fields (named ' '
        and '  ') are added to the Dataset for the duration of the plot and
        removed afterwards, even if plotting fails.

        *Parameters*

        - **idxname** : list (default none) - list of idx to be completed. If None
        or [], self.primaryname is used. The list is not modified.
        - **varname** : string (default none) - Name of the variable to use. If None,
        first lvarname is used.

        *Returns* : **dict of indexes values** (None if the Dataset is not consistent)

        *Raises* : DatasetError if more than three fields are given
        '''
        if not self.consistent:
            return None
        if idxname is None or idxname == []:
            idxname = self.primaryname
        if varname is None and self.lvarname:
            varname = self.lvarname[0]
        if len(idxname) > 3:
            raise DatasetError('number of idx > 3')
        # Distinct names are required: two dimensions cannot share a name.
        # The slice pads 1 or 2 names up to 3 axes; an empty list is left as is.
        fillers = [' ', '  '][:3 - len(idxname)] if idxname else []
        # copy instead of '+=' so the caller's list (or self.primaryname) is untouched
        axes = list(idxname) + fillers
        for filler in fillers:
            self.addindex(self.field('null', filler, keys=[0]*len(self)))
        try:
            xar = self.to_xarray(idxname=axes, varname=varname, fillvalue='?',
                                 fillextern=False, lisfunc=Cutil.is_not_equal,
                                 tovalue='?')
            axe = plt.figure().add_subplot(projection='3d')
            axe.voxels(xar, edgecolor='k')
            axe.set_xticks(np.arange(self.idxlen[self.idxname.index(xar.dims[0])]))
            axe.set_yticks(np.arange(self.idxlen[self.idxname.index(xar.dims[1])]))
            axe.set_zticks(np.arange(self.idxlen[self.idxname.index(xar.dims[2])]))
            axe.set(xlabel=xar.dims[0][:8],
                    ylabel=xar.dims[1][:8],
                    zlabel=xar.dims[2][:8])
            plt.show()
        finally:
            # remove only the placeholders that were actually added
            if fillers:
                self.delindex(fillers)
        return {dim: list(xar.coords[dim].values) for dim in xar.dims}

    def view(self, **kwargs):
        '''
        Generate tabular list to display data.

        *Parameters (kwargs)*

        - **name=listcode** : element (default None) - eg location='ns'
            - listcode : string with Code for each index (j: json, n: name, s: simple).
            - name : name of the index
        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
        - **lenres** : Integer (default : 0) - Number of rows (all if 0)
        - **header** : Boolean (default : True) - First line with names
        - **width** : Integer (default None) - Number of characters displayed for each
        attribute (all if None)
        - **ifunc** : function (default None) - function to apply to indexes
        - **tabulate params** : default 'tablefmt': 'simple', 'numalign': 'decimal',
        'stralign': 'left', 'floatfmt': '.2f' - See tabulate module
        - **other kwargs** : parameter for ifunc

        *Returns* : table formatted by tabulate'''
        opttab = {'defcode': 'j', 'all': True, 'lenres': 0, 'header': True}
        optview = {'tablefmt': 'simple', 'numalign': 'decimal',
                   'stralign': 'left', 'floatfmt': '.2f'}
        option = opttab | optview | kwargs
        tab = self._to_tab(**option)
        width = kwargs.get('width')
        if width:
            # only strings are truncated
            tab = [[val[:width] if isinstance(val, str) else val
                    for val in lig] for lig in tab]
        # without a header line the first row is data and must not be
        # consumed as column titles
        headers = 'firstrow' if option['header'] else ()
        return tabulate(tab, headers=headers,
                        **{key: option[key] for key in optview})

    def vlist(self, *args, func=None, index=-1, **kwargs):
        '''
        Apply a function to an index and return the result.

        *Parameters*

        - **func** : function (default none) - function to apply to extval or extidx
        - **args, kwargs** : parameters for the function
        - **index** : integer - index to update (index=-1 for first variable)

        *Returns* : list of func result'''
        if index == -1 and self.lvar:
            return self.lvar[0].vlist(func, *args, **kwargs)
        if index == -1 and self.lenindex == 1:
            index = 0
        return self.lindex[index].vlist(func, *args, **kwargs)

    # %%internal

    def _to_tab(self, **kwargs):
        ''' data preparation (list of rows) for view or csv export.
        Representation is included if :
            - code is defined in the name element of the field
            - or code is defined in 'defcode' element and 'all' element is True

        *Parameters (kwargs)*

        - **name=listcode** : element (default None) - eg location='ns'
            - listcode : string with Code for each index (j: json, n: name, s: simple, f: ifunc).
            - name : name of the index
        - **defcode** : String (default : 'j') - default list code (if 'all' is True)
        - **all** : Boolean (default : True) - 'defcode apply to all indexes or none
        - **lenres** : Integer (default : 0) - Number of rows (all if 0)
        - **header** : Boolean (default : True) - if True, first row holds the names
        - **ifunc** : function (default None) - function to apply to indexes
        - **other kwargs** : parameter for ifunc

        *Returns* : list of rows (each row is a list of cells)'''

        option = {'defcode': 'j', 'all': True, 'lenres': 0, 'ifunc': None,
                  'header': True} | kwargs
        diccode = {'j': '', 'n': 'name-', 's': 'smpl-', 'f': 'func-'}

        # exported columns, in field order then code order: (field name, code char)
        columns = []
        for name in self.lname:
            opt = name if name in option else 'defcode'
            if opt != 'defcode' or option['all']:
                columns.extend((name, char) for char in diccode
                               if char in option[opt])

        def cell(val, char):
            # 'j' and 's' share the same json representation
            if char in ('j', 's'):
                return json.dumps(self.field.s_to_e(val), cls=NtvJsonEncoder)
            if char == 'n':
                return self.field.i_to_name(val)
            return Cutil.funclist(val, option['ifunc'], **kwargs)  # char == 'f'

        tab = []
        if option['header']:
            tab.append([diccode[char] + name for name, char in columns])
        lenres = option['lenres']
        if lenres == 0:
            lenres = len(self)
        nrows = min(lenres, len(self))
        if nrows > 0:
            # values are fetched once per field rather than once per cell
            values = {name: self.nindex(name).values
                      for name in {col[0] for col in columns}}
            for i in range(nrows):
                tab.append([cell(values[name][i], char)
                            for name, char in columns])
        return tab

    def _xcoord(self, axename, ivar, lisfuncname=None, coord=False, **kwargs):
        ''' Coords generation for Xarray

        *Parameters*

        - **axename** : list - names of the fields used as dimensions
        - **ivar** : integer - row of the variable field (skipped)
        - **lisfuncname** : dict (default None) - function to apply for each field name
        (used only if it holds one entry per field)
        - **coord** : boolean (default False) - if True, '_row' and '_str' coords
        are added for each dimension
        - **kwargs** : parameters for `to_numpy`

        *Returns* : dict of coords (xarray format)'''
        dic_part = self.field_partition(axename)
        coords = {}
        ana = self.analysis
        use_func = isinstance(lisfuncname, dict) and \
            len(lisfuncname) == self.lenindex
        for i in range(self.lenindex):
            iname = self.lname[i]
            if i in dic_part['variable'] or i in dic_part['unique'] or i == ivar:
                continue
            funci = lisfuncname[iname] if use_func else None
            if iname in axename:
                coords[iname] = self.lindex[i].to_numpy(
                    func=funci, codec=True, **kwargs)
                if coord:
                    coords[iname+'_row'] = (iname,
                                            np.arange(len(coords[iname])))
                    coords[iname+'_str'] = (iname,
                                            self.lindex[i].to_numpy(func=str, codec=True))
            else:
                # attach the field to the first axis it is derived from or coupled with
                f_prim = [self.nindex(name) for name in axename if
                          ana.get_relation(i, name).typecoupl in ['derived', 'coupled']][0]
                # NOTE: the keys of the field are realigned in place on the axis keys
                self.lindex[i].setkeys(f_prim.keys)
                coords[iname] = (f_prim.name, self.lindex[i].to_numpy(
                    func=funci, codec=True, **kwargs))
        return coords
26class DatasetInterface: 27 '''this class includes Dataset methods : 28 29 - `DatasetInterface.json` 30 - `DatasetInterface.plot` 31 - `DatasetInterface.to_ntv` 32 - `DatasetInterface.to_csv` 33 - `DatasetInterface.to_file` 34 - `DatasetInterface.to_xarray` 35 - `DatasetInterface.to_dataframe` 36 - `DatasetInterface.view` 37 - `DatasetInterface.vlist` 38 - `DatasetInterface.voxel` 39 ''' 40 41 def json(self, **kwargs): 42 ''' 43 Return json dict, json string or Cbor binary. 44 45 *Parameters (kwargs)* 46 47 - **encoded** : boolean (default False) - choice for return format 48 (string/bytes if True, dict else) 49 - **format** : string (default 'json')- choice for return format (json, cbor) 50 - **codif** : dict (default ES.codeb). Numerical value for string in CBOR encoder 51 - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec 52 if 'default' each index has keys, if 'optimize' keys are optimized, 53 if 'dict' dict format is used, if 'nokeys' keys are absent 54 - **name** : boolean (default False) - if False, default index name are not included 55 - **geojson** : boolean (default False) - geojson for LocationValue if True 56 57 *Returns* : string or dict''' 58 return self.to_obj(**kwargs) 59 60 def plot(self, varname=None, idxname=None, order=None, line=True, size=5, 61 marker='o', maxlen=20): 62 ''' 63 This function visualize data with line or colormesh. 64 65 *Parameters* 66 67 - **varname** : string (default none) - Name of the variable to use. If None, 68 first lvarname is used. 69 - **line** : Boolean (default True) - Choice line or colormesh. 70 - **order** : list (defaut None) - order of the axes (x, y, hue or col) 71 - **size** : int (defaut 5) - plot size 72 - **marker** : Char (default 'o') - Symbol for each point. 
73 - **maxlen** : Integer (default 20) - maximum length for string 74 75 *Returns* 76 77 - **None** ''' 78 if not self.consistent: 79 return None 80 if idxname: 81 idxname = [name for name in idxname if len( 82 self.nindex(name).codec) > 1] 83 #xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=[util.cast],## 84 xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=None, 85 dtype='str', npdtype='str', maxlen=maxlen, coord=True) 86 if not order: 87 order = [0, 1, 2] 88 89 if len(xar.dims) == 1: 90 xar.plot.line(x=xar.dims[0]+'_row', size=size, marker=marker) 91 elif len(xar.dims) == 2 and line: 92 xar.plot.line(x=xar.dims[order[0]] + '_row', 93 xticks=list(xar.coords[xar.dims[0]+'_row'].values), 94 hue=xar.dims[order[1]], size=size, marker=marker) 95 elif len(xar.dims) == 2 and not line: 96 xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row', 97 xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values), 98 yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values), 99 size=size) 100 elif len(xar.dims) == 3 and line: 101 xar.plot.line(x=xar.dims[order[0]] + '_row', col=xar.dims[order[1]], 102 xticks=list( 103 xar.coords[xar.dims[order[0]]+'_row'].values), 104 hue=xar.dims[order[2]], col_wrap=2, size=size, marker=marker) 105 elif len(xar.dims) == 3 and not line: 106 xar.plot(x=xar.dims[order[0]]+'_row', y=xar.dims[order[1]]+'_row', 107 xticks=list(xar.coords[xar.dims[order[0]]+'_row'].values), 108 yticks=list(xar.coords[xar.dims[order[1]]+'_row'].values), 109 col=xar.dims[order[2]], col_wrap=2, size=size) 110 plt.show() 111 return {xar.dims[i]: list(xar.coords[xar.dims[i]].values) for i in range(len(xar.dims))} 112 113 def to_csv(self, filename, optcsv={'quoting': csv.QUOTE_NONNUMERIC}, **kwargs): 114 ''' 115 Generate csv file to display data. 
116 117 *Parameters* 118 119 - **filename** : string - file name (with path) 120 - **optcsv** : parameter for csv.writer 121 122 *Parameters (kwargs)* 123 124 - **name=listcode** : element (default None) - eg location='ns' 125 - listcode : string with Code for each index (j: json, n: name, s: simple). 126 - name : name of the index 127 - **lenres** : Integer (default : 0) - Number of raws (all if 0) 128 - **header** : Boolean (default : True) - If True, first line with names 129 - **optcsv** : parameter for csv.writer 130 - **ifunc** : function (default None) - function to apply to indexes 131 - **other kwargs** : parameter for ifunc 132 133 *Returns* : size of csv file ''' 134 size = 0 135 if not optcsv: 136 optcsv = {} 137 tab = self._to_tab(**kwargs) 138 with open(filename, 'w', newline='', encoding="utf-8") as csvfile: 139 writer = csv.writer(csvfile, **optcsv) 140 for lign in tab: 141 size += writer.writerow(lign) 142 return size 143 144 def to_dataframe(self, info=False, idx=None, fillvalue='?', fillextern=True, 145 lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs): 146 ''' 147 Complete the Object and generate a Pandas DataFrame with the dimension define by idx. 148 149 *Parameters* 150 151 - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray 152 - **idx** : list (default none) - list of idx to be completed. If [], 153 self.primary is used. 154 - **fillvalue** : object (default '?') - value used for the new extval 155 - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value 156 - **lisfunc** : function (default none) - list of function to apply to indexes before export 157 - **name** : string (default None) - DataArray name. If None, variable name 158 - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values. 
159 - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None) 160 - **kwargs** : parameter for lisfunc 161 162 *Returns* : pandas.DataFrame ''' 163 if self.consistent: 164 return self.to_xarray(info=info, idx=idx, fillvalue=fillvalue, 165 fillextern=fillextern, lisfunc=lisfunc, name=name, 166 numeric=numeric, npdtype=npdtype, **kwargs 167 ).to_dataframe(name=name) 168 return None 169 170 def to_file(self, filename, **kwargs): 171 '''Generate file to display data. 172 173 *Parameters (kwargs)* 174 175 - **filename** : string - file name (with path) 176 - **kwargs** : see 'to_ntv' parameters 177 178 *Returns* : Integer - file lenght (bytes) ''' 179 option = {'format': 'cbor', 'modecodec': 'optimize'} | kwargs | { 180 'encoded': True} 181 data = self.to_ntv(modecodec=option['modecodec']).to_obj(**option) 182 if option['format'] == 'cbor': 183 size = len(data) 184 with open(filename, 'wb') as file: 185 file.write(data) 186 else: 187 size = len(bytes(data, 'UTF-8')) 188 with open(filename, 'w', newline='', encoding="utf-8") as file: 189 file.write(data) 190 return size 191 192 def to_ntv(self, modecodec='optimize', def_type='json', name=False): 193 '''Return a Ntv tab value (whithout name) . 
194 195 *Parameters (kwargs)* 196 197 - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec 198 if 'default' each index has keys, if 'optimize' keys are optimized, 199 if 'dict' dict format is used, if 'nokeys' keys are absent 200 - **def_type** : string (default 'json') - default ntv_type for NtvList or NtvSet 201 - **name** : boolean (default False) - if False, default index name are not included 202 203 204 *Returns* : Ntv object''' 205 idxname = [name or iname != 'i' + str(i) 206 for i, iname in enumerate(self.lname)] 207 if modecodec != 'optimize': 208 lis = [index.to_ntv(modecodec=modecodec, name=iname) 209 for index, iname in zip(self.lindex, idxname)] 210 else: 211 lis = [] 212 anafields = self.anafields 213 for idx, iname, anafld in zip(self.lindex, idxname, anafields): 214 coef = Cutil.encode_coef(idx.keys) 215 parent = anafld.p_derived.view('index') 216 if anafld.category == 'unique': 217 lis.append(idx.to_ntv(name=iname)) 218 elif anafld.category == 'coupled': 219 idx_coup = idx.setkeys( 220 self.lindex[parent].keys, inplace=False) 221 lis.append(idx_coup.to_ntv(parent=parent, name=iname)) 222 elif coef: 223 lis.append(idx.to_ntv(keys=[coef], name=iname)) 224 elif parent == -1: # cat='variable' or 'secondary' 225 if idx.keys == list(range(len(self))): 226 lis.append(idx.to_ntv(modecodec='full', name=iname)) 227 else: 228 lis.append(idx.to_ntv(modecodec='default', name=iname)) 229 else: # derived 230 if len(self.lindex[parent].codec) == len(self): 231 lis.append(idx.to_ntv(modecodec='default', name=iname)) 232 else: # derived 233 keys = idx.derkeys(self.lindex[parent]) 234 lis.append(idx.to_ntv( 235 keys=keys, parent=parent, name=iname)) 236 return NtvList(lis, self.name) 237 238 def to_xarray(self, info=False, idxname=None, varname=None, fillvalue='?', 239 fillextern=True, lisfunc=None, name=None, numeric=False, 240 npdtype=None, attrs=None, coord=False, **kwargs): 241 ''' 242 Complete the Object and generate a 
Xarray DataArray with the dimension define by idx. 243 Only the first variable is incuded. 244 245 *Parameters* 246 247 - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray 248 - **idxname** : list (default none) - list of choosen primary fields. If None, 249 self.primary is used. 250 - **varname** : string (default none) - Name of the variable to use. If None, 251 first lvarname is used. 252 - **fillvalue** : object (default '?') - value used for the new extval 253 - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value 254 - **lisfunc** : function (default none) - list of function to apply to indexes before export 255 - **name** : string (default None) - DataArray name. If None, variable name 256 - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values. 257 - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None) 258 - **attrs** : dict (default None) - attributes for the DataArray 259 - **coord** : boolean (default False) - if True, add derivated coords 260 - **kwargs** : parameter for lisfunc 261 262 *Returns* : DataArray ''' 263 option = {'dtype': None} | kwargs 264 if not self.consistent: 265 raise DatasetError("Dataset not consistent") 266 if idxname is None or idxname == []: 267 idxname = self.primaryname 268 ilf = self.full(idxname=idxname, varname=varname, fillvalue=fillvalue, 269 fillextern=fillextern, inplace=False) 270 ilf.setcanonorder() 271 if not varname and len(ilf.lvarname) != 0: 272 varname = ilf.lvarname[0] 273 if not varname in ilf.lname: 274 ivar = -1 275 else: 276 ivar = ilf.lname.index(varname) 277 if isinstance(lisfunc, list) and len(lisfunc) == 1: 278 lisfunc = lisfunc * ilf.lenindex 279 elif isinstance(lisfunc, list) and len(lisfunc) != ilf.lenindex: 280 lisfunc = [None] * ilf.lenindex 281 elif not isinstance(lisfunc, list): 282 funcvar = lisfunc 283 lisfunc = [None] * ilf.lenindex 284 if ivar != -1: 285 lisfunc[ivar] = 
funcvar 286 lisfuncname = dict(zip(ilf.lname, lisfunc)) 287 coords = ilf._xcoord(idxname, ivar, lisfuncname, coord, **option) 288 dims = idxname 289 if numeric: 290 #lisfunc[ivar] = util.cast 291 fillvalue = math.nan 292 npdtype = 'float' 293 option['dtype'] = 'float' 294 if ivar == -1: 295 data = self.field(list(range(len(ilf)))).to_numpy(npdtype='int')\ 296 .reshape([len(ilf.nindex(name).codec) for name in idxname]) 297 else: 298 data = ilf.lindex[ivar]\ 299 .to_numpy(func=lisfunc[ivar], npdtype=npdtype, **option)\ 300 .reshape([len(ilf.nindex(name).codec) for name in idxname]) 301 if not name and ivar == -1: 302 name = ilf.name 303 elif not name: 304 name = ilf.lname[ivar] 305 if not isinstance(attrs, dict): 306 attrs = {} 307 for nam in ilf.lunicname: 308 attrs[nam] = ilf.nindex(nam).codec[0] 309 if info: 310 attrs |= ilf.indexinfos() 311 #print(data, coords, dims, attrs, name) 312 return xarray.DataArray(data, coords, dims, attrs=attrs, name=name) 313 314 def voxel(self, idxname=None, varname=None): 315 ''' 316 Plot not null values in a cube with voxels and return indexes values. 317 318 *Parameters* 319 320 - **idxname** : list (default none) - list of idx to be completed. If None, 321 self.primary is used. 322 - **varname** : string (default none) - Name of the variable to use. If None, 323 first lvarname is used. 
324 325 *Returns* : **dict of indexes values** 326 ''' 327 if not self.consistent: 328 return None 329 if idxname is None or idxname == []: 330 idxname = self.primaryname 331 if varname is None and self.lvarname: 332 varname = self.lvarname[0] 333 if len(idxname) > 3: 334 raise DatasetError('number of idx > 3') 335 if len(idxname) == 2: 336 self.addindex(self.field('null', ' ', keys=[0]*len(self))) 337 idxname += [' '] 338 elif len(idxname) == 1: 339 self.addindex(self.field('null', ' ', keys=[0]*len(self))) 340 self.addindex(self.field('null', ' ', keys=[0]*len(self))) 341 idxname += [' ', ' '] 342 xar = self.to_xarray(idxname=idxname, varname=varname, fillvalue='?', 343 fillextern=False, lisfunc=Cutil.is_not_equal, tovalue='?') 344 axe = plt.figure().add_subplot(projection='3d') 345 axe.voxels(xar, edgecolor='k') 346 axe.set_xticks(np.arange(self.idxlen[self.idxname.index(xar.dims[0])])) 347 axe.set_yticks(np.arange(self.idxlen[self.idxname.index(xar.dims[1])])) 348 axe.set_zticks(np.arange(self.idxlen[self.idxname.index(xar.dims[2])])) 349 axe.set(xlabel=xar.dims[0][:8], 350 ylabel=xar.dims[1][:8], 351 zlabel=xar.dims[2][:8]) 352 plt.show() 353 self.delindex([' ', ' ']) 354 return {xar.dims[i]: list(xar.coords[xar.dims[i]].values) 355 for i in range(len(xar.dims))} 356 357 def view(self, **kwargs): 358 ''' 359 Generate tabular list to display data. 360 361 *Parameters (kwargs)* 362 363 - **name=listcode** : element (default None) - eg location='ns' 364 - listcode : string with Code for each index (j: json, n: name, s: simple). 
365 - name : name of the index 366 - **defcode** : String (default : 'j') - default list code (if 'all' is True) 367 - **all** : Boolean (default : True) - 'defcode apply to all indexes or none 368 - **lenres** : Integer (default : 0) - Number of raws (all if 0) 369 - **header** : Boolean (default : True) - First line with names 370 - **width** : Integer (default None) - Number of characters displayed for each 371 attribute (all if None) 372 - **ifunc** : function (default None) - function to apply to indexes 373 - **tabulate params** : default 'tablefmt': 'simple', 'numalign': 'left', 374 'stralign': 'left', 'floatfmt': '.3f' - See tabulate module 375 - **other kwargs** : parameter for ifunc 376 377 *Returns* : list or html table (tabulate format) ''' 378 opttab = {'defcode': 'j', 'all': True, 'lenres': 0, 'header': True} 379 optview = {'tablefmt': 'simple', 'numalign': 'decimal', 380 'stralign': 'left', 'floatfmt': '.2f'} 381 option = opttab | optview | kwargs 382 tab = self._to_tab(**option) 383 width = ({'width': None} | kwargs)['width'] 384 if width: 385 #tab = [[(lambda x: x[:width] if isinstance(x, str) else x)(val) 386 tab = [[val[:width] if isinstance(val, str) else val 387 for val in lig] for lig in tab] 388 return tabulate(tab, headers='firstrow', **{k: option[k] for k in optview}) 389 390 def vlist(self, *args, func=None, index=-1, **kwargs): 391 ''' 392 Apply a function to an index and return the result. 
393 394 *Parameters* 395 396 - **func** : function (default none) - function to apply to extval or extidx 397 - **args, kwargs** : parameters for the function 398 - **index** : integer - index to update (index=-1 for first variable) 399 400 *Returns* : list of func result''' 401 if index == -1 and self.lvar: 402 return self.lvar[0].vlist(func, *args, **kwargs) 403 if index == -1 and self.lenindex == 1: 404 index = 0 405 return self.lindex[index].vlist(func, *args, **kwargs) 406 407 # %%internal 408 409 def _to_tab(self, **kwargs): 410 ''' data preparation (dict of dict) for view or csv export. 411 Representation is included if : 412 - code is definie in the name element of the field 413 - or code is defined in 'defcode' element and 'all' element is True 414 415 *Parameters (kwargs)* 416 417 - **name=listcode** : element (default None) - eg location='ns' 418 - listcode : string with Code for each index (j: json, n: name, s: simple, f: ifunc). 419 - name : name of the index 420 - **defcode** : String (default : 'j') - default list code (if 'all' is True) 421 - **all** : Boolean (default : True) - 'defcode apply to all indexes or none 422 - **lenres** : Integer (default : 0) - Number of raws (all if 0) 423 - **ifunc** : function (default None) - function to apply to indexes 424 - **other kwargs** : parameter for ifunc''' 425 426 option = {'defcode': 'j', 'all': True, 'lenres': 0, 'ifunc': None, 427 'header': True} | kwargs 428 tab = [] 429 reslist = [] 430 diccode = {'j': '', 'n': 'name-', 's': 'smpl-', 'f': 'func-'} 431 if option['header']: 432 for name in self.lname: 433 opt = name if name in option else 'defcode' 434 if opt != 'defcode' or option['all']: 435 for char, code in diccode.items(): 436 if char in option[opt]: 437 reslist.append(code + name) 438 tab.append(reslist) 439 lenres = option['lenres'] 440 if lenres == 0: 441 lenres = len(self) 442 for i in range(min(lenres, len(self))): 443 reslist = [] 444 for name in self.lname: 445 opt = name if name in 
option else 'defcode' 446 if opt != 'defcode' or option['all']: 447 for char, code in diccode.items(): 448 if char in option[opt]: 449 val = self.nindex(name).values[i] 450 if char == 'j': 451 #reslist.append(util.cast(val, dtype='json')) 452 reslist.append(json.dumps( 453 self.field.s_to_e(val), cls=NtvJsonEncoder)) 454 elif char == 'n': 455 reslist.append(self.field.i_to_name(val)) 456 elif char == 's': 457 reslist.append(json.dumps( 458 self.field.s_to_e(val), cls=NtvJsonEncoder)) 459 elif char == 'f': 460 reslist.append(Cutil.funclist( 461 val, option['ifunc'], **kwargs)) 462 tab.append(reslist) 463 return tab 464 465 def _xcoord(self, axename, ivar, lisfuncname=None, coord=False, **kwargs): 466 ''' Coords generation for Xarray''' 467 #maxlen = kwargs.get('maxlen', 20) 468 #info = self.indexinfos() 469 dic_part = self.field_partition(axename) 470 coords = {} 471 ana = self.analysis 472 for i in range(self.lenindex): 473 #fieldi = info[i] 474 iname = self.lname[i] 475 # if fieldi['pparent'] == -1 or i == ivar: 476 if i in dic_part['variable'] or i in dic_part['unique'] or i == ivar: 477 continue 478 if isinstance(lisfuncname, dict) and len(lisfuncname) == self.lenindex: 479 funci = lisfuncname[iname] 480 else: 481 funci = None 482 if iname in axename: 483 coords[iname] = self.lindex[i].to_numpy( 484 func=funci, codec=True, **kwargs) 485 if coord: 486 coords[iname+'_row'] = (iname, 487 np.arange(len(coords[iname]))) 488 coords[iname+'_str'] = (iname, 489 self.lindex[i].to_numpy(func=str, codec=True)) 490 else: 491 #ascendants = self.analysis.fields[i].ascendants('derived', 'index') # !!!!!! 492 #p_prim = [ind for ind in ascendants if self.lname[ind] in axename][0] 493 #p_prim = self.analysis.fields[i].ascendants('derived', 'index')[-1] 494 #self.lindex[i].setkeys(self.lindex[p_prim].keys) # !!! 
495 #coords[iname] = (self.lname[p_prim], 496 # self.lindex[i].to_numpy(func=funci, codec=True, **kwargs)) 497 f_prim = [self.nindex(name) for name in axename if 498 ana.get_relation(i, name).typecoupl in ['derived', 'coupled']][0] 499 self.lindex[i].setkeys(f_prim.keys) # !!! 500 coords[iname] = (f_prim.name, self.lindex[i].to_numpy( 501 func=funci, codec=True, **kwargs)) 502 return coords
this class includes Dataset methods :
def json(self, **kwargs):
    '''
    Export the Dataset as a json dict, a json string or a Cbor binary.

    This method is an alias: every keyword argument is forwarded unchanged
    to `to_obj` and its result is returned as is.

    *Parameters (kwargs)*

    - **encoded** : boolean (default False) - choice for return format
    (string/bytes if True, dict else)
    - **format** : string (default 'json')- choice for return format (json, cbor)
    - **codif** : dict (default ES.codeb). Numerical value for string in CBOR encoder
    - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
    if 'default' each index has keys, if 'optimize' keys are optimized,
    if 'dict' dict format is used, if 'nokeys' keys are absent
    - **name** : boolean (default False) - if False, default index names are not included
    - **geojson** : boolean (default False) - geojson for LocationValue if True

    *Returns* : string or dict'''
    exported = self.to_obj(**kwargs)
    return exported
Return json dict, json string or Cbor binary.
Parameters (kwargs)
- encoded : boolean (default False) - choice for return format (string/bytes if True, dict else)
- format : string (default 'json')- choice for return format (json, cbor)
- codif : dict (default ES.codeb). Numerical value for string in CBOR encoder
- modecodec : string (default 'optimize') - if 'full', each index is with a full codec if 'default' each index has keys, if 'optimize' keys are optimized, if 'dict' dict format is used, if 'nokeys' keys are absent
- name : boolean (default False) - if False, default index names are not included
- geojson : boolean (default False) - geojson for LocationValue if True
Returns : string or dict
def plot(self, varname=None, idxname=None, order=None, line=True, size=5,
         marker='o', maxlen=20):
    '''
    Visualize data with lines or with a colormesh.

    *Parameters*

    - **varname** : string (default None) - Name of the variable to use. If None,
    first lvarname is used.
    - **idxname** : list (default None) - names of the fields used as dimensions.
    Fields whose codec holds a single value are discarded. If None or empty,
    the value is passed unchanged to `to_xarray`.
    - **order** : list (default None) - order of the axes (x, y, hue or col).
    [0, 1, 2] is used if None or empty.
    - **line** : Boolean (default True) - Choice line or colormesh.
    - **size** : int (default 5) - plot size
    - **marker** : Char (default 'o') - Symbol for each point.
    - **maxlen** : Integer (default 20) - maximum length for string

    *Returns*

    - **dict** : coordinate values of each dimension of the plotted DataArray
    (None if the Dataset is not consistent) '''
    if not self.consistent:
        return None
    if idxname:
        idxname = [name for name in idxname
                   if len(self.nindex(name).codec) > 1]
    xar = self.to_xarray(numeric=True, varname=varname, idxname=idxname, lisfunc=None,
                         dtype='str', npdtype='str', maxlen=maxlen, coord=True)
    if not order:
        order = [0, 1, 2]
    dims = xar.dims

    def row(rank):
        '''name of the '_row' coordinate of the dimension placed at `rank` by `order`'''
        return dims[order[rank]] + '_row'

    def ticks(rank):
        '''tick positions : values of the '_row' coordinate selected by `rank`'''
        return list(xar.coords[row(rank)].values)

    if len(dims) == 1:
        xar.plot.line(x=dims[0] + '_row', size=size, marker=marker)
    elif len(dims) == 2 and line:
        # xticks follow the dimension actually drawn on x (order[0]); they were
        # previously read from dims[0], which is wrong when `order` swaps axes.
        xar.plot.line(x=row(0), xticks=ticks(0),
                      hue=dims[order[1]], size=size, marker=marker)
    elif len(dims) == 2:
        xar.plot(x=row(0), y=row(1), xticks=ticks(0), yticks=ticks(1),
                 size=size)
    elif len(dims) == 3 and line:
        xar.plot.line(x=row(0), col=dims[order[1]], xticks=ticks(0),
                      hue=dims[order[2]], col_wrap=2, size=size, marker=marker)
    elif len(dims) == 3:
        xar.plot(x=row(0), y=row(1), xticks=ticks(0), yticks=ticks(1),
                 col=dims[order[2]], col_wrap=2, size=size)
    plt.show()
    return {dim: list(xar.coords[dim].values) for dim in dims}
This function visualizes data with lines or with a colormesh.
Parameters
- varname : string (default none) - Name of the variable to use. If None, first lvarname is used.
- line : Boolean (default True) - Choice line or colormesh.
- order : list (default None) - order of the axes (x, y, hue or col)
- size : int (default 5) - plot size
- marker : Char (default 'o') - Symbol for each point.
- maxlen : Integer (default 20) - maximum length for string
Returns
- dict of the coordinate values of each plotted dimension (None if the Dataset is not consistent)
def to_csv(self, filename, optcsv={'quoting': csv.QUOTE_NONNUMERIC}, **kwargs):
    '''
    Write the tabular representation of the data to a csv file.

    The rows are produced by `_to_tab` and written with `csv.writer`
    in a file opened with utf-8 encoding.

    *Parameters*

    - **filename** : string - file name (with path)
    - **optcsv** : dict (default {'quoting': csv.QUOTE_NONNUMERIC}) - keyword
    parameters for csv.writer (no parameter if None or empty)

    *Parameters (kwargs)* - forwarded to `_to_tab`

    - **name=listcode** : element (default None) - eg location='ns'
        - listcode : string with Code for each index (j: json, n: name, s: simple).
        - name : name of the index
    - **lenres** : Integer (default : 0) - Number of rows (all if 0)
    - **header** : Boolean (default : True) - If True, first line with names
    - **ifunc** : function (default None) - function to apply to indexes
    - **other kwargs** : parameter for ifunc

    *Returns* : integer - sum of the values returned by `writerow` for each row'''
    writer_options = optcsv if optcsv else {}
    rows = self._to_tab(**kwargs)
    with open(filename, 'w', newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile, **writer_options)
        return sum(writer.writerow(row) for row in rows)
Generate csv file to display data.
Parameters
- filename : string - file name (with path)
- optcsv : parameter for csv.writer
Parameters (kwargs)
- name=listcode : element (default None) - eg location='ns'
- listcode : string with Code for each index (j: json, n: name, s: simple).
- name : name of the index
- lenres : Integer (default : 0) - Number of rows (all if 0)
- header : Boolean (default : True) - If True, first line with names
- optcsv : parameter for csv.writer
- ifunc : function (default None) - function to apply to indexes
- other kwargs : parameter for ifunc
Returns : size of csv file
def to_dataframe(self, info=False, idx=None, fillvalue='?', fillextern=True,
                 lisfunc=None, name=None, numeric=False, npdtype=None, **kwargs):
    '''
    Complete the Object and generate a Pandas DataFrame with the dimensions
    defined by idx.

    The DataFrame is obtained from the DataArray returned by `to_xarray`.

    *Parameters*

    - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
    - **idx** : list (default None) - list of idx to be completed. It is passed
    to `to_xarray` as `idxname` (an `idxname` keyword is also accepted and is
    used when `idx` is None).
    - **fillvalue** : object (default '?') - value used for the new extval
    - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
    - **lisfunc** : function (default None) - list of function to apply to indexes before export
    - **name** : string (default None) - DataArray name. If None, variable name
    - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
    - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
    - **kwargs** : parameter for lisfunc

    *Returns* : pandas.DataFrame (None if the Dataset is not consistent)'''
    if not self.consistent:
        return None
    # `to_xarray` names its dimension list `idxname`. Forwarding `idx=...`
    # sent the value into its **kwargs, where it was not used as dimensions.
    idxname = kwargs.pop('idxname', None)
    if idx is not None:
        idxname = idx
    xar = self.to_xarray(info=info, idxname=idxname, fillvalue=fillvalue,
                         fillextern=fillextern, lisfunc=lisfunc, name=name,
                         numeric=numeric, npdtype=npdtype, **kwargs)
    return xar.to_dataframe(name=name)
Complete the Object and generate a Pandas DataFrame with the dimensions defined by idx.
Parameters
- info : boolean (default False) - if True, add _dict attributes to attrs Xarray
- idx : list (default none) - list of idx to be completed. If [], self.primary is used.
- fillvalue : object (default '?') - value used for the new extval
- fillextern : boolean(default True) - if True, fillvalue is converted to internal value
- lisfunc : function (default none) - list of function to apply to indexes before export
- name : string (default None) - DataArray name. If None, variable name
- numeric : Boolean (default False) - Generate a numeric DataArray.Values.
- npdtype : string (default None) - numpy dtype for the DataArray ('object' if None)
- kwargs : parameter for lisfunc
Returns : pandas.DataFrame
def to_file(self, filename, **kwargs):
    '''Write the Ntv representation of the data to a file.

    The data is built with `to_ntv` then serialized with `to_obj`
    (the 'encoded' option is always forced to True). The file is written
    in binary mode for the 'cbor' format and as utf-8 text otherwise.

    *Parameters*

    - **filename** : string - file name (with path)
    - **kwargs** : see 'to_ntv' parameters ('format' defaults to 'cbor' and
    'modecodec' to 'optimize')

    *Returns* : Integer - file length (bytes) '''
    option = {'format': 'cbor', 'modecodec': 'optimize', **kwargs,
              'encoded': True}
    ntv = self.to_ntv(modecodec=option['modecodec'])
    data = ntv.to_obj(**option)
    if option['format'] != 'cbor':
        size = len(bytes(data, 'UTF-8'))
        with open(filename, 'w', newline='', encoding="utf-8") as file:
            file.write(data)
        return size
    size = len(data)
    with open(filename, 'wb') as file:
        file.write(data)
    return size
Generate file to display data.
Parameters (kwargs)
- filename : string - file name (with path)
- kwargs : see 'to_ntv' parameters
Returns : Integer - file length (bytes)
def to_ntv(self, modecodec='optimize', def_type='json', name=False):
    '''Return the Ntv representation of the Dataset : a NtvList built from
    the Ntv value of each field and from `self.name`.

    *Parameters*

    - **modecodec** : string (default 'optimize') - if 'full', each index is with a full codec
    if 'default' each index has keys, if 'optimize' keys are optimized,
    if 'dict' dict format is used, if 'nokeys' keys are absent
    - **def_type** : string (default 'json') - default ntv_type for NtvList or NtvSet
    (not used in this method body)
    - **name** : boolean (default False) - if False, default index names are not included


    *Returns* : Ntv object'''
    # one flag per field (not a name) : truthy if `name` is truthy or if the
    # field name differs from the default name 'i<rank>'
    idxname = [name or iname != 'i' + str(i)
               for i, iname in enumerate(self.lname)]
    if modecodec != 'optimize':
        # same modecodec applied to every field
        lis = [index.to_ntv(modecodec=modecodec, name=iname)
               for index, iname in zip(self.lindex, idxname)]
    else:
        # 'optimize' : the representation of each field depends on its
        # analysis category and on its derived parent
        lis = []
        anafields = self.anafields
        for idx, iname, anafld in zip(self.lindex, idxname, anafields):
            # both values are computed before the category tests
            coef = Cutil.encode_coef(idx.keys)
            parent = anafld.p_derived.view('index')
            if anafld.category == 'unique':
                lis.append(idx.to_ntv(name=iname))
            elif anafld.category == 'coupled':
                # the coupled field is exported with the keys of its parent
                idx_coup = idx.setkeys(
                    self.lindex[parent].keys, inplace=False)
                lis.append(idx_coup.to_ntv(parent=parent, name=iname))
            elif coef:
                # keys are replaced by the single coefficient
                lis.append(idx.to_ntv(keys=[coef], name=iname))
            elif parent == -1:  # cat='variable' or 'secondary'
                # keys equal to the row numbers : exported in 'full' mode
                if idx.keys == list(range(len(self))):
                    lis.append(idx.to_ntv(modecodec='full', name=iname))
                else:
                    lis.append(idx.to_ntv(modecodec='default', name=iname))
            else:  # derived
                # parent codec as long as the Dataset : no derived keys
                if len(self.lindex[parent].codec) == len(self):
                    lis.append(idx.to_ntv(modecodec='default', name=iname))
                else:  # keys expressed relative to the parent field
                    keys = idx.derkeys(self.lindex[parent])
                    lis.append(idx.to_ntv(
                        keys=keys, parent=parent, name=iname))
    return NtvList(lis, self.name)
Return a Ntv tab value (without name).
Parameters (kwargs)
- modecodec : string (default 'optimize') - if 'full', each index is with a full codec if 'default' each index has keys, if 'optimize' keys are optimized, if 'dict' dict format is used, if 'nokeys' keys are absent
- def_type : string (default 'json') - default ntv_type for NtvList or NtvSet
- name : boolean (default False) - if False, default index names are not included
Returns : Ntv object
def to_xarray(self, info=False, idxname=None, varname=None, fillvalue='?',
              fillextern=True, lisfunc=None, name=None, numeric=False,
              npdtype=None, attrs=None, coord=False, **kwargs):
    '''
    Complete the Object and generate a Xarray DataArray with the dimensions
    defined by idxname. Only one variable is included.

    *Parameters*

    - **info** : boolean (default False) - if True, add _dict attributes to attrs Xarray
    - **idxname** : list (default None) - list of chosen primary fields. If None
    or empty, self.primaryname is used.
    - **varname** : string (default None) - Name of the variable to use. If None,
    first lvarname is used.
    - **fillvalue** : object (default '?') - value used for the new extval
    - **fillextern** : boolean(default True) - if True, fillvalue is converted to internal value
    - **lisfunc** : function or list (default None) - functions to apply before export.
    A one-element list is applied to every field, a list with another wrong
    length is ignored, a single function is applied to the variable only.
    - **name** : string (default None) - DataArray name. If None, variable name
    (or Dataset name if there is no variable)
    - **numeric** : Boolean (default False) - Generate a numeric DataArray.Values.
    - **npdtype** : string (default None) - numpy dtype for the DataArray ('object' if None)
    - **attrs** : dict (default None) - attributes for the DataArray. A dict
    given by the caller is updated in place.
    - **coord** : boolean (default False) - if True, add derived coords
    - **kwargs** : parameter for lisfunc

    *Returns* : DataArray

    *Raises* : DatasetError if the Dataset is not consistent'''
    option = {'dtype': None} | kwargs
    if not self.consistent:
        raise DatasetError("Dataset not consistent")
    if idxname is None or idxname == []:
        idxname = self.primaryname
    # work on a completed copy (inplace=False), then put it in canonical order
    ilf = self.full(idxname=idxname, varname=varname, fillvalue=fillvalue,
                    fillextern=fillextern, inplace=False)
    ilf.setcanonorder()
    if not varname and len(ilf.lvarname) != 0:
        varname = ilf.lvarname[0]
    # ivar : rank of the variable in ilf, -1 if there is none
    if not varname in ilf.lname:
        ivar = -1
    else:
        ivar = ilf.lname.index(varname)
    # normalize lisfunc to a list with one entry per field of ilf
    if isinstance(lisfunc, list) and len(lisfunc) == 1:
        lisfunc = lisfunc * ilf.lenindex
    elif isinstance(lisfunc, list) and len(lisfunc) != ilf.lenindex:
        lisfunc = [None] * ilf.lenindex
    elif not isinstance(lisfunc, list):
        funcvar = lisfunc
        lisfunc = [None] * ilf.lenindex
        if ivar != -1:
            lisfunc[ivar] = funcvar
    lisfuncname = dict(zip(ilf.lname, lisfunc))
    coords = ilf._xcoord(idxname, ivar, lisfuncname, coord, **option)
    dims = idxname
    if numeric:
        # NOTE(review): fillvalue is not read after this point in this method
        # (self.full was already called above), so this assignment has no effect
        fillvalue = math.nan
        npdtype = 'float'
        option['dtype'] = 'float'
    # data is reshaped with one axis per dimension, sized by the codec length
    if ivar == -1:
        # no variable : a field built from the row numbers is used as data
        data = self.field(list(range(len(ilf)))).to_numpy(npdtype='int')\
            .reshape([len(ilf.nindex(name).codec) for name in idxname])
    else:
        data = ilf.lindex[ivar]\
            .to_numpy(func=lisfunc[ivar], npdtype=npdtype, **option)\
            .reshape([len(ilf.nindex(name).codec) for name in idxname])
    if not name and ivar == -1:
        name = ilf.name
    elif not name:
        name = ilf.lname[ivar]
    if not isinstance(attrs, dict):
        attrs = {}
    # each field of ilf.lunicname is exported as an attribute holding its
    # first codec value
    for nam in ilf.lunicname:
        attrs[nam] = ilf.nindex(nam).codec[0]
    if info:
        attrs |= ilf.indexinfos()
    return xarray.DataArray(data, coords, dims, attrs=attrs, name=name)
Complete the Object and generate a Xarray DataArray with the dimensions defined by idxname. Only the first variable is included.
Parameters
- info : boolean (default False) - if True, add _dict attributes to attrs Xarray
- idxname : list (default none) - list of chosen primary fields. If None, self.primary is used.
- varname : string (default none) - Name of the variable to use. If None, first lvarname is used.
- fillvalue : object (default '?') - value used for the new extval
- fillextern : boolean(default True) - if True, fillvalue is converted to internal value
- lisfunc : function (default none) - list of function to apply to indexes before export
- name : string (default None) - DataArray name. If None, variable name
- numeric : Boolean (default False) - Generate a numeric DataArray.Values.
- npdtype : string (default None) - numpy dtype for the DataArray ('object' if None)
- attrs : dict (default None) - attributes for the DataArray
- coord : boolean (default False) - if True, add derived coords
- kwargs : parameter for lisfunc
Returns : DataArray
def voxel(self, idxname=None, varname=None):
    '''
    Plot not null values in a cube with voxels and return indexes values.

    When fewer than three dimensions are given, dummy 'null' fields are added
    to the Dataset to reach three dimensions and `delindex` is called after
    the plot.

    *Parameters*

    - **idxname** : list (default None) - list of idx to be completed. If None
    or empty, self.primaryname is used. The list is not modified.
    - **varname** : string (default None) - Name of the variable to use. If None,
    first lvarname is used.

    *Returns* : **dict of indexes values** (None if the Dataset is not consistent)

    *Raises* : DatasetError if more than three idx are given
    '''
    if not self.consistent:
        return None
    if idxname is None or idxname == []:
        idxname = self.primaryname
    if varname is None and self.lvarname:
        varname = self.lvarname[0]
    if len(idxname) > 3:
        raise DatasetError('number of idx > 3')
    # work on a copy : extending in place would alter the caller's list (or
    # the object returned by self.primaryname)
    idxname = list(idxname)
    # NOTE(review): the dummy fields below carry the same name as written
    # here - confirm that the two names are meant to be identical
    if len(idxname) == 2:
        self.addindex(self.field('null', ' ', keys=[0]*len(self)))
        idxname += [' ']
    elif len(idxname) == 1:
        self.addindex(self.field('null', ' ', keys=[0]*len(self)))
        self.addindex(self.field('null', ' ', keys=[0]*len(self)))
        idxname += [' ', ' ']
    xar = self.to_xarray(idxname=idxname, varname=varname, fillvalue='?',
                         fillextern=False, lisfunc=Cutil.is_not_equal, tovalue='?')
    axe = plt.figure().add_subplot(projection='3d')
    axe.voxels(xar, edgecolor='k')
    # one tick per value of each dimension
    axe.set_xticks(np.arange(self.idxlen[self.idxname.index(xar.dims[0])]))
    axe.set_yticks(np.arange(self.idxlen[self.idxname.index(xar.dims[1])]))
    axe.set_zticks(np.arange(self.idxlen[self.idxname.index(xar.dims[2])]))
    # axis labels are truncated to 8 characters
    axe.set(xlabel=xar.dims[0][:8],
            ylabel=xar.dims[1][:8],
            zlabel=xar.dims[2][:8])
    plt.show()
    # NOTE(review): called even when no dummy field was added - presumably
    # delindex ignores unknown names; confirm
    self.delindex([' ', ' '])
    return {xar.dims[i]: list(xar.coords[xar.dims[i]].values)
            for i in range(len(xar.dims))}
Plot not null values in a cube with voxels and return indexes values.
Parameters
- idxname : list (default none) - list of idx to be completed. If None, self.primary is used.
- varname : string (default none) - Name of the variable to use. If None, first lvarname is used.
Returns : dict of indexes values
def view(self, **kwargs):
    '''
    Generate tabular list to display data.

    The rows are produced by `_to_tab` and formatted with `tabulate`.

    *Parameters (kwargs)*

    - **name=listcode** : element (default None) - eg location='ns'
        - listcode : string with Code for each index (j: json, n: name, s: simple).
        - name : name of the index
    - **defcode** : String (default : 'j') - default list code (if 'all' is True)
    - **all** : Boolean (default : True) - 'defcode' applies to all indexes or none
    - **lenres** : Integer (default : 0) - Number of rows (all if 0)
    - **header** : Boolean (default : True) - First line with names. If False,
    the table is displayed without header.
    - **width** : Integer (default None) - Number of characters displayed for each
    attribute (all if None)
    - **ifunc** : function (default None) - function to apply to indexes
    - **tabulate params** : default 'tablefmt': 'simple', 'numalign': 'decimal',
    'stralign': 'left', 'floatfmt': '.2f' - See tabulate module
    - **other kwargs** : parameter for ifunc

    *Returns* : list or html table (tabulate format) '''
    opttab = {'defcode': 'j', 'all': True, 'lenres': 0, 'header': True}
    optview = {'tablefmt': 'simple', 'numalign': 'decimal',
               'stralign': 'left', 'floatfmt': '.2f'}
    option = opttab | optview | kwargs
    tab = self._to_tab(**option)
    width = kwargs.get('width')
    if width:
        # only string cells are truncated
        tab = [[val[:width] if isinstance(val, str) else val
                for val in lig] for lig in tab]
    # `_to_tab` only emits the names row when 'header' is True : without it,
    # 'firstrow' would turn the first data row into the header
    headers = 'firstrow' if option['header'] else ()
    return tabulate(tab, headers=headers, **{k: option[k] for k in optview})
Generate tabular list to display data.
Parameters (kwargs)
- name=listcode : element (default None) - eg location='ns'
- listcode : string with Code for each index (j: json, n: name, s: simple).
- name : name of the index
- defcode : String (default : 'j') - default list code (if 'all' is True)
- all : Boolean (default : True) - 'defcode apply to all indexes or none
- lenres : Integer (default : 0) - Number of rows (all if 0)
- header : Boolean (default : True) - First line with names
- width : Integer (default None) - Number of characters displayed for each attribute (all if None)
- ifunc : function (default None) - function to apply to indexes
- tabulate params : default 'tablefmt': 'simple', 'numalign': 'decimal', 'stralign': 'left', 'floatfmt': '.2f' - See tabulate module
- other kwargs : parameter for ifunc
Returns : list or html table (tabulate format)
def vlist(self, *args, func=None, index=-1, **kwargs):
    '''
    Apply a function to a field and return the result.

    With the default index (-1), the first variable (`self.lvar[0]`) is used
    when the Dataset has variables; otherwise the field is taken from
    `self.lindex`.

    *Parameters*

    - **func** : function (default None) - function to apply to extval or extidx
    - **args, kwargs** : parameters for the function
    - **index** : integer (default -1) - rank of the field in `lindex`
    (index=-1 for first variable)

    *Returns* : list of func result'''
    if index == -1:
        if self.lvar:
            first_var = self.lvar[0]
            return first_var.vlist(func, *args, **kwargs)
        if self.lenindex == 1:
            index = 0
    target = self.lindex[index]
    return target.vlist(func, *args, **kwargs)
Apply a function to an index and return the result.
Parameters
- func : function (default none) - function to apply to extval or extidx
- args, kwargs : parameters for the function
- index : integer - index to update (index=-1 for first variable)
Returns : list of func result