ntv-pandas.ntv_pandas.pandas_ntv_connector

Created on Feb 27 2023

@author: Philippe@loco-labs.io

The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
([specification document](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).

A NtvConnector is defined by:

- clas_obj: str - defines the class name of the object to convert
- clas_typ: str - defines the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV

It contains:

- the functions `read_json` and `to_json` to convert JSON data and pandas entities
- the function `to_analysis` to create the data used by the `tab_analysis` module
- the function `check_relation` to identify rows with inconsistent relationships
- the functions `as_def_type` and `equals`
- the child classes of the `NTV.json_ntv.ntv.NtvConnector` abstract class:
  - `DataFrameConnec`: 'tab' connector
  - `SeriesConnec`: 'field' connector
- a utility class with static methods: `PdUtil`

The functions `to_json`, `to_analysis`, `check_relation`, `as_def_type` and
`equals` are also available through the `npd` accessor (e.g. `DataFrame.npd.to_json`).
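A minimal round-trip sketch, assuming the functions are imported directly from this module (the package also exposes them through the `npd` accessor, e.g. `DataFrame.npd.to_json`):

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import read_json, to_json, equals

    df = pd.DataFrame({'city': ['paris', 'lyon', 'paris'],
                       'country': ['france', 'france', 'france']})

    jsn = to_json(df)        # JSON value wrapped in a {':tab': ...} object
    df2 = read_json(jsn)     # rebuilt DataFrame
    print(equals(df, df2))   # True when values, names and dtypes match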
1# -*- coding: utf-8 -*- 2""" 3Created on Feb 27 2023 4 5@author: Philippe@loco-labs.io 6 7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package 8([specification document]( 9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)). 10 11A NtvConnector is defined by: 12- clas_obj: str - define the class name of the object to convert 13- clas_typ: str - define the NTVtype of the converted object 14- to_obj_ntv: method - converter from JsonNTV to the object 15- to_json_ntv: method - converter from the object to JsonNTV 16 17It contains : 18 19- functions `read_json` and `to_json` to convert JSON data and pandas entities 20- function `to_analysis` to create data used by the `tab_analysis` module 21- function `check_relation` to identify rows with inconsistent relationships 22- functions `as_def_type` and `equals` 23 24- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class: 25 - `DataFrameConnec`: 'tab' connector 26 - `SeriesConnec`: 'field' connector 27 28- an utility class with static methods : `PdUtil` 29 30The functions `to_json`, `to_analysis`, `check_relation`, `as_def_type` and 31`equals` are used with the `npd` accessor. 32 33""" 34import os 35import datetime 36import json 37import configparser 38from pathlib import Path 39from collections import Counter 40from io import StringIO 41import pandas as pd 42import numpy as np 43 44 45from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle 46from json_ntv.ntv_util import NtvUtil 47from json_ntv.ntv_connector import ShapelyConnec 48from tab_dataset.cfield import Cfield 49from ntv_numpy import Xdataset 50 51path_ntv_pandas = Path(os.path.abspath(__file__)).parent 52 53 54def as_def_type(pd_array): 55 '''convert a Series or DataFrame with default dtype''' 56 if isinstance(pd_array, (pd.Series, pd.Index)): 57 return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name)) 58 return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns}) 59 60 61def check_relation(pd_df, parent, child, typecoupl, value=True): 62 ''' Accessor for method `cdataset.Cdataset.check_relation` invoket as 63 `pd.DataFrame.npd.check_relation`. 64 Get the inconsistent records for a relationship. 
65 66 *Parameters* 67 68 - **child** : str - name of the child Series involved in the relation 69 - **parent**: str - name of the parent Series involved in the relation 70 - **typecoupl**: str - relationship to check ('derived' or 'coupled') 71 - **value**: boolean (default True) - if True return a dict with inconsistent 72 values of the Series, else a tuple with index of records) 73 74 *Returns* : 75 76 - dict with inconsistent values of the Series 77 - or a tuple with row of records''' 78 parent_idx = SeriesConnec.to_idx(pd_df[parent]) 79 parent_field = Cfield(parent_idx['codec'], parent, parent_idx['keys']) 80 child_idx = SeriesConnec.to_idx(pd_df[child]) 81 child_field = Cfield(child_idx['codec'], child, child_idx['keys']) 82 return Cfield.check_relation(parent_field, child_field, typecoupl, value) 83 84 85def equals(pdself, pdother): 86 '''return True if pd.equals is True and names are equal and dtype of categories are equal''' 87 if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series): 88 return SeriesConnec.equals(pdself, pdother) 89 if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame): 90 return DataFrameConnec.equals(pdself, pdother) 91 return False 92 93 94def read_json(jsn, **kwargs): 95 ''' convert JSON text or JSON Value to pandas Series or Dataframe. 96 97 *parameters* 98 99 - **jsn** : JSON text or JSON value to convert 100 - **extkeys**: list (default None) - keys to use if not present in ntv_value 101 - **decode_str**: boolean (default False) - if True, string values are converted 102 in object values 103 - **leng**: integer (default None) - leng of the Series (used with single codec value) 104 - **alias**: boolean (default False) - if True, convert dtype in alias dtype 105 - **annotated**: boolean (default False) - if True, ntv_codec names are ignored 106 - **series**: boolean (default False) - used only without header. 
If True 107 JSON data is converted into Series else DataFrame 108 ''' 109 option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False, 110 'annotated': False, 'series': False} | kwargs 111 jso = json.loads(jsn) if isinstance(jsn, str) else jsn 112 if 'schema' in jso: 113 return PdUtil.to_obj_table(jso, **option) 114 ntv = Ntv.from_obj(jso) 115 if ntv.type_str == 'field': 116 return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option) 117 if ntv.type_str == 'tab': 118 return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option) 119 if option['series']: 120 return SeriesConnec.to_obj_ntv(ntv, **option) 121 return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option) 122 123 124def _dist(key1, key2, distr=False): 125 '''return default coupling codec between two keys list and optionaly if 126 the relationship is distributed''' 127 if not key1 or not key2: 128 return 0 129 k1k2 = [tuple((v1, v2)) for v1, v2 in zip(key1, key2)] 130 dist = len(list(dict.fromkeys(k1k2))) 131 if not distr: 132 return dist 133 distrib = False 134 if dist == (max(key1) + 1) * (max(key2) + 1): 135 distrib = max(Counter(k1k2).values()) == len(key1) // dist 136 # distrib = min(sum(map(lambda x: (x + i) % (max(a) + 1), a)) == sum(a) 137 # for i in range(1, max(a)+1)) 138 return [dist, distrib] 139 140 141def to_analysis(pd_df, distr=False): 142 '''return a dict with data used in AnaDataset module''' 143 144 keys = [list(pd_df[col].astype('category').cat.codes) 145 for col in pd_df.columns] 146 lencodec = [len(set(key)) for key in keys] 147 if distr: 148 dist = [[_dist(keys[i], keys[j], distr) for j in range(i+1, len(keys))] 149 for i in range(len(keys)-1)] 150 else: 151 dist = [[len(set(zip(keys[i], keys[j]))) for j in range(i+1, len(keys))] 152 for i in range(len(keys)-1)] 153 return {'fields': [{'lencodec': lencodec[ind], 'id': pd_df.columns[ind], 154 'mincodec': lencodec[ind]} 155 for ind in range(len(pd_df.columns))], 156 'name': None, 'length': len(pd_df), 157 'relations': {pd_df.columns[i]: {pd_df.columns[j+i+1]: dist[i][j] 158 for j in range(len(dist[i]))} for i in range(len(dist))}} 159 160 161def to_json(pd_array, **kwargs): 162 ''' convert pandas Series or Dataframe to JSON text or JSON Value. 163 164 *parameters* 165 166 - **pd_array** : Series or Dataframe to convert 167 - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value 168 - **header** : boolean (default: True) - if True the JSON data is included as 169 value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame 170 - **table** : boolean (default False) - if True return TableSchema format 171 - **index** : boolean (default True) - if True the index Series is included 172 ''' 173 option = {'encoded': False, 'header': True, 174 'table': False, 'index': True} | kwargs 175 option['header'] = False if option['table'] else option['header'] 176 if isinstance(pd_array, pd.Series): 177 jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0] 178 head = ':field' 179 else: 180 jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'], 181 index=option['index'])[0] 182 head = ':tab' 183 if option['header']: 184 jsn = {head: jsn} 185 if option['encoded']: 186 return json.dumps(jsn) 187 return jsn 188 189 190def from_xarray(xdt, **kwargs): 191 ''' convert xarray.Dataset to pandas DataFrame. 
192 193 *Parameters* 194 195 - **json_name**: Boolean (default True) - if False use full_name else json_name 196 - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs 197 - **dims**: list of string (default None) - order of dimensions full_name to apply 198 ''' 199 return Xdataset.from_xarray(xdt).to_dataframe(**kwargs) 200 201 202def from_scipp(sci, **kwargs): 203 ''' convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame. 204 205 *Parameters* 206 207 - **json_name**: Boolean (default True) - if False use full_name else json_name 208 - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs 209 - **dims**: list of string (default None) - order of dimensions full_name to apply 210 ''' 211 return Xdataset.from_scipp(sci).to_dataframe(**kwargs) 212 213 214class DataFrameConnec(NtvConnector): 215 216 '''NTV connector for pandas DataFrame. 217 218 One static methods is included: 219 220 - to_listidx: convert a DataFrame in categorical data 221 ''' 222 223 clas_obj = 'DataFrame' 224 clas_typ = 'tab' 225 226 @staticmethod 227 def to_obj_ntv(ntv_value, **kwargs): # reindex=True, decode_str=False): 228 ''' convert json ntv_value into a DataFrame. 229 230 *Parameters* 231 232 - **index** : list (default None) - list of index values, 233 - **alias** : boolean (default False) - if True, alias dtype else default dtype 234 - **annotated** : boolean (default False) - if True, NTV names are not included.''' 235 series = SeriesConnec.to_series 236 237 ntv = Ntv.fast(ntv_value) 238 lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val)) 239 for ntvf in ntv] 240 leng = max([idx[6] for idx in lidx]) 241 option = kwargs | {'leng': leng} 242 no_keys = [] 243 for ind, lind in enumerate(lidx): 244 no_keys.append(not lind[3] and not lind[4] and not lind[5]) 245 NtvConnector.init_ntv_keys(ind, lidx, leng) 246 lind[2] = Ntv.fast(Ntv.obj_ntv( 247 lind[2], typ=lind[1], single=len(lind[2]) == 1)) 248 list_series = [series(lidx[ind][2], lidx[ind][0], None if no_keys[ind] 249 else lidx[ind][4], **option) for ind in range(len(lidx))] 250 dfr = pd.DataFrame({ser.name: ser for ser in list_series}) 251 return PdUtil.pd_index(dfr) 252 253 @staticmethod 254 def to_json_ntv(value, name=None, typ=None, **kwargs): 255 ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type). 
256 257 *Parameters* 258 259 - **typ** : string (default None) - type of the NTV object, 260 - **name** : string (default None) - name of the NTV object 261 - **value** : DataFrame values 262 - **table** : boolean (default False) - if True return TableSchema format 263 - **index** : boolean (default True) - if True the index Series is included 264 ''' 265 table = kwargs.get('table', False) 266 index = kwargs.get('index', True) 267 if not table: 268 df2 = value.reset_index() if index else value 269 jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0] 270 for col in df2.columns]).to_obj() 271 return (jsn, name, DataFrameConnec.clas_typ if not typ else typ) 272 df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert( 273 SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1], 274 value[col]) for col in value.columns}) 275 table_val = json.loads(df2.to_json(orient='table', 276 date_format='iso', default_handler=str)) 277 for nam in value.columns: 278 ntv_name, ntv_type = SeriesConnec.to_json_ntv( 279 value[nam], table=True, no_val=True) 280 table_val['schema'] = PdUtil.table_schema(table_val['schema'], 281 ntv_name, ntv_type) 282 return (table_val, name, DataFrameConnec.clas_typ if not typ else typ) 283 284 @staticmethod 285 def to_listidx(dtf): 286 ''' convert a DataFrame in categorical data 287 288 *Return: tuple with:* 289 290 - **list** of dict (keys : 'codec', 'name, 'keys') for each column 291 - **lenght** of the DataFrame''' 292 return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf)) 293 294 @staticmethod 295 def equals(pdself, pdother): 296 '''return True if columns are equals''' 297 if not (isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame)): 298 return False 299 if len(pdself.columns) != len(pdother.columns): 300 return False 301 for cself, cother in zip(pdself, pdother): 302 if not SeriesConnec.equals(pdself[cself], pdother[cother]): 303 return False 304 return True 305 306 307class SeriesConnec(NtvConnector): 308 '''NTV connector for pandas Series 309 310 Two static methods are included: 311 312 - to_idx: convert a Series in categorical data 313 - to_series: return a Series from Field data 314 ''' 315 clas_obj = 'Series' 316 clas_typ = 'field' 317 config = configparser.ConfigParser() 318 config.read(path_ntv_pandas.joinpath('ntv_pandas.ini')) 319 types = pd.DataFrame(json.loads(config['data']['type']), 320 columns=json.loads(config['data']['column'])) 321 astype = json.loads(config['data']['astype']) 322 deftype = {val: key for key, val in astype.items()} 323 config = configparser.ConfigParser() 324 config.read(path_ntv_pandas.joinpath('ntv_table.ini')) 325 table = pd.DataFrame(json.loads(config['data']['mapping']), 326 columns=json.loads(config['data']['column'])) 327 typtab = pd.DataFrame(json.loads(config['data']['type']), 328 columns=json.loads(config['data']['col_type'])) 329 330 @staticmethod 331 def to_obj_ntv(ntv_value, **kwargs): 332 '''Generate a Series Object from a Ntv field object 333 334 *Parameters* 335 336 - **ntv_value**: Ntv object or Ntv value - value to convert in Series 337 338 *parameters (kwargs)* 339 340 - **extkeys**: list (default None) - keys to use if not present in ntv_value 341 - **decode_str**: boolean (default False) - if True, string values are converted 342 in object values 343 - **index**: list (default None) - if present, add the index in Series 344 - **leng**: integer (default None) - leng of the Series (used with single codec value) 345 - **alias**: boolean (default False) 
- if True, convert dtype in alias dtype 346 - **annotated**: boolean (default False) - if True, ntv_codec names are ignored 347 ''' 348 option = {'extkeys': None, 'decode_str': False, 'leng': None, 349 'annotated': False} | kwargs 350 if ntv_value is None: 351 return None 352 ntv = Ntv.obj(ntv_value, decode_str=option['decode_str']) 353 354 ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \ 355 NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val) 356 if parent and not option['extkeys']: 357 return None 358 if coef: 359 ntv_keys = NtvConnector.keysfromcoef( 360 coef, leng_field//coef, option['leng']) 361 elif option['extkeys'] and parent: 362 ntv_keys = NtvConnector.keysfromderkeys( 363 option['extkeys'], ntv_keys) 364 elif option['extkeys'] and not parent: 365 ntv_keys = option['extkeys'] 366 ntv_codec = Ntv.fast(Ntv.obj_ntv( 367 codec, typ=typ, single=len(codec) == 1)) 368 return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option) 369 370 @staticmethod 371 def to_json_ntv(value, name=None, typ=None, **kwargs): 372 ''' convert a Series (value, name, type) into NTV json (json-value, name, type). 373 374 *Parameters* 375 376 - **typ** : string (default None) - type of the NTV object, 377 - **name** : string (default None) - name of the NTV object 378 - **value** : Series values 379 - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type) 380 - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)''' 381 382 table = kwargs.get('table', False) 383 no_val = kwargs.get('no_val', False) 384 srs = value.astype(SeriesConnec.astype.get( 385 value.dtype.name, value.dtype.name)) 386 sr_name = srs.name if srs.name else '' 387 ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2] 388 389 if table: 390 ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True) 391 ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs) 392 if no_val: 393 return (ntv_name, ntv_type) 394 return (ntv_value, ntv_name, ntv_type) 395 if srs.dtype.name == 'category': 396 cdc = pd.Series(srs.cat.categories) 397 ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name) 398 cat_value = PdUtil.ntv_val(ntv_type, cdc) 399 cat_value = NtvList(cat_value, ntv_type=ntv_type) 400 cod_value = list(srs.cat.codes) 401 coef = NtvConnector.encode_coef(cod_value) 402 ntv_value = [cat_value, NtvList( 403 [coef]) if coef else NtvList(cod_value)] 404 ntv_type = None 405 else: 406 ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name) 407 ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs), 408 def_type=ntv_type).ntv_value 409 if len(ntv_value) == 1: 410 ntv_value[0].set_name(ntv_name) 411 return (ntv_value[0].to_obj(), name, 412 SeriesConnec.clas_typ if not typ else typ) 413 return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, 414 SeriesConnec.clas_typ if not typ else typ) 415 416 @staticmethod 417 def to_idx(ser): 418 ''' convert a Series in categorical data 419 420 *return (dict)* 421 422 { 'codec': 'list of pandas categories', 423 'name': 'name of the series', 424 'keys': 'list of pandas codes' } 425 ''' 426 idx = ser.astype('category') 427 lis = list(idx.cat.categories) 428 if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp): 429 lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) 430 for ts in lis] 431 return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)} 432 433 @staticmethod 434 def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs): 435 ''' return a pd.Series from Field data (codec, name, keys) 436 437 
*Parameters* 438 439 - **ntv_codec**: Ntv object - codec value to convert in Series values 440 - **ntv_type**: string - default type to apply to convert in dtype 441 - **ntv_name**: string - name of the Series 442 443 *parameters (kwargs)* 444 445 - **index**: list (default None) - if present, add the index in Series 446 - **leng**: integer (default None) - leng of the Series (used with single codec value) 447 - **alias**: boolean (default False) - if True, convert dtype in alias dtype 448 - **annotated**: boolean (default False) - if True, ntv_codec names are ignored 449 ''' 450 option = {'index': None, 'leng': None, 'alias': False, 451 'annotated': False} | kwargs 452 types = SeriesConnec.types.set_index('ntv_type') 453 astype = SeriesConnec.astype 454 leng = option['leng'] 455 456 ntv_type = ntv_codec.type_str 457 len_unique = leng if len(ntv_codec) == 1 and leng else 1 458 pd_convert = ntv_type in types.index 459 460 pd_name, name_type, dtype = PdUtil.pd_name( 461 ntv_name, ntv_type, pd_convert) 462 ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type, 463 option['annotated'], pd_convert) 464 if ntv_keys: 465 if pd_convert and name_type != 'array': 466 categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type) 467 cat_type = categ.dtype.name 468 categories = categ.astype(astype.get(cat_type, cat_type)) 469 else: 470 categories = pd.Series(ntv_obj, dtype='object') 471 cat = pd.CategoricalDtype(categories=categories) 472 data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat) 473 srs = pd.Series(data, name=pd_name, 474 index=option['index'], dtype='category') 475 else: 476 data = ntv_obj * len_unique 477 if pd_convert: 478 srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name) 479 else: 480 srs = pd.Series(data, name=pd_name, dtype=dtype) 481 482 if option['alias']: 483 return srs.astype(astype.get(srs.dtype.name, srs.dtype.name)) 484 return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name)) 485 486 @staticmethod 487 def _from_json(data, dtype, ntv_type, pd_name=None): 488 '''return a Series from a Json data. 489 490 *Parameters* 491 492 - **data**: Json-value - data to convert in a Series 493 - **dtype**: string - dtype of the Series 494 - **ntv_type**: string - default type to apply to convert in dtype 495 - **pd_name**: string - name of the Series including ntv_type 496 497 NTVvalue and a ntv_type''' 498 srs = pd.read_json(StringIO(json.dumps(data)), 499 dtype=dtype, typ='series') 500 if not pd_name is None: 501 srs = srs.rename(pd_name) 502 return PdUtil.convert(ntv_type, srs, tojson=False) 503 504 @staticmethod 505 def equals(pdself, pdother): 506 '''return True if pd.equals is True and names are equal and dtype of categories are equal''' 507 if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)): 508 return False 509 if pdself.name != pdother.name: 510 return False 511 type_cat = str(pdself.dtype) == str(pdother.dtype) == 'category' 512 if type_cat: 513 return SeriesConnec.equals(pdself.cat.categories, pdother.cat.categories) 514 return as_def_type(pdself).equals(as_def_type(pdother)) 515 516 517class PdUtil: 518 '''ntv-pandas utilities. 
519 520 This class includes static methods: 521 522 Ntv and pandas 523 - **ntv_type**: return NTVtype from name_type and dtype of a Series 524 - **convert**: convert Series with external NTVtype 525 - **ntv_val**: convert a simple Series into NTV json-value 526 - **ntv_obj**: return a list of values to convert in a Series 527 - **pd_name**: return a tuple with the name of the Series and the type deduced from the name 528 - **pd_index**: return a DataFrame with index 529 - **unic**: return simple value if the Series contains a single value 530 531 TableSchema 532 - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series 533 - **name_table**: return a list of non index field's names from a json Table 534 - **ntvtype_table**: return a list of non index field's ntv_type from a json Table 535 - **table_schema**: add 'format' and 'type' keys in a Json TableSchema 536 - **table_val**: convert a Series into TableSchema json-value 537 - **ntv_table**: return NTVtype from the TableSchema data 538 ''' 539 @staticmethod 540 def to_obj_table(jsn, **kwargs): 541 ''' convert json TableSchema data into a DataFrame or a Series''' 542 ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields']) 543 name = PdUtil.name_table(jsn['schema']['fields']) 544 pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0] 545 for nam, ntvtyp in zip(name, ntv_type)] 546 pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2] 547 for nam, ntvtyp in zip(name, ntv_type)] 548 dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='record') 549 dfr = PdUtil.pd_index(dfr) 550 dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False) 551 for ind, col in enumerate(dfr.columns)}) 552 dfr = dfr.astype({col: pd_dtype[ind] 553 for ind, col in enumerate(dfr.columns)}) 554 dfr.columns = pd_name 555 if len(dfr.columns) == 1: 556 return dfr[dfr.columns[0]] 557 return dfr 558 559 @staticmethod 560 def decode_ntv_to_val(ntv): 561 ''' return a value from a ntv_field''' 562 if isinstance(ntv, NtvSingle): 563 return ntv.to_obj(simpleval=True) 564 return [ntv_val.to_obj() for ntv_val in ntv] 565 566 @staticmethod 567 def name_table(fields): 568 '''return a list of non index field's names from a json Table''' 569 names = [field.get('name', None) for field in fields 570 if field.get('name', None) != 'index'] 571 return [None if name == 'values' else name for name in names] 572 573 @staticmethod 574 def ntvtype_table(fields): 575 '''return a list of non index field's ntv_type from a json Table''' 576 return [PdUtil.ntv_table(field.get('format', 'default'), 577 field.get('type', None)) for field in fields 578 if field.get('name', None) != 'index'] 579 580 @staticmethod 581 def table_schema(schema, name, ntv_type): 582 '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema 583 for the field defined by 'name' ''' 584 ind = [field['name'] for field in schema['fields']].index(name) 585 tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type] 586 if tabletype['format'] == 'default': 587 schema['fields'][ind].pop('format', None) 588 else: 589 schema['fields'][ind]['format'] = tabletype['format'] 590 schema['fields'][ind]['type'] = tabletype['type'] 591 schema['fields'][ind].pop('extDtype', None) 592 return schema 593 594 @staticmethod 595 def table_val(ntv_type, ntv_name, srs): 596 '''convert a Series into TableSchema json-value. 
597 598 *Parameters* 599 600 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 601 - **ntv_name**: string - name of the Series 602 - **srs** : Series to be converted.''' 603 srs = PdUtil.convert(ntv_type, srs) 604 srs.name = ntv_name 605 tab_val = json.loads(srs.to_json(orient='table', 606 date_format='iso', default_handler=str)) 607 name = 'values' if srs.name is None else srs.name 608 tab_val['schema'] = PdUtil.table_schema( 609 tab_val['schema'], name, ntv_type) 610 return tab_val 611 612 @staticmethod 613 def convert(ntv_type, srs, tojson=True): 614 ''' convert Series with external NTVtype. 615 616 *Parameters* 617 618 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 619 - **srs** : Series to be converted. 620 - **tojson** : boolean (default True) - apply to json function''' 621 if tojson: 622 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 623 return srs.apply(ShapelyConnec.to_coord) 624 if ntv_type == 'geojson': 625 return srs.apply(ShapelyConnec.to_geojson) 626 if ntv_type == 'date': 627 return srs.astype(str) 628 return srs 629 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 630 return srs.apply(ShapelyConnec.to_geometry) 631 if ntv_type == 'geojson': 632 return srs.apply(ShapelyConnec.from_geojson) 633 if ntv_type == 'datetime': 634 return pd.to_datetime(srs) 635 if ntv_type == 'date': 636 return pd.to_datetime(srs).dt.date 637 if ntv_type == 'time': 638 return pd.to_datetime(srs, format='mixed').dt.time 639 return srs 640 641 @staticmethod 642 def ntv_type(name_type, dtype, table=False): 643 ''' return NTVtype from name_type and dtype of a Series . 644 645 *Parameters* 646 647 - **name_type** : string - type included in the Series name, 648 - **dtype** : string - dtype of the Series. 649 - **table** : boolean (default False) - True if Table Schema conversion 650 ''' 651 if not name_type: 652 types_none = SeriesConnec.types.set_index('name_type').loc[None] 653 if dtype in types_none.dtype.values: 654 return types_none.set_index('dtype').loc[dtype].ntv_type 655 if not table: 656 return None 657 typtab = SeriesConnec.typtab.set_index('name_type').loc[None] 658 return typtab.set_index('dtype').loc[dtype.lower()].ntv_type 659 return name_type 660 661 @staticmethod 662 def ntv_val(ntv_type, srs): 663 ''' convert a simple Series into NTV json-value. 664 665 *Parameters* 666 667 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 668 - **srs** : Series to be converted.''' 669 srs = PdUtil.convert(ntv_type, srs) 670 if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']: 671 return srs.to_list() 672 if srs.dtype.name == 'object': 673 return srs.to_list() 674 return json.loads(srs.to_json(orient='records', 675 date_format='iso', default_handler=str)) 676 677 @staticmethod 678 def ntv_obj(ntv_codec, name_type, annotated, pd_convert): 679 '''return a list of values to convert in a Series''' 680 if pd_convert: 681 if name_type == 'array': 682 return ntv_codec.to_obj(format='obj', simpleval=True) 683 ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False, 684 def_type=ntv_codec.type_str, fast=True) 685 return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj] 686 return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type) 687 688 @staticmethod 689 def ntv_table(table_format, table_type): 690 ''' return NTVtype from the TableSchema data. 
691 692 *Parameters* 693 694 - **table_format** : string - TableSchema format, 695 - **table_type** : string - TableSchema type''' 696 return SeriesConnec.table.set_index(['type', 'format']).loc[ 697 (table_type, table_format)].values[0] 698 699 @staticmethod 700 def pd_index(dfr): 701 '''return a DataFrame with index''' 702 if 'index' in dfr.columns: 703 dfr = dfr.set_index('index') 704 dfr.index.rename(None, inplace=True) 705 return dfr 706 707 @staticmethod 708 def pd_name(ntv_name, ntv_type, pd_convert=True, table=False): 709 '''return a tuple with the name of the Series, the type deduced from 710 the name and the dtype''' 711 ntv_name = '' if ntv_name is None else ntv_name 712 typtab = SeriesConnec.typtab.set_index('ntv_type') 713 types = SeriesConnec.types.set_index('ntv_type') 714 if table and ntv_type.lower() in typtab.index: 715 name_type = typtab.loc[ntv_type.lower()]['name_type'] 716 dtype = typtab.loc[ntv_type.lower()]['dtype'] 717 elif pd_convert or table: 718 name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else '' 719 dtype = types.loc[ntv_type]['dtype'] 720 else: 721 return (ntv_name + '::' + ntv_type, ntv_type, 'object') 722 dtype = SeriesConnec.deftype.get(dtype, dtype) # ajout 723 pd_name = ntv_name + '::' + name_type if name_type else ntv_name 724 return (pd_name if pd_name else None, name_type, dtype) 725 726 @staticmethod 727 def unic(srs): 728 ''' return simple value if the Series contains a single value''' 729 if str(srs.dtype) == 'category': 730 return srs 731 return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def as_def_type(pd_array):
convert a Series or DataFrame to the default dtype
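A short sketch; the alias-to-default dtype mapping is read from the `ntv_pandas.ini` configuration, so the exact target dtype below is indicative:

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import as_def_type

    sr = pd.Series([1, 2, 3], dtype='UInt32')   # pandas nullable ("alias") dtype
    print(sr.dtype)                             # UInt32
    print(as_def_type(sr).dtype)                # default dtype per the deftype mapping (e.g. uint32)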
def check_relation(pd_df, parent, child, typecoupl, value=True):
Accessor for the method cdataset.Cdataset.check_relation, invoked as pd.DataFrame.npd.check_relation.
Get the inconsistent records for a relationship.

Parameters

- child: str - name of the child Series involved in the relation
- parent: str - name of the parent Series involved in the relation
- typecoupl: str - relationship to check ('derived' or 'coupled')
- value: boolean (default True) - if True, return a dict with the inconsistent values of the Series, else a tuple with the index of the records

Returns

- a dict with the inconsistent values of the Series
- or a tuple with the rows of the inconsistent records
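A sketch on a small DataFrame where one 'city' value is associated with two different 'country' values; the parent/child orientation and the exact shape of the result follow `Cfield.check_relation`:

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import check_relation

    df = pd.DataFrame({'city':    ['paris', 'lyon', 'paris', 'rome'],
                       'country': ['france', 'france', 'italy', 'italy']})

    print(check_relation(df, 'city', 'country', 'derived'))               # inconsistent values
    print(check_relation(df, 'city', 'country', 'derived', value=False))  # row indexes only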
def equals(pdself, pdother):
return True if pd.equals is True, the names are equal and the dtypes of the categories are equal
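Unlike `pandas.Series.equals`, the names (and the dtype of the categories) are part of the comparison:

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import equals

    s1 = pd.Series([1, 2, 3], name='quantity')
    s2 = pd.Series([1, 2, 3], name='amount')

    print(s1.equals(s2))    # True  - pandas ignores the name
    print(equals(s1, s2))   # False - names differ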
def read_json(jsn, **kwargs):
convert JSON text or a JSON value to a pandas Series or DataFrame.

Parameters

- jsn: JSON text or JSON value to convert
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted into object values
- leng: integer (default None) - length of the Series (used with a single codec value)
- alias: boolean (default False) - if True, convert dtype into alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
- series: boolean (default False) - used only without a header; if True, the JSON data is converted into a Series, else into a DataFrame
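A sketch of the `series` option with header-less JSON (round trip via `to_json(..., header=False)`):

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import read_json, to_json

    sr = pd.Series([10, 20, 30], name='value')
    jsn = to_json(sr, header=False)       # JSON value without the ':field' wrapper

    print(read_json(jsn, series=True))    # rebuilt as a Series
    # without series=True, header-less data is rebuilt as a DataFrame (the default)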
def to_analysis(pd_df, distr=False):
return a dict with the data used by the AnaDataset module
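A sketch showing the structure of the returned dict ('fields', 'name', 'length', 'relations'):

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import to_analysis

    df = pd.DataFrame({'city':    ['paris', 'lyon', 'paris'],
                       'country': ['france', 'france', 'france']})

    ana = to_analysis(df)
    print(ana['length'])      # 3
    print(ana['fields'])      # per-column 'id', 'lencodec', 'mincodec'
    print(ana['relations'])   # pairwise coupling measure between columns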
def to_json(pd_array, **kwargs):
convert a pandas Series or DataFrame to JSON text or a JSON value.

Parameters

- pd_array: Series or DataFrame to convert
- encoded: boolean (default False) - if True, return JSON text, else a JSON value
- header: boolean (default True) - if True, the JSON data is included as the value in a {key: value} object where the key is ':field' for a Series or ':tab' for a DataFrame
- table: boolean (default False) - if True, return the TableSchema format
- index: boolean (default True) - if True, the index Series is included
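A sketch of the main options:

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import to_json

    df = pd.DataFrame({'value': [10, 20], 'label': ['a', 'b']})

    print(to_json(df))                  # JSON value wrapped in {':tab': ...}
    print(to_json(df, header=False))    # same data without the ':tab' wrapper
    print(to_json(df, encoded=True))    # JSON text (str) instead of a JSON value
    print(to_json(df, table=True))      # TableSchema format ('schema' / 'data')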
def from_xarray(xdt, **kwargs):
convert an xarray.Dataset to a pandas DataFrame.

Parameters

- json_name: boolean (default True) - if False, use full_name, else json_name
- info: boolean (default True) - if True, add xdt.info to DataFrame.attrs
- dims: list of string (default None) - order of dimension full_names to apply
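A sketch (requires xarray; the conversion itself is delegated to `ntv_numpy.Xdataset`):

    import xarray as xr
    from ntv_pandas.pandas_ntv_connector import from_xarray

    xdt = xr.Dataset({'temperature': (['x'], [10.0, 12.5, 11.0])},
                     coords={'x': [1, 2, 3]})

    df = from_xarray(xdt)
    print(df)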
def from_scipp(sci, **kwargs):
convert a scipp.Dataset / scipp.DataArray / scipp.DataGroup to a pandas DataFrame.

Parameters

- json_name: boolean (default True) - if False, use full_name, else json_name
- info: boolean (default True) - if True, add the dataset info to DataFrame.attrs
- dims: list of string (default None) - order of dimension full_names to apply
class DataFrameConnec(NtvConnector):
NTV connector for pandas DataFrame.

One static method is included:

- to_listidx: convert a DataFrame into categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
convert a json ntv_value into a DataFrame.

Parameters

- index: list (default None) - list of index values
- alias: boolean (default False) - if True, use the alias dtype, else the default dtype
- annotated: boolean (default False) - if True, NTV names are not included
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

- typ: string (default None) - type of the NTV object
- name: string (default None) - name of the NTV object
- value: DataFrame values
- table: boolean (default False) - if True, return the TableSchema format
- index: boolean (default True) - if True, the index Series is included
@staticmethod
def to_listidx(dtf):
convert a DataFrame into categorical data

Return: tuple with:

- list of dict (keys: 'codec', 'name', 'keys') for each column
- length of the DataFrame
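A sketch of the returned tuple:

    import pandas as pd
    from ntv_pandas.pandas_ntv_connector import DataFrameConnec

    df = pd.DataFrame({'city': ['paris', 'lyon', 'paris']})

    list_idx, leng = DataFrameConnec.to_listidx(df)
    print(leng)         # 3
    print(list_idx[0])  # e.g. {'codec': ['lyon', 'paris'], 'name': 'city', 'keys': [1, 0, 1]}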
@staticmethod
def equals(pdself, pdother):
return True if the columns are equal
Inherited Members
- json_ntv.ntv_util.NtvConnector
- DIC_NTV_CL
- DIC_GEO_CL
- DIC_DAT_CL
- DIC_FCT
- DIC_GEO
- DIC_CBOR
- DIC_OBJ
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- format_field
- init_ntv_keys
class SeriesConnec(NtvConnector):
NTV connector for pandas Series.

Two static methods are included:

- to_idx: convert a Series into categorical data
- to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
Generate a Series object from an Ntv field object

Parameters

- ntv_value: Ntv object or Ntv value - value to convert into a Series

parameters (kwargs)

- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted into object values
- index: list (default None) - if present, add the index to the Series
- leng: integer (default None) - length of the Series (used with a single codec value)
- alias: boolean (default False) - if True, convert dtype into alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
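For categorical data, `to_json_ntv` encodes a Series as a codec (the categories) plus keys (the codes). A minimal pandas-only sketch of that split, with illustrative data:

import pandas as pd

# Categories become the codec, integer codes become the keys.
srs = pd.Series(['red', 'blue', 'red', 'red'], dtype='category')
codec = list(srs.cat.categories)     # ['blue', 'red']
keys = list(srs.cat.codes)           # [1, 0, 1, 1]
print(codec, keys)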
    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}
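`to_idx` normalises pandas Timestamps found in the categories to timezone-aware UTC datetimes. A small standalone sketch of that conversion, with an illustrative timestamp:

import datetime
import pandas as pd

# A timezone-aware pandas Timestamp converted to a python datetime in UTC.
ts = pd.Timestamp('2023-02-27T10:00:00+01:00')
print(ts.to_pydatetime().astimezone(datetime.timezone.utc))
# 2023-02-27 09:00:00+00:00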
    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_type**: string - default type to apply to convert in dtype
        - **ntv_name**: string - name of the Series

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))
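When keys are present, `to_series` rebuilds the full Series from the codec and the keys through a pandas Categorical. A pandas-only sketch of that reconstruction, with illustrative data:

import pandas as pd

# codec (categories) + keys (codes) -> full Series, as in the categorical branch above.
codec = ['blue', 'red']
keys = [1, 0, 1, 1]
cat = pd.CategoricalDtype(categories=pd.Series(codec))
srs = pd.Series(pd.Categorical.from_codes(codes=keys, dtype=cat), dtype='category')
print(list(srs))     # ['red', 'blue', 'red', 'red']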
    @staticmethod
    def equals(pdself, pdother):
        '''return True if pd.equals is True and names are equal and dtype of categories are equal'''
        if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
            return False
        if pdself.name != pdother.name:
            return False
        type_cat = str(pdself.dtype) == str(pdother.dtype) == 'category'
        if type_cat:
            return SeriesConnec.equals(pdself.cat.categories, pdother.cat.categories)
        return as_def_type(pdself).equals(as_def_type(pdother))
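The stricter comparison is needed because `pandas.Series.equals` ignores the Series name. A short illustration with made-up data:

import pandas as pd

# Series.equals compares shape and values only; the name check is done explicitly.
s1 = pd.Series([1, 2, 3], name='a')
s2 = pd.Series([1, 2, 3], name='b')
print(s1.equals(s2))         # True  - names ignored by pandas
print(s1.name == s2.name)    # False - rejected by SeriesConnec.equals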
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series'''
        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
                    for nam, ntvtyp in zip(name, ntv_type)]
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='record')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                          for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr
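`to_obj_table` expects Table Schema data with 'schema'/'fields' and 'data' entries. A minimal sketch of that layout, produced here with pandas itself on illustrative data (a real input may carry additional 'format' keys):

import json
import pandas as pd

srs = pd.Series([10, 20], name='value')
tab = json.loads(srs.to_json(orient='table'))
print(list(tab))                                        # ['schema', 'data']
print([f['name'] for f in tab['schema']['fields']])     # ['index', 'value']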
    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]
    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]
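The filtering done by `name_table` is plain-Python logic on the 'fields' list: 'index' fields are dropped and the conventional 'values' name maps to None. A standalone sketch with an illustrative fields list:

fields = [{'name': 'index', 'type': 'integer'},
          {'name': 'values', 'type': 'number'},
          {'name': 'city', 'type': 'string'}]
names = [f.get('name') for f in fields if f.get('name') != 'index']
print([None if n == 'values' else n for n in names])    # [None, 'city']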
    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                                 field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']
    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name' '''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema
    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val
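`table_val` serialises through pandas with ISO dates and a str fallback handler. A sketch of those settings on an illustrative datetime Series (the exact timestamp precision depends on the pandas version):

import json
import pandas as pd

srs = pd.Series(pd.to_datetime(['2023-02-27']), name='dates')
tab = json.loads(srs.to_json(orient='table', date_format='iso', default_handler=str))
print(tab['data'][0]['dates'])    # e.g. '2023-02-27T00:00:00.000'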
    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - apply to json function'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs, format='mixed').dt.time
        return srs
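In the from-json direction, `convert` maps the same string data to different pandas objects depending on the NTVtype. A pandas-only sketch of the 'date' and 'time' branches with an illustrative value (format='mixed' follows the call above and needs pandas >= 2.0):

import pandas as pd

srs = pd.Series(['2023-02-27T10:30:00'])
print(pd.to_datetime(srs).dt.date[0])                    # 2023-02-27
print(pd.to_datetime(srs, format='mixed').dt.time[0])    # 10:30:00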
    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return None
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type
    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))
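For a plain numeric Series, the json-value produced by `ntv_val` is simply the list of values. A sketch of the orient='records' call on illustrative data:

import json
import pandas as pd

srs = pd.Series([1.5, 2.5, 3.0])
print(json.loads(srs.to_json(orient='records', date_format='iso', default_handler=str)))
# [1.5, 2.5, 3.0]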
    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)
    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]
    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr
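`pd_index` promotes an 'index' column, when present, to an unnamed DataFrame index. A short sketch with illustrative data:

import pandas as pd

dfr = pd.DataFrame({'index': [0, 1], 'city': ['paris', 'lyon']})
dfr = dfr.set_index('index')
dfr.index.rename(None, inplace=True)
print(dfr.index.name, list(dfr.columns))    # None ['city']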
    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)
    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value'''
        if str(srs.dtype) == 'category':
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
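`unic` reduces a constant non-categorical Series to its first element. A sketch of the detection with illustrative data:

import numpy as np
import pandas as pd

srs = pd.Series([7, 7, 7])
constant = np.array_equal(srs.values, [srs.values[0]] * len(srs))
print(srs[:1] if constant else srs)    # 0    7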