ntv-pandas.ntv_pandas
NTV-pandas Package
Created on Sept 2023
@author: philippe@loco-labs.io
This package contains the following classes and functions:
ntv-pandas.ntv_pandas.pandas_ntv_connector:
ntv-pandas.ntv_pandas.pandas_ntv_connector.DataFrameConnec
ntv-pandas.ntv_pandas.pandas_ntv_connector.SeriesConnec
ntv-pandas.ntv_pandas.pandas_ntv_connector.PdUtil
ntv-pandas.ntv_pandas.pandas_ntv_connector.to_json
ntv-pandas.ntv_pandas.pandas_ntv_connector.read_json
ntv-pandas.ntv_pandas.pandas_ntv_connector.to_analysis
ntv-pandas.ntv_pandas.pandas_ntv_connector.as_def_type
ntv-pandas.ntv_pandas.pandas_ntv_connector.equals
ntv-pandas.ntv_pandas.accessors:
ntv-pandas.ntv_pandas.accessors.NpdSeriesAccessor
ntv-pandas.ntv_pandas.accessors.NpdDataFrameAccessor
1# -*- coding: utf-8 -*- 2""" 3***NTV-pandas Package*** 4 5Created on Sept 2023 6 7@author: philippe@loco-labs.io 8 9This package contains the following classes and functions: 10 11- `ntv-pandas.ntv_pandas.pandas_ntv_connector` : 12 13 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.DataFrameConnec` 14 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.SeriesConnec` 15 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.PdUtil` 16 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.to_json` 17 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.read_json` 18 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.analysis` 19 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.as_def_type` 20 - `ntv-pandas.ntv_pandas.pandas_ntv_connector.equals` 21 22- `ntv-pandas.ntv_pandas.accessors` : 23 24 - `ntv-pandas.ntv_pandas.accessors.NpdSeriesAccessor` 25 - `ntv-pandas.ntv_pandas.accessors.NpdDataFrameAccessor` 26""" 27 28from ntv_pandas.pandas_ntv_connector import DataFrameConnec, SeriesConnec, read_json 29from ntv_pandas.pandas_ntv_connector import to_json, as_def_type, equals, to_analysis 30from ntv_pandas.pandas_ntv_connector import from_xarray, from_scipp 31import ntv_pandas.pandas_accessors as pandas_accessors 32 33__all__ = [ 34 "DataFrameConnec", 35 "SeriesConnec", 36 "read_json", 37 "to_json", 38 "as_def_type", 39 "equals", 40 "to_analysis", 41 "from_xarray", 42 "from_scipp", 43 "pandas_accessors", 44]
class DataFrameConnec(NtvConnector):
    """NTV connector for pandas DataFrame.

    Static methods:

    - to_obj_ntv: build a DataFrame from a JSON NTV value
    - to_json_ntv: build the NTV JSON (or TableSchema) form of a DataFrame
    - to_listidx: convert a DataFrame in categorical data
    - equals: column by column comparison of two DataFrame
    """

    clas_obj = "DataFrame"
    clas_typ = "tab"

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        """convert json ntv_value into a DataFrame.

        *Parameters*

        - **ntv_value** : NTV value of the tab, one NTV field per column
        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        *Return* : the result of `PdUtil.pd_index` applied to the DataFrame
        built from the decoded fields."""
        # each decoded field is the list
        # [name, type, codec, parent, keys, coef, length]
        fields = [
            list(NtvUtil.decode_ntv_tab(ntv_field, PdUtil.decode_ntv_to_val))
            for ntv_field in Ntv.fast(ntv_value)
        ]
        length = max(field[6] for field in fields)
        options = kwargs | {"leng": length}

        without_keys = []
        for rank, field in enumerate(fields):
            # the flag is recorded before init_ntv_keys is called on the field
            # (presumably init_ntv_keys fills in the keys - to be confirmed)
            without_keys.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(rank, fields, length)
            codec = Ntv.obj_ntv(field[2], typ=field[1], single=len(field[2]) == 1)
            field[2] = Ntv.fast(codec)

        columns = {}
        for field, keyless in zip(fields, without_keys):
            keys = None if keyless else field[4]
            ser = SeriesConnec.to_series(field[2], field[0], keys, **options)
            columns[ser.name] = ser
        return PdUtil.pd_index(pd.DataFrame(columns))

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        """convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format
        - **index** : boolean (default True) - if True the index Series is included
          (read only when table is False)
        """
        result_type = typ if typ else DataFrameConnec.clas_typ

        if not kwargs.get("table", False):
            frame = value.reset_index() if kwargs.get("index", True) else value
            fields = [
                SeriesConnec.to_json_ntv(PdUtil.unic(frame[label]))[0]
                for label in frame.columns
            ]
            return (Ntv.obj(fields).to_obj(), name, result_type)

        # TableSchema format: columns are converted before the pandas export
        converted = {}
        for label in value.columns:
            short_name = NtvUtil.from_obj_name(label)[0]
            ntv_type = SeriesConnec.to_json_ntv(
                value[label], table=True, no_val=True
            )[1]
            converted[short_name] = PdUtil.convert(ntv_type, value[label])
        text = pd.DataFrame(converted).to_json(
            orient="table", date_format="iso", default_handler=str
        )
        table_val = json.loads(text)

        # the schema produced by pandas is updated field by field
        for label in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[label], table=True, no_val=True
            )
            table_val["schema"] = PdUtil.table_schema(
                table_val["schema"], ntv_name, ntv_type
            )
        return (table_val, name, result_type)

    @staticmethod
    def to_listidx(dtf):
        """convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame"""
        fields = [SeriesConnec.to_idx(column) for _, column in dtf.items()]
        return (fields, len(dtf))

    @staticmethod
    def equals(pdself, pdother):
        """return True if both objects are DataFrame with the same number of
        columns and if the columns, taken in order, are equal according to
        `SeriesConnec.equals`"""
        both_frames = isinstance(pdself, pd.DataFrame) and isinstance(
            pdother, pd.DataFrame
        )
        if not both_frames:
            return False
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(
            SeriesConnec.equals(pdself[left], pdother[right])
            for left, right in zip(pdself.columns, pdother.columns)
        )
NTV connector for pandas DataFrame.
One static method is included:
- to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    """convert json ntv_value into a DataFrame.

    *Parameters*

    - **ntv_value** : NTV value of the tab, one NTV field per column
    - **index** : list (default None) - list of index values,
    - **alias** : boolean (default False) - if True, alias dtype else default dtype
    - **annotated** : boolean (default False) - if True, NTV names are not included.

    *Return* : the result of `PdUtil.pd_index` applied to the DataFrame
    built from the decoded fields."""
    # each decoded field is the list
    # [name, type, codec, parent, keys, coef, length]
    fields = [
        list(NtvUtil.decode_ntv_tab(ntv_field, PdUtil.decode_ntv_to_val))
        for ntv_field in Ntv.fast(ntv_value)
    ]
    length = max(field[6] for field in fields)
    options = kwargs | {"leng": length}

    without_keys = []
    for rank, field in enumerate(fields):
        # the flag is recorded before init_ntv_keys is called on the field
        # (presumably init_ntv_keys fills in the keys - to be confirmed)
        without_keys.append(not (field[3] or field[4] or field[5]))
        NtvConnector.init_ntv_keys(rank, fields, length)
        codec = Ntv.obj_ntv(field[2], typ=field[1], single=len(field[2]) == 1)
        field[2] = Ntv.fast(codec)

    columns = {}
    for field, keyless in zip(fields, without_keys):
        keys = None if keyless else field[4]
        ser = SeriesConnec.to_series(field[2], field[0], keys, **options)
        columns[ser.name] = ser
    return PdUtil.pd_index(pd.DataFrame(columns))
convert json ntv_value into a DataFrame.
Parameters
- index : list (default None) - list of index values,
- alias : boolean (default False) - if True, alias dtype else default dtype
- annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    """convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : DataFrame values
    - **table** : boolean (default False) - if True return TableSchema format
    - **index** : boolean (default True) - if True the index Series is included
      (read only when table is False)
    """
    result_type = typ if typ else DataFrameConnec.clas_typ

    if not kwargs.get("table", False):
        frame = value.reset_index() if kwargs.get("index", True) else value
        fields = [
            SeriesConnec.to_json_ntv(PdUtil.unic(frame[label]))[0]
            for label in frame.columns
        ]
        return (Ntv.obj(fields).to_obj(), name, result_type)

    # TableSchema format: columns are converted before the pandas export
    converted = {}
    for label in value.columns:
        short_name = NtvUtil.from_obj_name(label)[0]
        ntv_type = SeriesConnec.to_json_ntv(
            value[label], table=True, no_val=True
        )[1]
        converted[short_name] = PdUtil.convert(ntv_type, value[label])
    text = pd.DataFrame(converted).to_json(
        orient="table", date_format="iso", default_handler=str
    )
    table_val = json.loads(text)

    # the schema produced by pandas is updated field by field
    for label in value.columns:
        ntv_name, ntv_type = SeriesConnec.to_json_ntv(
            value[label], table=True, no_val=True
        )
        table_val["schema"] = PdUtil.table_schema(
            table_val["schema"], ntv_name, ntv_type
        )
    return (table_val, name, result_type)
convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : DataFrame values
- table : boolean (default False) - if True return TableSchema format
- index : boolean (default True) - if True the index Series is included
330 @staticmethod 331 def to_listidx(dtf): 332 """convert a DataFrame in categorical data 333 334 *Return: tuple with:* 335 336 - **list** of dict (keys : 'codec', 'name, 'keys') for each column 337 - **lenght** of the DataFrame""" 338 return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))
convert a DataFrame in categorical data
Return: tuple with:
- list of dict (keys : 'codec', 'name', 'keys') for each column
- length of the DataFrame
340 @staticmethod 341 def equals(pdself, pdother): 342 """return True if columns are equals""" 343 if not (isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame)): 344 return False 345 if len(pdself.columns) != len(pdother.columns): 346 return False 347 for cself, cother in zip(pdself, pdother): 348 if not SeriesConnec.equals(pdself[cself], pdother[cother]): 349 return False 350 return True
return True if columns are equal
Inherited Members
- json_ntv.ntv_util.NtvConnector
- DIC_NTV_CL
- DIC_GEO_CL
- DIC_DAT_CL
- DIC_FCT
- DIC_GEO
- DIC_CBOR
- DIC_OBJ
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- format_field
- init_ntv_keys
class SeriesConnec(NtvConnector):
    """NTV connector for pandas Series

    Static methods:

    - to_obj_ntv: return a Series from a Ntv field
    - to_json_ntv: return the NTV json form of a Series
    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    - equals: comparison of two Series (names, values, categories)
    """

    clas_obj = "Series"
    clas_typ = "field"

    # mapping data loaded from 'ntv_pandas.ini' when the class is created
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath("ntv_pandas.ini"))
    types = pd.DataFrame(
        json.loads(config["data"]["type"]), columns=json.loads(config["data"]["column"])
    )
    astype = json.loads(config["data"]["astype"])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}

    # TableSchema mapping data loaded from 'ntv_table.ini'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath("ntv_table.ini"))
    table = pd.DataFrame(
        json.loads(config["data"]["mapping"]),
        columns=json.loads(config["data"]["column"]),
    )
    typtab = pd.DataFrame(
        json.loads(config["data"]["type"]),
        columns=json.loads(config["data"]["col_type"]),
    )

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        """Generate a Series Object from a Ntv field object

        Return None if ntv_value is None or if the field has a parent and no
        'extkeys' is given.

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        """
        if ntv_value is None:
            return None
        defaults = {
            "extkeys": None,
            "decode_str": False,
            "leng": None,
            "annotated": False,
        }
        option = defaults | kwargs
        extkeys = option["extkeys"]

        ntv = Ntv.obj(ntv_value, decode_str=option["decode_str"])
        decoded = NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = decoded
        if parent and not extkeys:
            return None

        # keys priority: coefficient first, then external keys
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field // coef, option["leng"]
            )
        elif extkeys:
            if parent:
                ntv_keys = NtvConnector.keysfromderkeys(extkeys, ntv_keys)
            else:
                ntv_keys = extkeys
        ntv_codec = Ntv.fast(Ntv.obj_ntv(codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        """convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and table is True)
          return (ntv_name, ntv_type)"""
        dtype_name = value.dtype.name
        srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
        ntv_name, name_type = NtvUtil.from_obj_name(srs.name if srs.name else "")[:2]
        result_type = typ if typ else SeriesConnec.clas_typ

        if kwargs.get("table", False):
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if kwargs.get("no_val", False):
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)

        if srs.dtype.name == "category":
            # categorical Series: [codec, keys], keys being replaced by a
            # single coefficient when encode_coef returns one
            categories = pd.Series(srs.cat.categories)
            cat_type = PdUtil.ntv_type(name_type, categories.dtype.name)
            codec = NtvList(PdUtil.ntv_val(cat_type, categories), ntv_type=cat_type)
            codes = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(codes)
            keys = NtvList([coef]) if coef else NtvList(codes)
            ntv_value = [codec, keys]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(
                PdUtil.ntv_val(ntv_type, srs), def_type=ntv_type
            ).ntv_value

        if len(ntv_value) == 1:
            # a single value is returned as a named Ntv and not as a list
            single = ntv_value[0]
            single.set_name(ntv_name)
            return (single.to_obj(), name, result_type)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, result_type)

    @staticmethod
    def to_idx(ser):
        """convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        If the first category is a pandas Timestamp, categories are converted
        into python datetime expressed in UTC.
        """
        categorical = ser.astype("category")
        codec = list(categorical.cat.categories)
        if codec and isinstance(codec[0], pd.Timestamp):
            # NOTE(review): astimezone() reads a naive datetime as local time,
            # so naive timestamps depend on the machine timezone - confirm
            utc = datetime.timezone.utc
            codec = [stamp.to_pydatetime().astimezone(utc) for stamp in codec]
        return {
            "codec": codec,
            "name": ser.name,
            "keys": list(categorical.cat.codes),
        }

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        """return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the categorical Series. If empty or None,
          the Series is built from the codec values only

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
          (applied only when ntv_keys is provided)
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        """
        option = {
            "index": None,
            "leng": None,
            "alias": False,
            "annotated": False,
        } | kwargs
        ntv_type = ntv_codec.type_str
        leng = option["leng"]
        # a single codec value is repeated 'leng' times
        repeat = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in SeriesConnec.types.set_index("ntv_type").index

        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
        codec_type = name_type if pd_convert else ntv_type
        ntv_obj = PdUtil.ntv_obj(ntv_codec, codec_type, option["annotated"], pd_convert)

        if not ntv_keys:
            data = ntv_obj * repeat
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)
        else:
            if pd_convert and name_type != "array":
                decoded = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                decoded_type = decoded.dtype.name
                categories = decoded.astype(
                    SeriesConnec.astype.get(decoded_type, decoded_type)
                )
            else:
                categories = pd.Series(ntv_obj, dtype="object")
            cat_dtype = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat_dtype)
            srs = pd.Series(data, name=pd_name, index=option["index"], dtype="category")

        mapping = SeriesConnec.astype if option["alias"] else SeriesConnec.deftype
        final_name = srs.dtype.name
        return srs.astype(mapping.get(final_name, final_name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        """return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including ntv_type
        """
        buffer = StringIO(json.dumps(data))
        srs = pd.read_json(buffer, dtype=dtype, typ="series")
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)

    @staticmethod
    def equals(pdself, pdother):
        """return True if both objects are Series with equal names and equal data.

        - non categorical Series: pd.equals is applied after conversion to
          default dtype,
        - categorical Series: categories (converted to default dtype) and
          codes have to be equal.

        Return False if one of the objects is not a Series."""
        if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
            return False
        if pdself.name != pdother.name:
            return False
        if str(pdself.dtype) == str(pdother.dtype) == "category":
            # 'cat.categories' is a pd.Index: it is compared directly because
            # this method only accepts Series (a recursive call returns False)
            categories_self = as_def_type(pdself.cat.categories)
            categories_other = as_def_type(pdother.cat.categories)
            if not categories_self.equals(categories_other):
                return False
            # identical categories are not enough, values are given by codes
            return pdself.cat.codes.equals(pdother.cat.codes)
        return as_def_type(pdself).equals(as_def_type(pdother))
NTV connector for pandas Series
Two static methods are included:
- to_idx: convert a Series in categorical data
- to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    """Generate a Series Object from a Ntv field object

    Return None if ntv_value is None or if the field has a parent and no
    'extkeys' is given.

    *Parameters*

    - **ntv_value**: Ntv object or Ntv value - value to convert in Series

    *parameters (kwargs)*

    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **index**: list (default None) - if present, add the index in Series
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    """
    if ntv_value is None:
        return None
    defaults = {
        "extkeys": None,
        "decode_str": False,
        "leng": None,
        "annotated": False,
    }
    option = defaults | kwargs
    extkeys = option["extkeys"]

    ntv = Ntv.obj(ntv_value, decode_str=option["decode_str"])
    decoded = NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
    ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = decoded
    if parent and not extkeys:
        return None

    # keys priority: coefficient first, then external keys
    if coef:
        ntv_keys = NtvConnector.keysfromcoef(
            coef, leng_field // coef, option["leng"]
        )
    elif extkeys:
        if parent:
            ntv_keys = NtvConnector.keysfromderkeys(extkeys, ntv_keys)
        else:
            ntv_keys = extkeys
    ntv_codec = Ntv.fast(Ntv.obj_ntv(codec, typ=typ, single=len(codec) == 1))
    return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)
Generate a Series Object from a Ntv field object
Parameters
- ntv_value: Ntv object or Ntv value - value to convert in Series
parameters (kwargs)
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    """convert a Series (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : Series values
    - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
    - **no_val** : boolean (default False) - if True (and table is True)
      return (ntv_name, ntv_type)"""
    dtype_name = value.dtype.name
    srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
    ntv_name, name_type = NtvUtil.from_obj_name(srs.name if srs.name else "")[:2]
    result_type = typ if typ else SeriesConnec.clas_typ

    if kwargs.get("table", False):
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
        ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
        if kwargs.get("no_val", False):
            return (ntv_name, ntv_type)
        return (ntv_value, ntv_name, ntv_type)

    if srs.dtype.name == "category":
        # categorical Series: [codec, keys], keys being replaced by a
        # single coefficient when encode_coef returns one
        categories = pd.Series(srs.cat.categories)
        cat_type = PdUtil.ntv_type(name_type, categories.dtype.name)
        codec = NtvList(PdUtil.ntv_val(cat_type, categories), ntv_type=cat_type)
        codes = list(srs.cat.codes)
        coef = NtvConnector.encode_coef(codes)
        keys = NtvList([coef]) if coef else NtvList(codes)
        ntv_value = [codec, keys]
        ntv_type = None
    else:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
        ntv_value = Ntv.from_obj(
            PdUtil.ntv_val(ntv_type, srs), def_type=ntv_type
        ).ntv_value

    if len(ntv_value) == 1:
        # a single value is returned as a named Ntv and not as a list
        single = ntv_value[0]
        single.set_name(ntv_name)
        return (single.to_obj(), name, result_type)
    return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, result_type)
convert a Series (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : Series values
- table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
- no_val : boolean (default False) - if True return (ntv_name, ntv_type)
477 @staticmethod 478 def to_idx(ser): 479 """convert a Series in categorical data 480 481 *return (dict)* 482 483 { 'codec': 'list of pandas categories', 484 'name': 'name of the series', 485 'keys': 'list of pandas codes' } 486 """ 487 idx = ser.astype("category") 488 lis = list(idx.cat.categories) 489 if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp): 490 lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) for ts in lis] 491 return {"codec": lis, "name": ser.name, "keys": list(idx.cat.codes)}
convert a Series in categorical data
return (dict)
{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }
@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
    """return a pd.Series from Field data (codec, name, keys)

    *Parameters*

    - **ntv_codec**: Ntv object - codec value to convert in Series values
    - **ntv_name**: string - name of the Series
    - **ntv_keys**: list - codes of the categorical Series. If empty or None,
      the Series is built from the codec values only

    *parameters (kwargs)*

    - **index**: list (default None) - if present, add the index in Series
      (applied only when ntv_keys is provided)
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    """
    option = {
        "index": None,
        "leng": None,
        "alias": False,
        "annotated": False,
    } | kwargs
    ntv_type = ntv_codec.type_str
    leng = option["leng"]
    # a single codec value is repeated 'leng' times
    repeat = leng if len(ntv_codec) == 1 and leng else 1
    pd_convert = ntv_type in SeriesConnec.types.set_index("ntv_type").index

    pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
    codec_type = name_type if pd_convert else ntv_type
    ntv_obj = PdUtil.ntv_obj(ntv_codec, codec_type, option["annotated"], pd_convert)

    if not ntv_keys:
        data = ntv_obj * repeat
        if pd_convert:
            srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
        else:
            srs = pd.Series(data, name=pd_name, dtype=dtype)
    else:
        if pd_convert and name_type != "array":
            decoded = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
            decoded_type = decoded.dtype.name
            categories = decoded.astype(
                SeriesConnec.astype.get(decoded_type, decoded_type)
            )
        else:
            categories = pd.Series(ntv_obj, dtype="object")
        cat_dtype = pd.CategoricalDtype(categories=categories)
        data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat_dtype)
        srs = pd.Series(data, name=pd_name, index=option["index"], dtype="category")

    mapping = SeriesConnec.astype if option["alias"] else SeriesConnec.deftype
    final_name = srs.dtype.name
    return srs.astype(mapping.get(final_name, final_name))
return a pd.Series from Field data (codec, name, keys)
Parameters
- ntv_codec: Ntv object - codec value to convert in Series values
- ntv_type: string - default type to apply to convert in dtype
- ntv_name: string - name of the Series
parameters (kwargs)
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
569 @staticmethod 570 def equals(pdself, pdother): 571 """return True if pd.equals is True and names are equal and dtype of categories are equal""" 572 if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)): 573 return False 574 if pdself.name != pdother.name: 575 return False 576 type_cat = str(pdself.dtype) == str(pdother.dtype) == "category" 577 if type_cat: 578 return SeriesConnec.equals(pdself.cat.categories, pdother.cat.categories) 579 return as_def_type(pdself).equals(as_def_type(pdother))
return True if pd.equals is True and names are equal and dtype of categories are equal
Inherited Members
- json_ntv.ntv_util.NtvConnector
- DIC_NTV_CL
- DIC_GEO_CL
- DIC_DAT_CL
- DIC_FCT
- DIC_GEO
- DIC_CBOR
- DIC_OBJ
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- format_field
- init_ntv_keys
def read_json(jsn, **kwargs):
    """convert JSON text or JSON Value to pandas Series or Dataframe.

    A JSON object with a 'schema' member is decoded as TableSchema data.
    Otherwise the NTV type ('field' or 'tab') selects Series or DataFrame and,
    without this header, the 'series' option is used.

    *parameters*

    - **jsn** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    """
    option = {
        "extkeys": None,
        "decode_str": False,
        "leng": None,
        "alias": False,
        "annotated": False,
        "series": False,
    } | kwargs
    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
    # only a JSON object can be TableSchema data: a JSON array holding the
    # string "schema" is ordinary data and a JSON number has no members
    if isinstance(jso, dict) and "schema" in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == "field":
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == "tab":
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    if option["series"]:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
convert JSON text or JSON Value to pandas Series or Dataframe.
parameters
- jsn : JSON text or JSON value to convert
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
- series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def to_json(pd_array, **kwargs):
    """convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    (ignored if table is True)
    - **table** : boolean (default False) - if True return TableSchema format
    - **index** : boolean (default True) - if True the index Series is included
    (DataFrame only)
    """
    option = {"encoded": False, "header": True, "table": False, "index": True} | kwargs
    if option["table"]:
        # no header with TableSchema format
        option["header"] = False

    if isinstance(pd_array, pd.Series):
        head = ":field"
        jsn = SeriesConnec.to_json_ntv(pd_array, table=option["table"])[0]
    else:
        head = ":tab"
        jsn = DataFrameConnec.to_json_ntv(
            pd_array, table=option["table"], index=option["index"]
        )[0]

    result = {head: jsn} if option["header"] else jsn
    return json.dumps(result) if option["encoded"] else result
convert pandas Series or Dataframe to JSON text or JSON Value.
parameters
- pd_array : Series or Dataframe to convert
- encoded : boolean (default: False) - if True return a JSON text else a JSON value
- header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
- table : boolean (default False) - if True return TableSchema format
- index : boolean (default True) - if True the index Series is included
def as_def_type(pd_array):
    """convert a Series or DataFrame with default dtype

    A Series (or Index) is converted with the dtype found in
    `SeriesConnec.deftype` (unchanged dtype if not found). Any other object is
    handled as a DataFrame and rebuilt from its converted columns."""
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        return pd.DataFrame(
            {col: as_def_type(pd_array[col]) for col in pd_array.columns}
        )
    dtype_name = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(dtype_name, dtype_name))
convert a Series or DataFrame with default dtype
def equals(pdself, pdother):
    """return True if pd.equals is True and names are equal and dtype of categories are equal

    Two Series are compared with `SeriesConnec.equals`, two DataFrame with
    `DataFrameConnec.equals`. Any other combination returns False."""
    both_series = isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)
    if both_series:
        return SeriesConnec.equals(pdself, pdother)
    both_frames = isinstance(pdself, pd.DataFrame) and isinstance(
        pdother, pd.DataFrame
    )
    return DataFrameConnec.equals(pdself, pdother) if both_frames else False
return True if pd.equals is True and names are equal and dtype of categories are equal
def to_analysis(pd_df, distr=False):
    """return a dict with data used in AnaDataset module

    *Parameters*

    - **pd_df** : DataFrame to analyse
    - **distr** : (default False) - if truthy, the relation between two columns
      is `_dist(keys1, keys2, distr)` else the number of distinct couples of codes

    *Return (dict)*

    - 'fields': list of dict ('lencodec', 'id', 'mincodec') for each column,
    - 'name': None,
    - 'length': number of rows,
    - 'relations': for each column (except the last one), dict of the
      relation value with each following column
    """
    columns = pd_df.columns
    codes = [list(pd_df[label].astype("category").cat.codes) for label in columns]
    sizes = [len(set(code)) for code in codes]

    def measure(left, right):
        # relation value between two lists of codes
        if distr:
            return _dist(left, right, distr)
        return len(set(zip(left, right)))

    relations = {}
    for first in range(len(codes) - 1):
        linked = {}
        for second in range(first + 1, len(codes)):
            linked[columns[second]] = measure(codes[first], codes[second])
        relations[columns[first]] = linked

    fields = [
        {"lencodec": size, "id": columns[rank], "mincodec": size}
        for rank, size in enumerate(sizes)
    ]
    return {
        "fields": fields,
        "name": None,
        "length": len(pd_df),
        "relations": relations,
    }
return a dict with data used in AnaDataset module
def from_xarray(xdt, **kwargs):
    """convert xarray.Dataset to pandas DataFrame.

    The data is loaded with `Xdataset.from_xarray` and exported with
    `to_dataframe`, which receives the keyword arguments.

    *Parameters*

    - **xdt**: xarray.Dataset to convert
    - **ntv_type**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    """
    xdataset = Xdataset.from_xarray(xdt)
    return xdataset.to_dataframe(**kwargs)
convert xarray.Dataset to pandas DataFrame.
Parameters
- ntv_type: Boolean (default True) - if False use full_name else json_name
- info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
- dims: list of string (default None) - order of dimensions full_name to apply
def from_scipp(sci, **kwargs):
    """convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame.

    The data is loaded with `Xdataset.from_scipp` and exported with
    `to_dataframe`, which receives the keyword arguments.

    *Parameters*

    - **sci**: scipp object to convert
    - **ntv_type**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add the dataset info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    """
    xdataset = Xdataset.from_scipp(sci)
    return xdataset.to_dataframe(**kwargs)
convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame.
Parameters
- ntv_type: Boolean (default True) - if False use full_name else json_name
- info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
- dims: list of string (default None) - order of dimensions full_name to apply