ntv-pandas.ntv_pandas.pandas_ntv_connector
Created on Feb 27 2023
@author: Philippe@loco-labs.io
The pandas_ntv_connector
module is part of the ntv-pandas.ntv_pandas
package
(specification document).
A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV
It contains :
- functions
read_json
and to_json
to convert JSON data and pandas entities
- function
to_analysis
to create data used by the tab_analysis
module
- function
check_relation
to identify rows with inconsistent relationships (tab_dataset
function)
- functions
as_def_type
and equals
- the child classes of
NTV.json_ntv.ntv.NtvConnector
abstract class:
  - DataFrameConnec: 'tab' connector
  - SeriesConnec: 'field' connector
- a utility class with static methods :
PdUtil
The functions to_json
, to_analysis
, check_relation
, as_def_type
and
equals
are used with the npd
accessor.
1# -*- coding: utf-8 -*- 2""" 3Created on Feb 27 2023 4 5@author: Philippe@loco-labs.io 6 7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package 8([specification document]( 9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)). 10 11A NtvConnector is defined by: 12- clas_obj: str - define the class name of the object to convert 13- clas_typ: str - define the NTVtype of the converted object 14- to_obj_ntv: method - converter from JsonNTV to the object 15- to_json_ntv: method - converter from the object to JsonNTV 16 17It contains : 18 19- functions `read_json` and `to_json` to convert JSON data and pandas entities 20- function `to_analysis` to create data used by the `tab_analysis` module 21- function `check_relation` to identify rows with inconsistent relationships (`tab_dataset` function) 22- functions `as_def_type` and `equals` 23 24- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class: 25 - `DataFrameConnec`: 'tab' connector 26 - `SeriesConnec`: 'field' connector 27 28- an utility class with static methods : `PdUtil` 29 30The functions `to_json`, `to_analysis`, `check_relation`, `as_def_type` and 31`equals` are used with the `npd` accessor. 
32 33""" 34import os 35import datetime 36import json 37import configparser 38from pathlib import Path 39import pandas as pd 40import numpy as np 41 42 43from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle 44from json_ntv.ntv_util import NtvUtil 45from json_ntv.ntv_connector import ShapelyConnec 46from tab_dataset.cfield import Cfield 47 48path_ntv_pandas = Path(os.path.abspath(__file__)).parent 49 50def as_def_type(pd_array): 51 '''convert a Series or DataFrame with default dtype''' 52 if isinstance(pd_array, (pd.Series, pd.Index)): 53 return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name)) 54 return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns}) 55 56def check_relation(pd_df, parent, child, typecoupl, value=True): 57 ''' Accessor for method `cdataset.Cdataset.check_relation` invoket as 58 `pd.DataFrame.npd.check_relation`. 59 Get the inconsistent records for a relationship. 60 61 *Parameters* 62 63 - **child** : str - name of the child Series involved in the relation 64 - **parent**: str - name of the parent Series involved in the relation 65 - **typecoupl**: str - relationship to check ('derived' or 'coupled') 66 - **value**: boolean (default True) - if True return a dict with inconsistent 67 values of the Series, else a tuple with index of records) 68 69 *Returns* : 70 71 - dict with inconsistent values of the Series 72 - or a tuple with row of records''' 73 parent_idx = SeriesConnec.to_idx(pd_df[parent]) 74 parent_field = Cfield(parent_idx['codec'], parent, parent_idx['keys']) 75 child_idx = SeriesConnec.to_idx(pd_df[child]) 76 child_field = Cfield(child_idx['codec'], child, child_idx['keys']) 77 return Cfield.check_relation(parent_field, child_field, typecoupl, value) 78 79 80def equals(pdself, pdother): 81 '''return True if pd.equals is True and names are equal and dtype of categories are equal''' 82 equ = True 83 if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series): 84 
type_cat = str(pdself.dtype) == str(pdother.dtype) == 'category' 85 if type_cat: 86 equ &= equals(pdself.cat.categories, pdother.cat.categories) 87 else: 88 equ &= as_def_type(pdself).equals(as_def_type(pdother)) 89 equ &= pdself.name == pdother.name 90 if not equ: 91 return False 92 elif isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame): 93 for cself, cother in zip(pdself, pdother): 94 equ &= equals(pdself[cself], pdother[cother]) 95 return equ 96 97def read_json(jsn, **kwargs): 98 ''' convert JSON text or JSON Value to pandas Series or Dataframe. 99 100 *parameters* 101 102 - **jsn** : JSON text or JSON value to convert 103 - **extkeys**: list (default None) - keys to use if not present in ntv_value 104 - **decode_str**: boolean (default False) - if True, string values are converted 105 in object values 106 - **leng**: integer (default None) - leng of the Series (used with single codec value) 107 - **alias**: boolean (default False) - if True, convert dtype in alias dtype 108 - **annotated**: boolean (default False) - if True, ntv_codec names are ignored 109 - **series**: boolean (default False) - used only without header. 
If True 110 JSON data is converted into Series else DataFrame 111 ''' 112 option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False, 113 'annotated': False, 'series': False} | kwargs 114 jso = json.loads(jsn) if isinstance(jsn, str) else jsn 115 if 'schema' in jso: 116 return PdUtil.to_obj_table(jso, **option) 117 ntv = Ntv.from_obj(jso) 118 if ntv.type_str == 'field': 119 return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option) 120 if ntv.type_str == 'tab': 121 return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option) 122 if option['series']: 123 return SeriesConnec.to_obj_ntv(ntv, **option) 124 return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option) 125 126def to_analysis(pd_df): 127 '''return a dict with data used in AnaDataset module''' 128 129 keys = [list(pd_df[col].astype('category').cat.codes) for col in pd_df.columns] 130 lencodec = [ len(set(key)) for key in keys] 131 dist = [[len(set(zip(keys[i], keys[j]))) 132 for j in range(i+1, len(keys))] 133 for i in range(len(keys)-1)] 134 return {'fields': [{'lencodec': lencodec[ind], 'id': pd_df.columns[ind], 135 'mincodec': lencodec[ind]} 136 for ind in range(len(pd_df.columns))], 137 'name': None, 'length': len(pd_df), 138 'relations': {pd_df.columns[i]: {pd_df.columns[j+i+1]: dist[i][j] 139 for j in range(len(dist[i]))} for i in range(len(dist))}} 140 141def to_json(pd_array, **kwargs): 142 ''' convert pandas Series or Dataframe to JSON text or JSON Value. 
143 144 *parameters* 145 146 - **pd_array** : Series or Dataframe to convert 147 - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value 148 - **header** : boolean (default: True) - if True the JSON data is included as 149 value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame 150 - **table** : boolean (default False) - if True return TableSchema format 151 ''' 152 option = {'encoded': False, 'header': True, 'table': False} | kwargs 153 option['header'] = False if option['table'] else option['header'] 154 if isinstance(pd_array, pd.Series): 155 jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0] 156 head = ':field' 157 else: 158 jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'])[0] 159 head = ':tab' 160 if option['header']: 161 jsn = {head: jsn} 162 if option['encoded']: 163 return json.dumps(jsn) 164 return jsn 165 166 167class DataFrameConnec(NtvConnector): 168 169 '''NTV connector for pandas DataFrame. 170 171 One static methods is included: 172 173 - to_listidx: convert a DataFrame in categorical data 174 ''' 175 176 clas_obj = 'DataFrame' 177 clas_typ = 'tab' 178 179 @staticmethod 180 def to_obj_ntv(ntv_value, **kwargs): # reindex=True, decode_str=False): 181 ''' convert json ntv_value into a DataFrame. 
182 183 *Parameters* 184 185 - **index** : list (default None) - list of index values, 186 - **alias** : boolean (default False) - if True, alias dtype else default dtype 187 - **annotated** : boolean (default False) - if True, NTV names are not included.''' 188 series = SeriesConnec.to_series 189 190 ntv = Ntv.fast(ntv_value) 191 lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val)) 192 for ntvf in ntv] 193 leng = max([idx[6] for idx in lidx]) 194 option = kwargs | {'leng': leng} 195 no_keys = [] 196 for ind in range(len(lidx)): 197 lind = lidx[ind] 198 no_keys.append(not lind[3] and not lind[4] and not lind[5]) 199 NtvConnector.init_ntv_keys(ind, lidx, leng) 200 lind[2] = Ntv.fast(Ntv.obj_ntv( 201 lind[2], typ=lind[1], single=len(lind[2]) == 1)) 202 list_series = [series(lidx[ind][2], lidx[ind][0], None if no_keys[ind] 203 else lidx[ind][4], **option) for ind in range(len(lidx))] 204 dfr = pd.DataFrame({ser.name: ser for ser in list_series}) 205 return PdUtil.pd_index(dfr) 206 207 @staticmethod 208 def to_json_ntv(value, name=None, typ=None, **kwargs): 209 ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type). 
210 211 *Parameters* 212 213 - **typ** : string (default None) - type of the NTV object, 214 - **name** : string (default None) - name of the NTV object 215 - **value** : DataFrame values 216 - **table** : boolean (default False) - if True return TableSchema format''' 217 218 table = kwargs.get('table', False) 219 if not table: 220 df2 = value.reset_index() 221 jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0] 222 for col in df2.columns]).to_obj() 223 return (jsn, name, DataFrameConnec.clas_typ if not typ else typ) 224 df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert( 225 SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1], 226 value[col]) for col in value.columns}) 227 table_val = json.loads(df2.to_json(orient='table', 228 date_format='iso', default_handler=str)) 229 for nam in value.columns: 230 ntv_name, ntv_type = SeriesConnec.to_json_ntv( 231 value[nam], table=True, no_val=True) 232 table_val['schema'] = PdUtil.table_schema(table_val['schema'], 233 ntv_name, ntv_type) 234 return (table_val, name, DataFrameConnec.clas_typ if not typ else typ) 235 236 @staticmethod 237 def to_listidx(dtf): 238 ''' convert a DataFrame in categorical data 239 240 *Return: tuple with:* 241 242 - **list** of dict (keys : 'codec', 'name, 'keys') for each column 243 - **lenght** of the DataFrame''' 244 return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf)) 245 246 247class SeriesConnec(NtvConnector): 248 '''NTV connector for pandas Series 249 250 Two static methods are included: 251 252 - to_idx: convert a Series in categorical data 253 - to_series: return a Series from Field data 254 ''' 255 clas_obj = 'Series' 256 clas_typ = 'field' 257 config = configparser.ConfigParser() 258 # config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini')) 259 config.read(path_ntv_pandas.joinpath('ntv_pandas.ini')) 260 types = pd.DataFrame(json.loads(config['data']['type']), 261 columns=json.loads(config['data']['column'])) 
262 astype = json.loads(config['data']['astype']) 263 deftype = {val: key for key, val in astype.items()} 264 config = configparser.ConfigParser() 265 # config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_table.ini')) 266 config.read(path_ntv_pandas.joinpath('ntv_table.ini')) 267 table = pd.DataFrame(json.loads(config['data']['mapping']), 268 columns=json.loads(config['data']['column'])) 269 typtab = pd.DataFrame(json.loads(config['data']['type']), 270 columns=json.loads(config['data']['col_type'])) 271 272 @staticmethod 273 def to_obj_ntv(ntv_value, **kwargs): 274 '''Generate a Series Object from a Ntv field object 275 276 *Parameters* 277 278 - **ntv_value**: Ntv object or Ntv value - value to convert in Series 279 280 *parameters (kwargs)* 281 282 - **extkeys**: list (default None) - keys to use if not present in ntv_value 283 - **decode_str**: boolean (default False) - if True, string values are converted 284 in object values 285 - **index**: list (default None) - if present, add the index in Series 286 - **leng**: integer (default None) - leng of the Series (used with single codec value) 287 - **alias**: boolean (default False) - if True, convert dtype in alias dtype 288 - **annotated**: boolean (default False) - if True, ntv_codec names are ignored 289 ''' 290 option = {'extkeys': None, 'decode_str': False, 'leng': None, 291 'annotated': False} | kwargs 292 if ntv_value is None: 293 return None 294 ntv = Ntv.obj(ntv_value, decode_str=option['decode_str']) 295 296 ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \ 297 NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val) 298 if parent and not option['extkeys']: 299 return None 300 if coef: 301 ntv_keys = NtvConnector.keysfromcoef( 302 coef, leng_field//coef, option['leng']) 303 elif option['extkeys'] and parent: 304 ntv_keys = NtvConnector.keysfromderkeys( 305 option['extkeys'], ntv_keys) 306 elif option['extkeys'] and not parent: 307 ntv_keys = option['extkeys'] 308 ntv_codec = 
Ntv.fast(Ntv.obj_ntv( 309 codec, typ=typ, single=len(codec) == 1)) 310 return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option) 311 312 @staticmethod 313 def to_json_ntv(value, name=None, typ=None, **kwargs): 314 ''' convert a Series (value, name, type) into NTV json (json-value, name, type). 315 316 *Parameters* 317 318 - **typ** : string (default None) - type of the NTV object, 319 - **name** : string (default None) - name of the NTV object 320 - **value** : Series values 321 - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type) 322 - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)''' 323 324 table = kwargs.get('table', False) 325 no_val = kwargs.get('no_val', False) 326 srs = value.astype(SeriesConnec.astype.get( 327 value.dtype.name, value.dtype.name)) 328 sr_name = srs.name if srs.name else '' 329 ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2] 330 331 if table: 332 ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True) 333 ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs) 334 if no_val: 335 return (ntv_name, ntv_type) 336 return (ntv_value, ntv_name, ntv_type) 337 if srs.dtype.name == 'category': 338 cdc = pd.Series(srs.cat.categories) 339 ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name) 340 cat_value = PdUtil.ntv_val(ntv_type, cdc) 341 cat_value = NtvList(cat_value, ntv_type=ntv_type) 342 cod_value = list(srs.cat.codes) 343 coef = NtvConnector.encode_coef(cod_value) 344 ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)] 345 ntv_type = None 346 else: 347 ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name) 348 ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs), 349 def_type=ntv_type).ntv_value 350 if len(ntv_value) == 1: 351 ntv_value[0].set_name(ntv_name) 352 return (ntv_value[0].to_obj(), name, 353 SeriesConnec.clas_typ if not typ else typ) 354 return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, 355 
SeriesConnec.clas_typ if not typ else typ) 356 357 @staticmethod 358 def to_idx(ser): 359 ''' convert a Series in categorical data 360 361 *return (dict)* 362 363 { 'codec': 'list of pandas categories', 364 'name': 'name of the series', 365 'keys': 'list of pandas codes' } 366 ''' 367 idx = ser.astype('category') 368 lis = list(idx.cat.categories) 369 if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp): 370 lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) 371 for ts in lis] 372 return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)} 373 374 @staticmethod 375 def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs): 376 ''' return a pd.Series from Field data (codec, name, keys) 377 378 *Parameters* 379 380 - **ntv_codec**: Ntv object - codec value to convert in Series values 381 - **ntv_type**: string - default type to apply to convert in dtype 382 - **ntv_name**: string - name of the Series 383 384 *parameters (kwargs)* 385 386 - **index**: list (default None) - if present, add the index in Series 387 - **leng**: integer (default None) - leng of the Series (used with single codec value) 388 - **alias**: boolean (default False) - if True, convert dtype in alias dtype 389 - **annotated**: boolean (default False) - if True, ntv_codec names are ignored 390 ''' 391 option = {'index': None, 'leng': None, 'alias': False, 392 'annotated': False} | kwargs 393 types = SeriesConnec.types.set_index('ntv_type') 394 astype = SeriesConnec.astype 395 leng = option['leng'] 396 397 ntv_type = ntv_codec.type_str 398 len_unique = leng if len(ntv_codec) == 1 and leng else 1 399 pd_convert = ntv_type in types.index 400 401 pd_name, name_type, dtype = PdUtil.pd_name( 402 ntv_name, ntv_type, pd_convert) 403 ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type, 404 option['annotated'], pd_convert) 405 if ntv_keys: 406 if pd_convert and name_type != 'array': 407 categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type) 408 cat_type 
= categ.dtype.name 409 categories = categ.astype(astype.get(cat_type, cat_type)) 410 else: 411 categories = pd.Series(ntv_obj, dtype='object') 412 cat = pd.CategoricalDtype(categories=categories) 413 data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat) 414 srs = pd.Series(data, name=pd_name, 415 index=option['index'], dtype='category') 416 else: 417 data = ntv_obj * len_unique 418 if pd_convert: 419 srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name) 420 else: 421 srs = pd.Series(data, name=pd_name, dtype=dtype) 422 423 if option['alias']: 424 return srs.astype(astype.get(srs.dtype.name, srs.dtype.name)) 425 return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name)) 426 427 @staticmethod 428 def _from_json(data, dtype, ntv_type, pd_name=None): 429 '''return a Series from a Json data. 430 431 *Parameters* 432 433 - **data**: Json-value - data to convert in a Series 434 - **dtype**: string - dtype of the Series 435 - **ntv_type**: string - default type to apply to convert in dtype 436 - **pd_name**: string - name of the Series including ntv_type 437 438 NTVvalue and a ntv_type''' 439 srs = pd.read_json(json.dumps(data), dtype=dtype, typ='series') 440 if not pd_name is None: 441 srs = srs.rename(pd_name) 442 return PdUtil.convert(ntv_type, srs, tojson=False) 443 444 445class PdUtil: 446 '''ntv-pandas utilities. 
447 448 This class includes static methods: 449 450 Ntv and pandas 451 - **ntv_type**: return NTVtype from name_type and dtype of a Series 452 - **convert**: convert Series with external NTVtype 453 - **ntv_val**: convert a simple Series into NTV json-value 454 - **ntv_obj**: return a list of values to convert in a Series 455 - **pd_name**: return a tuple with the name of the Series and the type deduced from the name 456 - **pd_index**: return a DataFrame with index 457 - **unic**: return simple value if the Series contains a single value 458 459 TableSchema 460 - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series 461 - **name_table**: return a list of non index field's names from a json Table 462 - **ntvtype_table**: return a list of non index field's ntv_type from a json Table 463 - **table_schema**: add 'format' and 'type' keys in a Json TableSchema 464 - **table_val**: convert a Series into TableSchema json-value 465 - **ntv_table**: return NTVtype from the TableSchema data 466 ''' 467 @staticmethod 468 def to_obj_table(jsn, **kwargs): 469 ''' convert json TableSchema data into a DataFrame or a Series''' 470 ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields']) 471 name = PdUtil.name_table(jsn['schema']['fields']) 472 pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0] 473 for nam, ntvtyp in zip(name, ntv_type)] 474 pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2] 475 for nam, ntvtyp in zip(name, ntv_type)] 476 dfr = pd.read_json(json.dumps(jsn['data']), orient='record') 477 dfr = PdUtil.pd_index(dfr) 478 dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False) 479 for ind, col in enumerate(dfr.columns)}) 480 dfr = dfr.astype({col: pd_dtype[ind] 481 for ind, col in enumerate(dfr.columns)}) 482 dfr.columns = pd_name 483 if len(dfr.columns) == 1: 484 return dfr[dfr.columns[0]] 485 return dfr 486 487 @staticmethod 488 def decode_ntv_to_val(ntv): 489 ''' return a value from a ntv_field''' 490 if 
isinstance(ntv, NtvSingle): 491 return ntv.to_obj(simpleval=True) 492 return [ntv_val.to_obj() for ntv_val in ntv] 493 494 @staticmethod 495 def name_table(fields): 496 '''return a list of non index field's names from a json Table''' 497 names = [field.get('name', None) for field in fields 498 if field.get('name', None) != 'index'] 499 return [None if name == 'values' else name for name in names] 500 501 @staticmethod 502 def ntvtype_table(fields): 503 '''return a list of non index field's ntv_type from a json Table''' 504 return [PdUtil.ntv_table(field.get('format', 'default'), 505 field.get('type', None)) for field in fields 506 if field.get('name', None) != 'index'] 507 508 @staticmethod 509 def table_schema(schema, name, ntv_type): 510 '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema 511 for the field defined by 'name' ''' 512 ind = [field['name'] for field in schema['fields']].index(name) 513 tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type] 514 if tabletype['format'] == 'default': 515 schema['fields'][ind].pop('format', None) 516 else: 517 schema['fields'][ind]['format'] = tabletype['format'] 518 schema['fields'][ind]['type'] = tabletype['type'] 519 schema['fields'][ind].pop('extDtype', None) 520 return schema 521 522 @staticmethod 523 def table_val(ntv_type, ntv_name, srs): 524 '''convert a Series into TableSchema json-value. 
525 526 *Parameters* 527 528 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 529 - **ntv_name**: string - name of the Series 530 - **srs** : Series to be converted.''' 531 srs = PdUtil.convert(ntv_type, srs) 532 srs.name = ntv_name 533 tab_val = json.loads(srs.to_json(orient='table', 534 date_format='iso', default_handler=str)) 535 name = 'values' if srs.name is None else srs.name 536 tab_val['schema'] = PdUtil.table_schema( 537 tab_val['schema'], name, ntv_type) 538 return tab_val 539 540 @staticmethod 541 def convert(ntv_type, srs, tojson=True): 542 ''' convert Series with external NTVtype. 543 544 *Parameters* 545 546 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 547 - **srs** : Series to be converted. 548 - **tojson** : boolean (default True) - apply to json function''' 549 if tojson: 550 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 551 return srs.apply(ShapelyConnec.to_coord) 552 if ntv_type == 'geojson': 553 return srs.apply(ShapelyConnec.to_geojson) 554 if ntv_type == 'date': 555 return srs.astype(str) 556 return srs 557 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 558 return srs.apply(ShapelyConnec.to_geometry) 559 if ntv_type == 'geojson': 560 return srs.apply(ShapelyConnec.from_geojson) 561 if ntv_type == 'datetime': 562 return pd.to_datetime(srs) 563 if ntv_type == 'date': 564 return pd.to_datetime(srs).dt.date 565 if ntv_type == 'time': 566 return pd.to_datetime(srs).dt.time 567 return srs 568 569 @staticmethod 570 def ntv_type(name_type, dtype, table=False): 571 ''' return NTVtype from name_type and dtype of a Series . 572 573 *Parameters* 574 575 - **name_type** : string - type included in the Series name, 576 - **dtype** : string - dtype of the Series. 
577 - **table** : boolean (default False) - True if Table Schema conversion 578 ''' 579 if not name_type: 580 types_none = SeriesConnec.types.set_index('name_type').loc[None] 581 if dtype in types_none.dtype.values: 582 return types_none.set_index('dtype').loc[dtype].ntv_type 583 if not table: 584 return None 585 typtab = SeriesConnec.typtab.set_index('name_type').loc[None] 586 return typtab.set_index('dtype').loc[dtype.lower()].ntv_type 587 return name_type 588 589 @staticmethod 590 def ntv_val(ntv_type, srs): 591 ''' convert a simple Series into NTV json-value. 592 593 *Parameters* 594 595 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 596 - **srs** : Series to be *converted.''' 597 srs = PdUtil.convert(ntv_type, srs) 598 if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']: 599 return srs.to_list() 600 if srs.dtype.name == 'object': 601 return srs.to_list() 602 return json.loads(srs.to_json(orient='records', 603 date_format='iso', default_handler=str)) 604 605 @staticmethod 606 def ntv_obj(ntv_codec, name_type, annotated, pd_convert): 607 '''return a list of values to convert in a Series''' 608 if pd_convert: 609 if name_type == 'array': 610 return ntv_codec.to_obj(format='obj', simpleval=True) 611 ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False, 612 def_type=ntv_codec.type_str, fast=True) 613 return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj] 614 return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type) 615 616 @staticmethod 617 def ntv_table(table_format, table_type): 618 ''' return NTVtype from the TableSchema data. 
619 620 *Parameters* 621 622 - **table_format** : string - TableSchema format, 623 - **table_type** : string - TableSchema type''' 624 return SeriesConnec.table.set_index(['type', 'format']).loc[ 625 (table_type, table_format)].values[0] 626 627 @staticmethod 628 def pd_index(dfr): 629 '''return a DataFrame with index''' 630 if 'index' in dfr.columns: 631 dfr = dfr.set_index('index') 632 dfr.index.rename(None, inplace=True) 633 return dfr 634 635 @staticmethod 636 def pd_name(ntv_name, ntv_type, pd_convert=True, table=False): 637 '''return a tuple with the name of the Series, the type deduced from 638 the name and the dtype''' 639 ntv_name = '' if ntv_name is None else ntv_name 640 typtab = SeriesConnec.typtab.set_index('ntv_type') 641 types = SeriesConnec.types.set_index('ntv_type') 642 if table and ntv_type.lower() in typtab.index: 643 name_type = typtab.loc[ntv_type.lower()]['name_type'] 644 dtype = typtab.loc[ntv_type.lower()]['dtype'] 645 elif pd_convert or table: 646 name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else '' 647 dtype = types.loc[ntv_type]['dtype'] 648 else: 649 return (ntv_name + '::' + ntv_type, ntv_type, 'object') 650 dtype = SeriesConnec.deftype.get(dtype, dtype) # ajout 651 pd_name = ntv_name + '::' + name_type if name_type else ntv_name 652 return (pd_name if pd_name else None, name_type, dtype) 653 654 @staticmethod 655 def unic(srs): 656 ''' return simple value if the Series contains a single value''' 657 if str(srs.dtype) == 'category': 658 return srs 659 return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def as_def_type(pd_array):
    '''Convert a Series, an Index or a DataFrame to the default dtype.

    *Parameters*

    - **pd_array** : Series, Index or DataFrame to convert

    *Returns* :

    - for a Series or an Index, the result of `astype` with the dtype found
    in `SeriesConnec.deftype` for the current dtype name (the current dtype
    name is reused when there is no entry)
    - otherwise a new DataFrame built from each converted column'''
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        # DataFrame case: every column is converted on its own
        converted = {}
        for col in pd_array.columns:
            converted[col] = as_def_type(pd_array[col])
        return pd.DataFrame(converted)
    target = SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name)
    return pd_array.astype(target)
convert a Series or DataFrame with default dtype
def check_relation(pd_df, parent, child, typecoupl, value=True):
    ''' Accessor for method `cdataset.Cdataset.check_relation` invoked as
    `pd.DataFrame.npd.check_relation`.
    Get the inconsistent records for a relationship.

    Both columns are converted into categorical data with
    `SeriesConnec.to_idx`, wrapped in `Cfield` objects and the check is
    delegated to `Cfield.check_relation`.

    *Parameters*

    - **pd_df** : DataFrame holding the two Series
    - **parent**: str - name of the parent Series involved in the relation
    - **child** : str - name of the child Series involved in the relation
    - **typecoupl**: str - relationship to check ('derived' or 'coupled')
    - **value**: boolean (default True) - if True return a dict with inconsistent
    values of the Series, else a tuple with index of records

    *Returns* :

    - dict with inconsistent values of the Series
    - or a tuple with row of records'''
    fields = []
    # parent is processed first, then child (same order as the arguments)
    for col_name in (parent, child):
        categ = SeriesConnec.to_idx(pd_df[col_name])
        fields.append(Cfield(categ['codec'], col_name, categ['keys']))
    parent_field, child_field = fields
    return Cfield.check_relation(parent_field, child_field, typecoupl, value)
Accessor for method cdataset.Cdataset.check_relation
invoked as
pd.DataFrame.npd.check_relation
.
Get the inconsistent records for a relationship.
Parameters
- child : str - name of the child Series involved in the relation
- parent: str - name of the parent Series involved in the relation
- typecoupl: str - relationship to check ('derived' or 'coupled')
- value: boolean (default True) - if True return a dict with the inconsistent values of the Series, else a tuple with the index of the records
Returns :
- dict with inconsistent values of the Series
- or a tuple with row of records
def equals(pdself, pdother):
    '''return True if both pandas objects are equal: same kind of object,
    same names and same values once converted to default dtype.

    *Parameters*

    - **pdself** : Series or DataFrame
    - **pdother** : Series or DataFrame to compare with pdself

    *Returns* : boolean

    - two DataFrame are equal if they have the same number of columns and if
    the columns are equal position by position,
    - two categorical Series are equal if names are equal, categories
    (converted to default dtype) are equal and codes are equal,
    - two other Series are equal if names are equal and `pd.Series.equals`
    is True after conversion to default dtype,
    - any other pair of objects (e.g. a Series and a DataFrame) is not equal.'''
    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
        # zip() would silently ignore the extra columns of the wider DataFrame
        if len(pdself.columns) != len(pdother.columns):
            return False
        # positional access: label access returns a DataFrame (and would
        # recurse forever) when column labels are duplicated
        return all(equals(pdself.iloc[:, pos], pdother.iloc[:, pos])
                   for pos in range(len(pdself.columns)))
    if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
        return False
    if pdself.name != pdother.name:
        return False
    if str(pdself.dtype) == str(pdother.dtype) == 'category':
        # categories are pd.Index objects: they have to be compared directly
        # (a recursive call to equals() does not handle an Index)
        categ_self = as_def_type(pdself.cat.categories)
        categ_other = as_def_type(pdother.cat.categories)
        if not categ_self.equals(categ_other):
            return False
        # with identical categories, identical codes mean identical values
        return bool(pdself.cat.codes.equals(pdother.cat.codes))
    return bool(as_def_type(pdself).equals(as_def_type(pdother)))
return True if pd.equals is True and names are equal and dtype of categories are equal
def read_json(jsn, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or Dataframe.

    A JSON object with a 'schema' member is read as TableSchema data.
    Otherwise the data is read as NTV data: a 'field' type gives a Series,
    a 'tab' type gives a DataFrame and, without header, the `series` option
    selects the result.

    *parameters*

    - **jsn** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
    # only a JSON object can be TableSchema data: on a list, `in` would test
    # the elements (e.g. ["schema", ...]) and on a scalar it raises TypeError
    if isinstance(jso, dict) and 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    # no header: the caller chooses the kind of pandas object
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
convert JSON text or JSON Value to pandas Series or Dataframe.
parameters
- jsn : JSON text or JSON value to convert
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
- series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def to_analysis(pd_df):
    '''return a dict with data used in AnaDataset module

    *Parameters*

    - **pd_df** : DataFrame to analyse

    *Returns* : dict with keys

    - 'fields': one dict per column with 'lencodec' and 'mincodec' (both the
    number of distinct category codes of the column) and 'id' (column label)
    - 'name': always None
    - 'length': number of rows
    - 'relations': for each column except the last one, a dict giving for each
    following column the number of distinct pairs of codes'''
    labels = pd_df.columns
    codes = []
    for col in labels:
        codes.append(list(pd_df[col].astype('category').cat.codes))
    nb_fields = len(codes)

    fields = []
    for pos, code in enumerate(codes):
        nb_values = len(set(code))
        fields.append({'lencodec': nb_values, 'id': labels[pos],
                       'mincodec': nb_values})

    relations = {}
    # each column is only related to the columns located after it
    for first in range(nb_fields - 1):
        relations[labels[first]] = {
            labels[second]: len(set(zip(codes[first], codes[second])))
            for second in range(first + 1, nb_fields)}

    return {'fields': fields, 'name': None, 'length': len(pd_df),
            'relations': relations}
return a dict with data used in AnaDataset module
def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    (ignored when `table` is True)
    - **table** : boolean (default False) - if True return TableSchema format
    '''
    option = {'encoded': False, 'header': True, 'table': False} | kwargs
    # TableSchema data is never wrapped in a header
    if option['table']:
        option['header'] = False

    if isinstance(pd_array, pd.Series):
        connector, head = SeriesConnec, ':field'
    else:
        connector, head = DataFrameConnec, ':tab'
    jsn = connector.to_json_ntv(pd_array, table=option['table'])[0]

    if option['header']:
        jsn = {head: jsn}
    return json.dumps(jsn) if option['encoded'] else jsn
convert pandas Series or Dataframe to JSON text or JSON Value.
parameters
- pd_array : Series or Dataframe to convert
- encoded : boolean (default: False) - if True return a JSON text else a JSON value
- header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
- table : boolean (default False) - if True return TableSchema format
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        Each field of the tab is decoded with `NtvUtil.decode_ntv_tab` into a list
        (name, type, codec, parent, keys, coef, length - same layout as the tuple
        unpacked in `SeriesConnec.to_obj_ntv`) and converted with
        `SeriesConnec.to_series`.

        *Parameters*

        - **ntv_value** : Ntv value of the tab to convert
        - **kwargs** : forwarded to `SeriesConnec.to_series` ('leng' is replaced
        by the largest field length), e.g.:
            - **index** : list (default None) - list of index values,
            - **alias** : boolean (default False) - if True, alias dtype else default dtype
            - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        ntv = Ntv.fast(ntv_value)
        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
                for ntvf in ntv]
        leng = max([idx[6] for idx in lidx])
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, lind in enumerate(lidx):
            # a field without parent, keys and coef is a plain list of values
            # (tested before init_ntv_keys updates the fields)
            no_keys.append(not (lind[3] or lind[4] or lind[5]))
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            lind[2] = Ntv.fast(Ntv.obj_ntv(
                lind[2], typ=lind[1], single=len(lind[2]) == 1))
        list_series = [
            SeriesConnec.to_series(field[2], field[0],
                                   None if no_key else field[4], **option)
            for field, no_key in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format

        *Return* : tuple (json-value, name, type) where type is `typ` if given
        else 'tab'.'''
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not kwargs.get('table', False):
            # the index is converted as an ordinary column
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, ntv_typ)

        # (ntv_name, ntv_type) of each column, computed once and used both for
        # the value conversion and for the schema update
        col_types = [SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)
                     for col in value.columns]
        df2 = pd.DataFrame({
            NtvUtil.from_obj_name(col)[0]: PdUtil.convert(ntv_type, value[col])
            for col, (_, ntv_type) in zip(value.columns, col_types)})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        for ntv_name, ntv_type in col_types:
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))
NTV connector for pandas DataFrame.
One static method is included:
- to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    ''' convert json ntv_value into a DataFrame.

    Each field of the tab is decoded with `NtvUtil.decode_ntv_tab` into a list
    whose items 3, 4, 5 and 6 are used here as parent, keys, coef and length,
    then converted with `SeriesConnec.to_series`.

    *Parameters*

    - **ntv_value** : Ntv value of the tab to convert
    - **kwargs** : forwarded to `SeriesConnec.to_series` ('leng' is replaced
    by the largest field length), e.g.:
        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
    to_series = SeriesConnec.to_series

    fields = [list(NtvUtil.decode_ntv_tab(ntv_field, PdUtil.decode_ntv_to_val))
              for ntv_field in Ntv.fast(ntv_value)]
    leng = max([field[6] for field in fields])
    option = kwargs | {'leng': leng}

    no_keys = []
    for ind, field in enumerate(fields):
        # plain field : neither parent, nor keys, nor coef
        no_keys.append(not (field[3] or field[4] or field[5]))
        NtvConnector.init_ntv_keys(ind, fields, leng)
        is_single = len(field[2]) == 1
        field[2] = Ntv.fast(Ntv.obj_ntv(field[2], typ=field[1], single=is_single))

    columns = {}
    for field, no_key in zip(fields, no_keys):
        keys = None if no_key else field[4]
        ser = to_series(field[2], field[0], keys, **option)
        columns[ser.name] = ser
    return PdUtil.pd_index(pd.DataFrame(columns))
convert json ntv_value into a DataFrame.
Parameters
- index : list (default None) - list of index values,
- alias : boolean (default False) - if True, alias dtype else default dtype
- annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : DataFrame values
    - **table** : boolean (default False) - if True return TableSchema format

    *Return* : tuple (json-value, name, type) where type is `typ` if given
    else 'tab'.'''
    ntv_typ = typ if typ else DataFrameConnec.clas_typ
    if not kwargs.get('table', False):
        # the index is converted as an ordinary column
        df2 = value.reset_index()
        jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                       for col in df2.columns]).to_obj()
        return (jsn, name, ntv_typ)

    # (ntv_name, ntv_type) of each column, computed once and used both for
    # the value conversion and for the schema update
    col_types = [SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)
                 for col in value.columns]
    df2 = pd.DataFrame({
        NtvUtil.from_obj_name(col)[0]: PdUtil.convert(ntv_type, value[col])
        for col, (_, ntv_type) in zip(value.columns, col_types)})
    table_val = json.loads(df2.to_json(orient='table',
                                       date_format='iso', default_handler=str))
    for ntv_name, ntv_type in col_types:
        table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                  ntv_name, ntv_type)
    return (table_val, name, ntv_typ)
convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : DataFrame values
- table : boolean (default False) - if True return TableSchema format
@staticmethod
def to_listidx(dtf):
    ''' convert a DataFrame in categorical data

    *Return: tuple with:*

    - **list** of dict (keys : 'codec', 'name', 'keys') for each column
    - **length** of the DataFrame'''
    list_idx = []
    for _, ser in dtf.items():
        list_idx.append(SeriesConnec.to_idx(ser))
    return (list_idx, len(dtf))
convert a DataFrame in categorical data
Return: tuple with:
- list of dict (keys : 'codec', 'name', 'keys') for each column
- length of the DataFrame
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data

    The class attributes are loaded from two configuration files located in
    `path_ntv_pandas` when the class is created:

    - 'ntv_pandas.ini' : `types` (DataFrame) and `astype` (dict),
    - 'ntv_table.ini' : `table` and `typtab` (DataFrame).
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # inverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        Return None if `ntv_value` is None or if the field has a parent and
        no `extkeys` is given.

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)
        (used only if `table` is True)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(
            value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            if no_val:
                # the TableSchema value is not requested : do not build it
                return (ntv_name, ntv_type)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # categorical data : [categories, codes] (codes replaced by a
            # single coef when encode_coef returns one)
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type)
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
                                     def_type=ntv_type).ntv_value
        if len(ntv_value) == 1:
            ntv_value[0].set_name(ntv_name)
            return (ntv_value[0].to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as datetime objects converted to UTC.
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        if lis and isinstance(lis[0], pd.Timestamp):
            # NOTE(review): datetime.astimezone treats naive values as local
            # time - confirm this is the expected reading of naive timestamps
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        With `ntv_keys`, the result is a categorical Series (categories from
        the codec, codes from the keys). Without, the codec values are the
        Series values (a single value is repeated `leng` times).

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the categorical Series (None or empty if
        the codec is the full list of values)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (applied only when `ntv_keys` is present)
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        # pandas conversion is used only for the NTV types listed in `types`
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type (the Series is not renamed if None)'''
        from io import StringIO

        # literal JSON strings are deprecated in pd.read_json : use a buffer
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)
NTV connector for pandas Series
Two static methods are included:
- to_idx: convert a Series in categorical data
- to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    '''Generate a Series Object from a Ntv field object

    Return None if `ntv_value` is None or if the field has a parent and
    no `extkeys` is given.

    *Parameters*

    - **ntv_value**: Ntv object or Ntv value - value to convert in Series

    *parameters (kwargs)*

    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **index**: list (default None) - if present, add the index in Series
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    '''
    defaults = {'extkeys': None, 'decode_str': False, 'leng': None,
                'annotated': False}
    option = defaults | kwargs
    if ntv_value is None:
        return None

    field = Ntv.obj(ntv_value, decode_str=option['decode_str'])
    decoded = NtvUtil.decode_ntv_tab(field, PdUtil.decode_ntv_to_val)
    ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = decoded
    extkeys = option['extkeys']

    # a derived field cannot be rebuilt without the keys of its parent
    if parent and not extkeys:
        return None

    if coef:
        ntv_keys = NtvConnector.keysfromcoef(
            coef, leng_field//coef, option['leng'])
    elif extkeys:
        if parent:
            ntv_keys = NtvConnector.keysfromderkeys(extkeys, ntv_keys)
        else:
            ntv_keys = extkeys

    is_single = len(codec) == 1
    ntv_codec = Ntv.fast(Ntv.obj_ntv(codec, typ=typ, single=is_single))
    return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)
Generate a Series Object from a Ntv field object
Parameters
- ntv_value: Ntv object or Ntv value - value to convert in Series
parameters (kwargs)
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : Series values
    - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
    - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)
    (used only if `table` is True)'''

    table = kwargs.get('table', False)
    no_val = kwargs.get('no_val', False)
    srs = value.astype(SeriesConnec.astype.get(
        value.dtype.name, value.dtype.name))
    sr_name = srs.name if srs.name else ''
    ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

    if table:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
        if no_val:
            # the TableSchema value is not requested : do not build it
            return (ntv_name, ntv_type)
        ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
        return (ntv_value, ntv_name, ntv_type)
    if srs.dtype.name == 'category':
        # categorical data : [categories, codes] (codes replaced by a
        # single coef when encode_coef returns one)
        cdc = pd.Series(srs.cat.categories)
        ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
        cat_value = PdUtil.ntv_val(ntv_type, cdc)
        cat_value = NtvList(cat_value, ntv_type=ntv_type)
        cod_value = list(srs.cat.codes)
        coef = NtvConnector.encode_coef(cod_value)
        ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)]
        ntv_type = None
    else:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
        ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
                                 def_type=ntv_type).ntv_value
    if len(ntv_value) == 1:
        ntv_value[0].set_name(ntv_name)
        return (ntv_value[0].to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)
    return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
            SeriesConnec.clas_typ if not typ else typ)
convert a Series (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : Series values
- table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
- no_val : boolean (default False) - if True return (ntv_name, ntv_type)
358 @staticmethod 359 def to_idx(ser): 360 ''' convert a Series in categorical data 361 362 *return (dict)* 363 364 { 'codec': 'list of pandas categories', 365 'name': 'name of the series', 366 'keys': 'list of pandas codes' } 367 ''' 368 idx = ser.astype('category') 369 lis = list(idx.cat.categories) 370 if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp): 371 lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) 372 for ts in lis] 373 return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}
convert a Series in categorical data
return (dict)
{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }
@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
    ''' return a pd.Series from Field data (codec, name, keys)

    With `ntv_keys`, the result is a categorical Series (categories from
    the codec, codes from the keys). Without, the codec values are the
    Series values (a single value is repeated `leng` times).

    *Parameters*

    - **ntv_codec**: Ntv object - codec value to convert in Series values
    - **ntv_name**: string - name of the Series
    - **ntv_keys**: list - codes of the categorical Series (None or empty if
    the codec is the full list of values)

    *parameters (kwargs)*

    - **index**: list (default None) - if present, add the index in Series
    (applied only when `ntv_keys` is present)
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    '''
    defaults = {'index': None, 'leng': None, 'alias': False,
                'annotated': False}
    option = defaults | kwargs
    known_types = SeriesConnec.types.set_index('ntv_type')
    alias_map = SeriesConnec.astype
    leng = option['leng']

    ntv_type = ntv_codec.type_str
    # a single codec value is repeated to fill the whole Series
    len_unique = 1
    if len(ntv_codec) == 1 and leng:
        len_unique = leng
    pd_convert = ntv_type in known_types.index

    pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
    obj_type = name_type if pd_convert else ntv_type
    ntv_obj = PdUtil.ntv_obj(ntv_codec, obj_type, option['annotated'],
                             pd_convert)

    if ntv_keys:
        if pd_convert and name_type != 'array':
            categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
            cat_type = categ.dtype.name
            categories = categ.astype(alias_map.get(cat_type, cat_type))
        else:
            categories = pd.Series(ntv_obj, dtype='object')
        cat_dtype = pd.CategoricalDtype(categories=categories)
        data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat_dtype)
        srs = pd.Series(data, name=pd_name,
                        index=option['index'], dtype='category')
    else:
        data = ntv_obj * len_unique
        if pd_convert:
            srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
        else:
            srs = pd.Series(data, name=pd_name, dtype=dtype)

    # final dtype : alias dtype if requested else default dtype
    dtype_map = alias_map if option['alias'] else SeriesConnec.deftype
    return srs.astype(dtype_map.get(srs.dtype.name, srs.dtype.name))
return a pd.Series from Field data (codec, name, keys)
Parameters
- ntv_codec: Ntv object - codec value to convert in Series values
- ntv_type: string - default type to apply to convert in dtype
- ntv_name: string - name of the Series
parameters (kwargs)
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series

        *Parameters*

        - **jsn** : dict - TableSchema data with 'schema' and 'data' keys
        - **kwargs** : accepted but not used

        *Return* : a Series if the data contains a single column else a DataFrame'''
        from io import StringIO

        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        pd_info = [PdUtil.pd_name(nam, ntvtyp, table=True)
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_name = [info[0] for info in pd_info]
        pd_dtype = [info[2] for info in pd_info]
        # 'records' is the documented orient; literal JSON strings are
        # deprecated in pd.read_json, so the text is wrapped in a buffer
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                          for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field (a simple value for a NtvSingle
        else the list of the values of its items)'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                                 field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'. The 'extDtype' key of the field is
        removed. The schema is updated in place and returned.'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        # shallow copy : `convert` may return `srs` itself and the Series of
        # the caller must not be renamed
        srs = PdUtil.convert(ntv_type, srs).copy(deep=False)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        # the schema field looked up for an unnamed Series is 'values'
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - if True convert to json values
        else convert from json values

        The Series is returned unchanged for the other NTVtype.'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name
        (returned as is if present),
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return None
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index (the 'index' column, if present,
        becomes the unnamed index of the DataFrame)'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        # added : a dtype found in deftype is replaced by its mapped dtype
        dtype = SeriesConnec.deftype.get(dtype, dtype)
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value
        (a Series reduced to its first row). Categorical and empty Series
        are returned unchanged.'''
        # an empty Series has no first value to compare with
        if srs.empty or str(srs.dtype) == 'category':
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
ntv-pandas utilities.
This class includes static methods:
Ntv and pandas
- ntv_type: return NTVtype from name_type and dtype of a Series
- convert: convert Series with external NTVtype
- ntv_val: convert a simple Series into NTV json-value
- ntv_obj: return a list of values to convert in a Series
- pd_name: return a tuple with the name of the Series, the type deduced from the name and the dtype
- pd_index: return a DataFrame with index
- unic: return simple value if the Series contains a single value
TableSchema
- to_obj_table: convert json TableSchema data into a DataFrame or a Series
- name_table: return a list of non index field's names from a json Table
- ntvtype_table: return a list of non index field's ntv_type from a json Table
- table_schema: add 'format' and 'type' keys in a Json TableSchema
- table_val: convert a Series into TableSchema json-value
- ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
    ''' convert json TableSchema data into a DataFrame or a Series.

    *Parameters*

    - **jsn** : dict - TableSchema json-value; the 'schema' key (with its
    'fields' list) and the 'data' key are read,
    - **kwargs** : accepted for interface compatibility, not used here.

    *Returns*

    - a Series if the result has a single column, else a DataFrame.'''
    # local import: keeps the block self-contained
    from io import StringIO

    fields = jsn['schema']['fields']
    ntv_types = PdUtil.ntvtype_table(fields)
    names = PdUtil.name_table(fields)
    # a single pd_name call per field supplies both the column name ([0])
    # and the target dtype ([2])
    pd_infos = [PdUtil.pd_name(nam, ntvtyp, table=True)
                for nam, ntvtyp in zip(names, ntv_types)]
    pd_names = [info[0] for info in pd_infos]
    pd_dtypes = [info[2] for info in pd_infos]
    # 'records' is the documented orient value; the JSON text is wrapped in
    # a file-like object because passing literal JSON strings to read_json
    # is deprecated in recent pandas versions
    dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
    dfr = PdUtil.pd_index(dfr)
    dfr = pd.DataFrame({col: PdUtil.convert(ntv_types[ind], dfr[col], tojson=False)
                        for ind, col in enumerate(dfr.columns)})
    dfr = dfr.astype({col: pd_dtypes[ind]
                      for ind, col in enumerate(dfr.columns)})
    dfr.columns = pd_names
    if len(dfr.columns) == 1:
        return dfr[dfr.columns[0]]
    return dfr
convert json TableSchema data into a DataFrame or a Series
@staticmethod
def decode_ntv_to_val(ntv):
    ''' return a value from a ntv_field.

    A `NtvSingle` gives its simple value (`to_obj(simpleval=True)`); any
    other ntv is iterated and the `to_obj()` of each child is returned
    in a list.'''
    if not isinstance(ntv, NtvSingle):
        return [child.to_obj() for child in ntv]
    return ntv.to_obj(simpleval=True)
return a value from a ntv_field
495 @staticmethod 496 def name_table(fields): 497 '''return a list of non index field's names from a json Table''' 498 names = [field.get('name', None) for field in fields 499 if field.get('name', None) != 'index'] 500 return [None if name == 'values' else name for name in names]
return a list of non index field's names from a json Table
@staticmethod
def ntvtype_table(fields):
    '''return a list of non index field's ntv_type from a json Table.

    Each kept field is resolved with `PdUtil.ntv_table` from its 'format'
    key ('default' if absent) and its 'type' key (None if absent).'''
    ntv_types = []
    for field in fields:
        if field.get('name') == 'index':
            continue
        table_format = field.get('format', 'default')
        table_type = field.get('type')
        ntv_types.append(PdUtil.ntv_table(table_format, table_type))
    return ntv_types
return a list of non index field's ntv_type from a json Table
@staticmethod
def table_schema(schema, name, ntv_type):
    '''convert 'ntv_type' into 'format' and 'type' keys in a Json TableSchema
    for the field defined by 'name'.

    The schema is updated in place and returned: the 'format' key is removed
    when the format is 'default' (set otherwise), the 'type' key is set and
    the 'extDtype' key is removed.'''
    field_names = [field['name'] for field in schema['fields']]
    target = schema['fields'][field_names.index(name)]
    tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
    table_format = tabletype['format']
    if table_format != 'default':
        target['format'] = table_format
    else:
        target.pop('format', None)
    target['type'] = tabletype['type']
    target.pop('extDtype', None)
    return schema
convert 'ntv_type' into 'format' and 'type' keys in a Json TableSchema for the field defined by 'name'
@staticmethod
def table_val(ntv_type, ntv_name, srs):
    '''convert a Series into TableSchema json-value.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **ntv_name**: string - name of the Series
    - **srs** : Series to be converted (left unchanged).

    *Returns*

    - dict : result of `Series.to_json(orient='table')` with the schema of
    the field updated by `PdUtil.table_schema`.'''
    # PdUtil.convert may hand back the caller's own Series; the shallow copy
    # keeps the renaming below from leaking to the caller's object
    srs = PdUtil.convert(ntv_type, srs).copy(deep=False)
    srs.name = ntv_name
    tab_val = json.loads(srs.to_json(orient='table',
                                     date_format='iso', default_handler=str))
    # the field of an unnamed Series is looked up under the name 'values'
    name = 'values' if srs.name is None else srs.name
    tab_val['schema'] = PdUtil.table_schema(
        tab_val['schema'], name, ntv_type)
    return tab_val
convert a Series into TableSchema json-value.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- ntv_name: string - name of the Series
- srs : Series to be converted.
541 @staticmethod 542 def convert(ntv_type, srs, tojson=True): 543 ''' convert Series with external NTVtype. 544 545 *Parameters* 546 547 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 548 - **srs** : Series to be converted. 549 - **tojson** : boolean (default True) - apply to json function''' 550 if tojson: 551 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 552 return srs.apply(ShapelyConnec.to_coord) 553 if ntv_type == 'geojson': 554 return srs.apply(ShapelyConnec.to_geojson) 555 if ntv_type == 'date': 556 return srs.astype(str) 557 return srs 558 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 559 return srs.apply(ShapelyConnec.to_geometry) 560 if ntv_type == 'geojson': 561 return srs.apply(ShapelyConnec.from_geojson) 562 if ntv_type == 'datetime': 563 return pd.to_datetime(srs) 564 if ntv_type == 'date': 565 return pd.to_datetime(srs).dt.date 566 if ntv_type == 'time': 567 return pd.to_datetime(srs).dt.time 568 return srs
convert Series with external NTVtype.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- srs : Series to be converted.
- tojson : boolean (default True) - apply to json function
@staticmethod
def ntv_type(name_type, dtype, table=False):
    ''' return NTVtype from name_type and dtype of a Series.

    *Parameters*

    - **name_type** : string - type included in the Series name,
    - **dtype** : string - dtype of the Series.
    - **table** : boolean (default False) - True if Table Schema conversion

    *Returns*

    - `name_type` when it is not empty, else the ntv_type found for `dtype`
    in `SeriesConnec.types` (or in `SeriesConnec.typtab` if `table`),
    else None.'''
    if name_type:
        return name_type
    # rows of the types table whose name_type is None
    untyped = SeriesConnec.types.set_index('name_type').loc[None]
    if dtype in untyped.dtype.values:
        return untyped.set_index('dtype').loc[dtype].ntv_type
    if table:
        tab_untyped = SeriesConnec.typtab.set_index('name_type').loc[None]
        return tab_untyped.set_index('dtype').loc[dtype.lower()].ntv_type
    return None
return NTVtype from name_type and dtype of a Series.
Parameters
- name_type : string - type included in the Series name,
- dtype : string - dtype of the Series.
- table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
    ''' convert a simple Series into NTV json-value.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **srs** : Series to be converted.

    *Returns*

    - list : values of the converted Series (directly for geometric types
    and 'object' dtype, through `Series.to_json` otherwise).'''
    converted = PdUtil.convert(ntv_type, srs)
    geo_types = ('point', 'line', 'polygon', 'geometry', 'geojson')
    if ntv_type in geo_types or converted.dtype.name == 'object':
        return converted.to_list()
    json_text = converted.to_json(orient='records', date_format='iso',
                                  default_handler=str)
    return json.loads(json_text)
convert a simple Series into NTV json-value.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- srs : Series to be converted.
@staticmethod
def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
    '''return a list of values to convert in a Series.

    Without `pd_convert`, the values come from `ntv_codec.to_obj` with
    `name_type` as default type. With `pd_convert`, an 'array' name_type
    also uses `to_obj`; otherwise `ntv_codec.obj_value` is used and a
    non-list result is wrapped in a list.'''
    if not pd_convert:
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)
    if name_type == 'array':
        return ntv_codec.to_obj(format='obj', simpleval=True)
    values = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                 def_type=ntv_codec.type_str, fast=True)
    if isinstance(values, list):
        return values
    return [values]
return a list of values to convert in a Series
@staticmethod
def ntv_table(table_format, table_type):
    ''' return NTVtype from the TableSchema data.

    *Parameters*

    - **table_format** : string - TableSchema format,
    - **table_type** : string - TableSchema type

    *Returns*

    - first value of the `SeriesConnec.table` row selected by the
    (type, format) pair.'''
    by_type_format = SeriesConnec.table.set_index(['type', 'format'])
    row = by_type_format.loc[(table_type, table_format)]
    return row.values[0]
return NTVtype from the TableSchema data.
Parameters
- table_format : string - TableSchema format,
- table_type : string - TableSchema type
628 @staticmethod 629 def pd_index(dfr): 630 '''return a DataFrame with index''' 631 if 'index' in dfr.columns: 632 dfr = dfr.set_index('index') 633 dfr.index.rename(None, inplace=True) 634 return dfr
return a DataFrame with index
@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
    '''return a tuple with the name of the Series, the type deduced from
    the name and the dtype.

    Without `pd_convert` and `table`, the name is 'ntv_name::ntv_type' and
    the dtype is 'object'. Otherwise name_type and dtype are read from
    `SeriesConnec.typtab` (if `table` and the lower-case ntv_type is in it)
    or from `SeriesConnec.types`.'''
    base_name = '' if ntv_name is None else ntv_name
    if not (pd_convert or table):
        return (base_name + '::' + ntv_type, ntv_type, 'object')
    typtab = SeriesConnec.typtab.set_index('ntv_type')
    types = SeriesConnec.types.set_index('ntv_type')
    if table and ntv_type.lower() in typtab.index:
        tab_row = typtab.loc[ntv_type.lower()]
        name_type = tab_row['name_type']
        dtype = typtab.loc[ntv_type.lower()]['dtype']
    else:
        name_type = '' if ntv_type == '' else types.loc[ntv_type]['name_type']
        dtype = types.loc[ntv_type]['dtype']
    # addition: map the dtype through SeriesConnec.deftype (unchanged if absent)
    dtype = SeriesConnec.deftype.get(dtype, dtype)
    full_name = base_name + '::' + name_type if name_type else base_name
    return (full_name if full_name else None, name_type, dtype)
return a tuple with the name of the Series, the type deduced from the name and the dtype
655 @staticmethod 656 def unic(srs): 657 ''' return simple value if the Series contains a single value''' 658 if str(srs.dtype) == 'category': 659 return srs 660 return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
return simple value if the Series contains a single value