ntv-pandas.ntv_pandas.pandas_ntv_connector
Created on Feb 27 2023
@author: Philippe@loco-labs.io
The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
([specification document](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV
It contains:
- the functions `read_json` and `to_json`, to convert between JSON data and pandas entities
- the child classes of the `NTV.json_ntv.ntv.NtvConnector` abstract class:
    - `DataFrameConnec`: 'tab' connector
    - `SeriesConnec`: 'field' connector
- a utility class with static methods: `PdUtil`
# -*- coding: utf-8 -*-
"""
Created on Feb 27 2023

@author: Philippe@loco-labs.io

The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
([specification document](
https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).

A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV

It contains :

- functions `read_json` and `to_json` to convert JSON data and pandas entities

- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
    - `DataFrameConnec`: 'tab' connector
    - `SeriesConnec`: 'field' connector

- a utility class with static methods : `PdUtil`
"""
import os
import datetime
import json
import configparser
from io import StringIO
from pathlib import Path
import pandas as pd
import numpy as np


from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
from json_ntv.ntv_util import NtvUtil
from json_ntv.ntv_connector import ShapelyConnec

path_ntv_pandas = Path(os.path.abspath(__file__)).parent


def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    - **table** : boolean (default False) - if True return TableSchema format
    '''
    option = {'encoded': False, 'header': True, 'table': False} | kwargs
    # the TableSchema output is never wrapped in a ':field' / ':tab' header
    option['header'] = False if option['table'] else option['header']
    if isinstance(pd_array, pd.Series):
        jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0]
        head = ':field'
    else:
        jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'])[0]
        head = ':tab'
    if option['header']:
        jsn = {head: jsn}
    if option['encoded']:
        return json.dumps(jsn)
    return jsn


def read_json(jsn, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or Dataframe.

    *parameters*

    - **jsn** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
    # Only a JSON object can be a TableSchema document; the isinstance guard
    # avoids a TypeError on scalars and a false match on lists / strings.
    if isinstance(jso, dict) and 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)


def as_def_type(pd_array):
    '''convert a Series or DataFrame with default dtype'''
    if isinstance(pd_array, (pd.Series, pd.Index)):
        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})


def equals(pdself, pdother):
    '''return True if pd.equals is True and names are equal and dtype of categories are equal.

    *parameters*

    - **pdself**, **pdother** : two DataFrame, two Series or two Index to compare

    Objects of different kinds (e.g. a Series and a DataFrame) and DataFrame
    with a different number of columns are never equal.
    For two categorical Series, the comparison is made on the categories.'''
    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
        # zip() stops at the shortest operand: check the column count first
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))
    both_series = isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)
    # categories are pd.Index objects: they have to be accepted for the recursion
    both_index = isinstance(pdself, pd.Index) and isinstance(pdother, pd.Index)
    if not (both_series or both_index):
        return False
    if pdself.name != pdother.name:
        return False
    if both_series and str(pdself.dtype) == str(pdother.dtype) == 'category':
        # NOTE(review): only the categories are compared here, not the codes
        return equals(pdself.cat.categories, pdother.cat.categories)
    return bool(as_def_type(pdself).equals(as_def_type(pdother)))


class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        series = SeriesConnec.to_series

        ntv = Ntv.fast(ntv_value)
        # one list per field: [name, dtype, codec, parent, keys, coef, leng]
        lidx = [list(PdUtil.decode_ntv_tab(ntvf))
                for ntvf in ntv]
        leng = max([idx[6] for idx in lidx])
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind in range(len(lidx)):
            lind = lidx[ind]
            # remember the fields without parent, keys and coef before
            # init_ntv_keys is applied to lidx
            no_keys.append(not lind[3] and not lind[4] and not lind[5])
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            lind[2] = Ntv.fast(Ntv.obj_ntv(
                lind[2], typ=lind[1], single=len(lind[2]) == 1))
        list_series = [series(lidx[ind][2], lidx[ind][0], None if no_keys[ind]
                              else lidx[ind][4], **option) for ind in range(len(lidx))]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format'''

        table = kwargs.get('table', False)
        if not table:
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
            value[col]) for col in value.columns})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        for nam in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[nam], table=True, no_val=True)
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))


class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # conversion tables are loaded from the .ini files located beside this module
    config = configparser.ConfigParser()
    # config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    deftype = {val: key for key, val in astype.items()}
    config = configparser.ConfigParser()
    # config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_table.ini'))
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            PdUtil.decode_ntv_tab(ntv)
        # a field with a parent cannot be rebuilt without external keys
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(
            value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # categorical Series: json-value is [categories, codes (or [coef])]
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values; if empty or None the
        Series is not categorical

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string - name of the Series including ntv_type'''
        # literal JSON strings are deprecated as read_json input: use a buffer
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)


class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **decode_ntv_tab**: Generate a tuple data from a NTVvalue
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series'''
        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
                    for nam, ntvtyp in zip(name, ntv_type)]
        # literal JSON strings are deprecated as read_json input: use a buffer
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                          for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

        - name (None or string): name of the Field
        - dtype (None or string): type of data
        - codec (list): list of Field codec values
        - parent (None or int): Field parent or None
        - keys (None or list): Field keys
        - coef (None or int): coef if primary Field else None
        - leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
            return (nam, typ, val, None, None, None, len(ntv))

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
            return (nam, typc, valc, ntv[1].val, None, None, leng)
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
            leng = leng * ntv[1][0].val
            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
        return (nam, typ, val, None, None, None, len(ntv))

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'. The schema is updated in place and returned.'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - apply to json function'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return 'json'
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added: use the default dtype
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value'''
        if str(srs.dtype) == 'category':
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def to_json(pd_array, **kwargs):
    ''' Serialize a pandas Series or DataFrame as a JSON value or a JSON text.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    - **table** : boolean (default False) - if True return TableSchema format
      (the header is then always omitted)
    '''
    settings = {'encoded': False, 'header': True, 'table': False} | kwargs
    as_table = settings['table']
    if as_table:
        # a TableSchema document is never wrapped in a header
        settings['header'] = False
    if isinstance(pd_array, pd.Series):
        payload = SeriesConnec.to_json_ntv(pd_array, table=as_table)[0]
        key = ':field'
    else:
        payload = DataFrameConnec.to_json_ntv(pd_array, table=as_table)[0]
        key = ':tab'
    if settings['header']:
        payload = {key: payload}
    return json.dumps(payload) if settings['encoded'] else payload
convert pandas Series or Dataframe to JSON text or JSON Value.
parameters
- pd_array : Series or Dataframe to convert
- encoded : boolean (default: False) - if True return a JSON text else a JSON value
- header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
- table : boolean (default False) - if True return TableSchema format
def read_json(jsn, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or Dataframe.

    *parameters*

    - **jsn** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame

    A JSON object with a 'schema' key is read as TableSchema data. Otherwise
    the NTV type of the data ('field' or 'tab') selects the connector, and
    data without header is read according to the **series** option.
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
    # Only a JSON object can be a TableSchema document; the isinstance guard
    # avoids a TypeError on scalars and a false match on lists / strings.
    if isinstance(jso, dict) and 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
convert JSON text or JSON Value to pandas Series or Dataframe.
parameters
- jsn : JSON text or JSON value to convert
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- leng: integer (default None) - length of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
- series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def as_def_type(pd_array):
    '''Return a copy of a Series, Index or DataFrame cast to the default dtypes.

    The default dtype is looked up in `SeriesConnec.deftype`; a dtype without
    entry is left unchanged. A DataFrame is rebuilt column by column.'''
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        return pd.DataFrame({label: as_def_type(pd_array[label])
                             for label in pd_array.columns})
    current = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(current, current))
convert a Series or DataFrame with default dtype
def equals(pdself, pdother):
    '''return True if pd.equals is True and names are equal and dtype of categories are equal.

    *parameters*

    - **pdself**, **pdother** : two DataFrame, two Series or two Index to compare

    Objects of different kinds (e.g. a Series and a DataFrame) and DataFrame
    with a different number of columns are never equal.
    For two categorical Series, the comparison is made on the categories.'''
    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
        # zip() stops at the shortest operand: check the column count first
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))
    both_series = isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)
    # categories are pd.Index objects: they have to be accepted for the recursion
    both_index = isinstance(pdself, pd.Index) and isinstance(pdother, pd.Index)
    if not (both_series or both_index):
        return False
    if pdself.name != pdother.name:
        return False
    if both_series and str(pdself.dtype) == str(pdother.dtype) == 'category':
        # NOTE(review): only the categories are compared here, not the codes
        return equals(pdself.cat.categories, pdother.cat.categories)
    return bool(as_def_type(pdself).equals(as_def_type(pdother)))
return True if pd.equals is True and names are equal and dtype of categories are equal
class DataFrameConnec(NtvConnector):

    '''NTV connector between pandas DataFrame and the 'tab' NTV type.

    Besides the two connector methods, one static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' build a DataFrame from a json ntv_value.

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        # one list per field: [name, dtype, codec, parent, keys, coef, leng]
        fields = [list(PdUtil.decode_ntv_tab(ntv_field))
                  for ntv_field in Ntv.fast(ntv_value)]
        leng = max([field[6] for field in fields])
        option = kwargs | {'leng': leng}
        without_keys = []
        for pos, field in enumerate(fields):
            # recorded before init_ntv_keys is applied to the list of fields
            without_keys.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(pos, fields, leng)
            is_single = len(field[2]) == 1
            field[2] = Ntv.fast(Ntv.obj_ntv(field[2], typ=field[1], single=is_single))
        columns = {}
        for field, no_key in zip(fields, without_keys):
            keys = None if no_key else field[4]
            ser = SeriesConnec.to_series(field[2], field[0], keys, **option)
            columns[ser.name] = ser
        return PdUtil.pd_index(pd.DataFrame(columns))

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format'''
        out_typ = typ if typ else DataFrameConnec.clas_typ
        if kwargs.get('table', False):
            converted = {}
            for col in value.columns:
                col_type = SeriesConnec.to_json_ntv(
                    value[col], table=True, no_val=True)[1]
                converted[NtvUtil.from_obj_name(col)[0]] = PdUtil.convert(
                    col_type, value[col])
            text = pd.DataFrame(converted).to_json(
                orient='table', date_format='iso', default_handler=str)
            table_val = json.loads(text)
            # replace the pandas 'type' / 'format' of each field by the NTV ones
            for col in value.columns:
                ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                    value[col], table=True, no_val=True)
                table_val['schema'] = PdUtil.table_schema(
                    table_val['schema'], ntv_name, ntv_type)
            return (table_val, name, out_typ)
        # NTV format: the index is converted like any other column
        flat = value.reset_index()
        json_fields = [SeriesConnec.to_json_ntv(PdUtil.unic(flat[col]))[0]
                       for col in flat.columns]
        return (Ntv.obj(json_fields).to_obj(), name, out_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
        - **length** of the DataFrame'''
        list_idx = [SeriesConnec.to_idx(column) for _, column in dtf.items()]
        return (list_idx, len(dtf))
NTV connector for pandas DataFrame.
One static method is included:
- to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    ''' Build a DataFrame from the json value of a 'tab' NTV entity.

    *Parameters*

    - **ntv_value** : Ntv object or json value - one item per field,

    *parameters (kwargs)* - forwarded to `SeriesConnec.to_series`

    - **index** : list (default None) - list of index values,
    - **alias** : boolean (default False) - if True, alias dtype else default dtype
    - **annotated** : boolean (default False) - if True, NTV names are not included.

    *Returns* : the DataFrame returned by `PdUtil.pd_index`.'''
    # each item : [name, type, codec, parent, keys, coef, leng]
    fields = [list(PdUtil.decode_ntv_tab(ntv_field))
              for ntv_field in Ntv.fast(ntv_value)]
    leng = max(field[6] for field in fields)
    option = kwargs | {'leng': leng}

    no_keys = []
    for ind, field in enumerate(fields):
        # Recorded before `init_ntv_keys` (presumably fills the keys in place).
        # NOTE(review): truthiness test, a parent equal to 0 counts as
        # 'no parent' - confirm this is intended.
        no_keys.append(not (field[3] or field[4] or field[5]))
        NtvConnector.init_ntv_keys(ind, fields, leng)
        field[2] = Ntv.fast(Ntv.obj_ntv(
            field[2], typ=field[1], single=len(field[2]) == 1))

    list_series = [
        SeriesConnec.to_series(field[2], field[0],
                               None if no_key else field[4], **option)
        for field, no_key in zip(fields, no_keys)]
    dfr = pd.DataFrame({ser.name: ser for ser in list_series})
    return PdUtil.pd_index(dfr)
convert json ntv_value into a DataFrame.
Parameters
- index : list (default None) - list of index values,
- alias : boolean (default False) - if True, alias dtype else default dtype
- annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : DataFrame values
    - **table** : boolean (default False) - if True return TableSchema format

    *Returns* : tuple (json-value, name, type) where type is `typ` if it is
    given, else 'tab'.'''
    ntv_typ = typ if typ else DataFrameConnec.clas_typ
    if not kwargs.get('table', False):
        # NTV format : the index is converted as an ordinary field
        df2 = value.reset_index()
        jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                       for col in df2.columns]).to_obj()
        return (jsn, name, ntv_typ)

    # TableSchema format : (ntv_name, ntv_type) of each column is computed
    # once and reused for the data and for the schema.
    fields = [(col, SeriesConnec.to_json_ntv(value[col], table=True, no_val=True))
              for col in value.columns]
    df2 = pd.DataFrame({
        NtvUtil.from_obj_name(col)[0]: PdUtil.convert(ntv_type, value[col])
        for col, (_, ntv_type) in fields})
    table_val = json.loads(df2.to_json(orient='table',
                                       date_format='iso', default_handler=str))
    for _, (ntv_name, ntv_type) in fields:
        table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                  ntv_name, ntv_type)
    return (table_val, name, ntv_typ)
convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : DataFrame values
- table : boolean (default False) - if True return TableSchema format
@staticmethod
def to_listidx(dtf):
    ''' convert a DataFrame into categorical data

    *Return: tuple with:*

    - **list** of dict (keys : 'codec', 'name', 'keys') for each column
    - **length** of the DataFrame'''
    columns = (ser for _, ser in dtf.items())
    return ([SeriesConnec.to_idx(ser) for ser in columns], len(dtf))
convert a DataFrame in categorical data
Return: tuple with:
- list of dict (keys : 'codec', 'name', 'keys') for each column
- length of the DataFrame
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    The conversion tables are class attributes loaded from the files
    'ntv_pandas.ini' and 'ntv_table.ini' when the class is created.

    Two static methods are included:

    - to_idx: convert a Series into categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # conversion between NTVtype and pandas dtype ('ntv_pandas.ini')
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    # conversion between NTVtype and TableSchema ('ntv_table.ini')
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None if `ntv_value` is None or if the field
        has a parent and no `extkeys` is given.
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        extkeys = option['extkeys']
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            PdUtil.decode_ntv_tab(ntv)
        if parent and not extkeys:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field // coef, option['leng'])
        elif extkeys:
            if parent:
                ntv_keys = NtvConnector.keysfromderkeys(extkeys, ntv_keys)
            else:
                ntv_keys = extkeys
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and table is True)
        return (ntv_name, ntv_type)'''
        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        dtype_name = value.dtype.name
        srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            if no_val:
                # only the name and the type are requested : the TableSchema
                # value is not built
                return (ntv_name, ntv_type)
            return (PdUtil.table_val(ntv_type, ntv_name, srs), ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            # to_list() gives plain Python int (json values), not numpy scalars
            cod_value = srs.cat.codes.to_list()
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)
        res_typ = typ if typ else SeriesConnec.clas_typ
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    res_typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, res_typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series into categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are converted into datetime with UTC timezone
        (`datetime.astimezone` reads a naive datetime as a local time).
        '''
        idx = ser.astype('category')
        codec = list(idx.cat.categories)
        if codec and isinstance(codec[0], pd.Timestamp):
            codec = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                     for ts in codec]
        # to_list() gives plain Python int (json values), not numpy scalars
        return {'codec': codec, 'name': ser.name, 'keys': idx.cat.codes.to_list()}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values in the codec (if empty or
        None the codec is the list of values)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (used only when `ntv_keys` is given)
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        astype = SeriesConnec.astype
        ntv_type = ntv_codec.type_str
        pd_convert = ntv_type in SeriesConnec.types.set_index('ntv_type').index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            # categorical Series : the codec gives the categories
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            # a single codec value is repeated 'leng' times
            leng = option['leng']
            repeat = leng if len(ntv_codec) == 1 and leng else 1
            data = ntv_obj * repeat
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        mapping = astype if option['alias'] else SeriesConnec.deftype
        return srs.astype(mapping.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string - name of the Series including ntv_type
        '''
        from io import StringIO

        # A literal json string given to read_json is deprecated since
        # pandas 2.1 : the text is wrapped in a file-like object.
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)
NTV connector for pandas Series
Two static methods are included:
- to_idx: convert a Series in categorical data
- to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    '''Generate a Series Object from a Ntv field object

    *Parameters*

    - **ntv_value**: Ntv object or Ntv value - value to convert in Series

    *parameters (kwargs)*

    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **index**: list (default None) - if present, add the index in Series
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

    *Returns* : Series, or None if `ntv_value` is None or if the field
    has a parent and no `extkeys` is given.
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None,
              'annotated': False} | kwargs
    if ntv_value is None:
        return None
    extkeys = option['extkeys']
    ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
    ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
        PdUtil.decode_ntv_tab(ntv)
    if parent and not extkeys:
        return None
    if coef:
        ntv_keys = NtvConnector.keysfromcoef(
            coef, leng_field // coef, option['leng'])
    elif extkeys:
        if parent:
            ntv_keys = NtvConnector.keysfromderkeys(extkeys, ntv_keys)
        else:
            ntv_keys = extkeys
    ntv_codec = Ntv.fast(Ntv.obj_ntv(
        codec, typ=typ, single=len(codec) == 1))
    return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)
Generate a Series Object from a Ntv field object
Parameters
- ntv_value: Ntv object or Ntv value - value to convert in Series
parameters (kwargs)
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : Series values
    - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
    - **no_val** : boolean (default False) - if True (and table is True)
    return (ntv_name, ntv_type)'''
    table = kwargs.get('table', False)
    no_val = kwargs.get('no_val', False)
    dtype_name = value.dtype.name
    srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
    sr_name = srs.name if srs.name else ''
    ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

    if table:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
        if no_val:
            # only the name and the type are requested : the TableSchema
            # value is not built
            return (ntv_name, ntv_type)
        return (PdUtil.table_val(ntv_type, ntv_name, srs), ntv_name, ntv_type)
    if srs.dtype.name == 'category':
        cdc = pd.Series(srs.cat.categories)
        ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
        cat_value = PdUtil.ntv_val(ntv_type, cdc)
        cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
        # to_list() gives plain Python int (json values), not numpy scalars
        cod_value = srs.cat.codes.to_list()
        coef = NtvConnector.encode_coef(cod_value)
        ntv_value = [cat_value, [coef] if coef else cod_value]
        ntv_type = 'json'
    else:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
        ntv_value = PdUtil.ntv_val(ntv_type, srs)
    res_typ = typ if typ else SeriesConnec.clas_typ
    if len(ntv_value) == 1:
        return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                res_typ)
    return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, res_typ)
convert a Series (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : Series values
- table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
- no_val : boolean (default False) - if True return (ntv_name, ntv_type)
313 @staticmethod 314 def to_idx(ser): 315 ''' convert a Series in categorical data 316 317 *return (dict)* 318 319 { 'codec': 'list of pandas categories', 320 'name': 'name of the series', 321 'keys': 'list of pandas codes' } 322 ''' 323 idx = ser.astype('category') 324 lis = list(idx.cat.categories) 325 if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp): 326 lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) 327 for ts in lis] 328 return {'codec': lis, 'name': ser .name, 'keys': list(idx.cat.codes)}
convert a Series in categorical data
return (dict)
{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }
@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
    ''' return a pd.Series from Field data (codec, name, keys)

    *Parameters*

    - **ntv_codec**: Ntv object - codec value to convert in Series values
    - **ntv_name**: string - name of the Series
    - **ntv_keys**: list - codes of the values in the codec (if empty or
    None the codec is the list of values)

    *parameters (kwargs)*

    - **index**: list (default None) - if present, add the index in Series
    (used only when `ntv_keys` is given)
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    '''
    option = {'index': None, 'leng': None, 'alias': False,
              'annotated': False} | kwargs
    astype = SeriesConnec.astype
    ntv_type = ntv_codec.type_str
    pd_convert = ntv_type in SeriesConnec.types.set_index('ntv_type').index

    pd_name, name_type, dtype = PdUtil.pd_name(
        ntv_name, ntv_type, pd_convert)
    ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                             option['annotated'], pd_convert)
    if ntv_keys:
        # categorical Series : the codec gives the categories
        if pd_convert and name_type != 'array':
            categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
            cat_type = categ.dtype.name
            categories = categ.astype(astype.get(cat_type, cat_type))
        else:
            categories = pd.Series(ntv_obj, dtype='object')
        cat = pd.CategoricalDtype(categories=categories)
        data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
        srs = pd.Series(data, name=pd_name,
                        index=option['index'], dtype='category')
    else:
        # a single codec value is repeated 'leng' times
        leng = option['leng']
        repeat = leng if len(ntv_codec) == 1 and leng else 1
        data = ntv_obj * repeat
        if pd_convert:
            srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
        else:
            srs = pd.Series(data, name=pd_name, dtype=dtype)

    mapping = astype if option['alias'] else SeriesConnec.deftype
    return srs.astype(mapping.get(srs.dtype.name, srs.dtype.name))
return a pd.Series from Field data (codec, name, keys)
Parameters
- ntv_codec: Ntv object - codec value to convert in Series values
- ntv_type: string - default type to apply to convert in dtype
- ntv_name: string - name of the Series
parameters (kwargs)
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **decode_ntv_tab**: Generate a tuple data from a NTVvalue
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series, the type deduced
    from the name and the dtype
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series

        *Parameters*

        - **jsn** : dict - TableSchema value (keys 'schema' and 'data')

        *Returns* : DataFrame, or Series if there is a single column.'''
        from io import StringIO

        fields = jsn['schema']['fields']
        ntv_type = PdUtil.ntvtype_table(fields)
        name = PdUtil.name_table(fields)
        # (pd_name, name_type, dtype) of each non index field
        pd_infos = [PdUtil.pd_name(nam, ntvtyp, table=True)
                    for nam, ntvtyp in zip(name, ntv_type)]
        pd_name = [info[0] for info in pd_infos]
        pd_dtype = [info[2] for info in pd_infos]
        # A literal json string given to read_json is deprecated since
        # pandas 2.1 : the text is wrapped in a file-like object.
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                          for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

        - name (None or string): name of the Field
        - dtype (None or string): type of data
        - codec (list): list of Field codec values
        - parent (None or int): Field parent or None
        - keys (None or list): Field keys
        - coef (None or int): coef if primary Field else None
        - leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        # a single value : codec with one value
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        # not a [codec, ...] structure : the items are the values
        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
            return (nam, typ, val, None, None, None, len(ntv))

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        # [codec, parent, keys]
        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        # [codec, parent]
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
            return (nam, typc, valc, ntv[1].val, None, None, leng)
        # [codec, [coef]] : the length is multiplied by the coef
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
            leng = leng * ntv[1][0].val
            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
        # [codec, keys]
        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
        return (nam, typ, val, None, None, None, len(ntv))

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                                 field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name' (the schema is updated in place and
        returned)'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - if True convert the values to
        json values, else convert the json values to object values

        The Series is returned unchanged if the NTVtype needs no conversion.'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return 'json'
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index (the column 'index', if present,
        becomes the unnamed index)'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        # added : the dtype is mapped with SeriesConnec.deftype
        dtype = SeriesConnec.deftype.get(dtype, dtype)
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value

        A categorical Series or an empty Series is returned unchanged. If all
        the values are equal to the first one, only the first row is returned.'''
        if str(srs.dtype) == 'category':
            return srs
        if len(srs) == 0:
            # no first value to compare with
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
ntv-pandas utilities.
This class includes static methods:
Ntv and pandas
- decode_ntv_tab: Generate a tuple data from a NTVvalue
- ntv_type: return NTVtype from name_type and dtype of a Series
- convert: convert Series with external NTVtype
- ntv_val: convert a simple Series into NTV json-value
- ntv_obj: return a list of values to convert in a Series
- pd_name: return a tuple with the name of the Series, the type deduced from the name and the dtype
- pd_index: return a DataFrame with index
- unic: return simple value if the Series contains a single value
TableSchema
- to_obj_table: convert json TableSchema data into a DataFrame or a Series
- name_table: return a list of non index field's names from a json Table
- ntvtype_table: return a list of non index field's ntv_type from a json Table
- table_schema: add 'format' and 'type' keys in a Json TableSchema
- table_val: convert a Series into TableSchema json-value
- ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
    ''' convert json TableSchema data into a DataFrame or a Series.

    *Parameters*

    - **jsn** : dict - TableSchema json-value; the keys `schema` (with a
    `fields` list) and `data` (list of records) are read,
    - **kwargs** : accepted for signature compatibility, not used here.

    *Returns* : a DataFrame, or a Series when a single non-index column
    remains.'''
    from io import StringIO  # local import: only needed by this method

    fields = jsn['schema']['fields']
    ntv_type = PdUtil.ntvtype_table(fields)
    name = PdUtil.name_table(fields)
    # one pd_name lookup per field: item 0 is the pandas name, item 2 the dtype
    pd_infos = [PdUtil.pd_name(nam, ntvtyp, table=True)
                for nam, ntvtyp in zip(name, ntv_type)]
    pd_name = [info[0] for info in pd_infos]
    pd_dtype = [info[2] for info in pd_infos]
    # read_json is given a file-like object (literal JSON strings are
    # deprecated) and the documented 'records' orient
    dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
    dfr = PdUtil.pd_index(dfr)
    dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                        for ind, col in enumerate(dfr.columns)})
    dfr = dfr.astype({col: pd_dtype[ind]
                      for ind, col in enumerate(dfr.columns)})
    dfr.columns = pd_name
    if len(dfr.columns) == 1:
        return dfr[dfr.columns[0]]
    return dfr
convert json TableSchema data into a DataFrame or a Series
@staticmethod
def decode_ntv_tab(field):
    '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

    *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

    - name (None or string): name of the Field
    - dtype (None or string): type of data
    - codec (list): list of Field codec values
    - parent (None or int): Field parent or None
    - keys (None or list): Field keys
    - coef (None or int): coef if primary Field else None
    - leng (int): length of the Field
    '''
    ntv = Ntv.obj(field)
    name = ntv.name
    dtype = ntv.type_str if ntv.ntv_type else None
    if isinstance(ntv, NtvSingle):
        return (name, dtype, [ntv.to_obj(simpleval=True)], None, None, None, 1)

    size = len(ntv)
    # result used whenever no structured (codec + parent/keys/coef) form matches
    plain = (name, dtype, [item.to_obj() for item in ntv],
             None, None, None, size)
    if size not in (2, 3) or isinstance(ntv[0], NtvSingle):
        return plain

    codec_ntv = ntv[0]
    leng = max(len(ind) for ind in ntv)
    codec_type = codec_ntv.type_str if codec_ntv.ntv_type else None
    codec = codec_ntv.to_obj(simpleval=True)

    if size == 3:
        # three items: codec, single parent, list of integer keys
        if isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and \
                not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (name, codec_type, codec, ntv[1].val, ntv[2].to_obj(),
                    None, leng)
        return plain

    # two items: the second one is a parent, a coef or a list of keys
    second = ntv[1]
    second_len = len(second)
    if second_len == 1 and isinstance(second.val, (int, str)):
        return (name, codec_type, codec, second.val, None, None, leng)
    if second_len == 1 and isinstance(second.val, list):
        coef = second[0].val
        return (name, codec_type, codec, None, None, coef, leng * coef)
    if second_len > 1 and isinstance(second[0].val, int):
        return (name, codec_type, codec, None, second.to_obj(), None, leng)
    return plain
Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
Returns tuple: (name, dtype, codec, parent, keys, coef, leng)
- name (None or string): name of the Field
- dtype (None or string): type of data
- codec (list): list of Field codec values
- parent (None or int): Field parent or None
- keys (None or list): Field keys
- coef (None or int): coef if primary Field else None
- leng (int): length of the Field
484 @staticmethod 485 def name_table(fields): 486 '''return a list of non index field's names from a json Table''' 487 names = [field.get('name', None) for field in fields 488 if field.get('name', None) != 'index'] 489 return [None if name == 'values' else name for name in names]
return a list of the names of the non-index fields of a json Table
491 @staticmethod 492 def ntvtype_table(fields): 493 '''return a list of non index field's ntv_type from a json Table''' 494 return [PdUtil.ntv_table(field.get('format', 'default'), 495 field.get('type', None)) for field in fields 496 if field.get('name', None) != 'index']
return a list of the ntv_type of the non-index fields of a json Table
@staticmethod
def table_schema(schema, name, ntv_type):
    '''convert 'ntv_type' into 'format' and 'type' keys in a Json TableSchema
    for the field defined by 'name'.

    *Parameters*

    - **schema** : dict - Json TableSchema (its `fields` list is updated in place),
    - **name** : string - name of the field to update,
    - **ntv_type** : string - NTVtype used to look up 'format' and 'type'.

    *Returns* : the same `schema` object.'''
    field_names = [field['name'] for field in schema['fields']]
    target = schema['fields'][field_names.index(name)]
    table_row = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
    if table_row['format'] != 'default':
        target['format'] = table_row['format']
    else:
        # the 'default' format is implicit: the key is removed
        target.pop('format', None)
    target['type'] = table_row['type']
    target.pop('extDtype', None)
    return schema
convert 'ntv_type' into 'format' and 'type' keys in a Json TableSchema for the field defined by 'name'
@staticmethod
def table_val(ntv_type, ntv_name, srs):
    '''convert a Series into TableSchema json-value.

    The Series given as argument is not renamed.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **ntv_name**: string - name of the Series
    - **srs** : Series to be converted.

    *Returns* : dict - json-value loaded from `Series.to_json(orient='table')`
    with its 'schema' updated by `PdUtil.table_schema`.'''
    # the converted Series may be the object received as argument: the name
    # is set on a shallow copy so that the caller's Series keeps its own name
    srs = PdUtil.convert(ntv_type, srs).copy(deep=False)
    srs.name = ntv_name
    tab_val = json.loads(srs.to_json(orient='table',
                                     date_format='iso', default_handler=str))
    # an unnamed Series is looked up in the schema under the name 'values'
    name = 'values' if srs.name is None else srs.name
    tab_val['schema'] = PdUtil.table_schema(
        tab_val['schema'], name, ntv_type)
    return tab_val
convert a Series into TableSchema json-value.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- ntv_name: string - name of the Series
- srs : Series to be converted.
530 @staticmethod 531 def convert(ntv_type, srs, tojson=True): 532 ''' convert Series with external NTVtype. 533 534 *Parameters* 535 536 - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype, 537 - **srs** : Series to be converted. 538 - **tojson** : boolean (default True) - apply to json function''' 539 if tojson: 540 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 541 return srs.apply(ShapelyConnec.to_coord) 542 if ntv_type == 'geojson': 543 return srs.apply(ShapelyConnec.to_geojson) 544 if ntv_type == 'date': 545 return srs.astype(str) 546 return srs 547 if ntv_type in ['point', 'line', 'polygon', 'geometry']: 548 return srs.apply(ShapelyConnec.to_geometry) 549 if ntv_type == 'geojson': 550 return srs.apply(ShapelyConnec.from_geojson) 551 if ntv_type == 'datetime': 552 return pd.to_datetime(srs) 553 if ntv_type == 'date': 554 return pd.to_datetime(srs).dt.date 555 if ntv_type == 'time': 556 return pd.to_datetime(srs).dt.time 557 return srs
convert Series with external NTVtype.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- srs : Series to be converted.
- tojson : boolean (default True) - apply to json function
559 @staticmethod 560 def ntv_type(name_type, dtype, table=False): 561 ''' return NTVtype from name_type and dtype of a Series . 562 563 *Parameters* 564 565 - **name_type** : string - type included in the Series name, 566 - **dtype** : string - dtype of the Series. 567 - **table** : boolean (default False) - True if Table Schema conversion 568 ''' 569 if not name_type: 570 types_none = SeriesConnec.types.set_index('name_type').loc[None] 571 if dtype in types_none.dtype.values: 572 return types_none.set_index('dtype').loc[dtype].ntv_type 573 if not table: 574 return 'json' 575 typtab = SeriesConnec.typtab.set_index('name_type').loc[None] 576 return typtab.set_index('dtype').loc[dtype.lower()].ntv_type 577 return name_type
return NTVtype from name_type and dtype of a Series.
Parameters
- name_type : string - type included in the Series name,
- dtype : string - dtype of the Series.
- table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
    ''' convert a simple Series into NTV json-value.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **srs** : Series to be converted.

    *Returns* : list - values of the converted Series.'''
    converted = PdUtil.convert(ntv_type, srs)
    geo_types = ('point', 'line', 'polygon', 'geometry', 'geojson')
    # geometric types and object Series are returned as a plain list
    if ntv_type in geo_types or converted.dtype.name == 'object':
        return converted.to_list()
    # other dtypes go through the pandas json serialisation
    json_text = converted.to_json(orient='records', date_format='iso',
                                  default_handler=str)
    return json.loads(json_text)
convert a simple Series into NTV json-value.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- srs : Series to be converted.
595 @staticmethod 596 def ntv_obj(ntv_codec, name_type, annotated, pd_convert): 597 '''return a list of values to convert in a Series''' 598 if pd_convert: 599 if name_type == 'array': 600 return ntv_codec.to_obj(format='obj', simpleval=True) 601 ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False, 602 def_type=ntv_codec.type_str, fast=True) 603 return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj] 604 return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)
return a list of values to convert in a Series
@staticmethod
def ntv_table(table_format, table_type):
    ''' return NTVtype from the TableSchema data.

    The value is read in `SeriesConnec.table`, indexed by 'type' and 'format'.

    *Parameters*

    - **table_format** : string - TableSchema format,
    - **table_type** : string - TableSchema type'''
    by_type_format = SeriesConnec.table.set_index(['type', 'format'])
    selected = by_type_format.loc[(table_type, table_format)]
    return selected.values[0]
return NTVtype from the TableSchema data.
Parameters
- table_format : string - TableSchema format,
- table_type : string - TableSchema type
617 @staticmethod 618 def pd_index(dfr): 619 '''return a DataFrame with index''' 620 if 'index' in dfr.columns: 621 dfr = dfr.set_index('index') 622 dfr.index.rename(None, inplace=True) 623 return dfr
return a DataFrame with index
@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
    '''return a tuple with the name of the Series, the type deduced from
    the name and the dtype.

    *Parameters*

    - **ntv_name** : string or None - name of the Ntv entity (None is
    handled as an empty string),
    - **ntv_type** : string - NTVtype of the Ntv entity,
    - **pd_convert** : boolean (default True) - if False (and `table` False)
    the name is `ntv_name::ntv_type` and the dtype is 'object',
    - **table** : boolean (default False) - if True, `SeriesConnec.typtab`
    is searched first (with the lower-cased `ntv_type`).'''
    base_name = ntv_name if ntv_name is not None else ''
    table_types = SeriesConnec.typtab.set_index('ntv_type')
    std_types = SeriesConnec.types.set_index('ntv_type')
    if table and ntv_type.lower() in table_types.index:
        row = table_types.loc[ntv_type.lower()]
        name_type, dtype = row['name_type'], row['dtype']
    elif pd_convert or table:
        name_type = '' if ntv_type == '' else std_types.loc[ntv_type]['name_type']
        dtype = std_types.loc[ntv_type]['dtype']
    else:
        return (base_name + '::' + ntv_type, ntv_type, 'object')
    # the dtype is replaced when SeriesConnec.deftype holds an entry for it
    dtype = SeriesConnec.deftype.get(dtype, dtype)
    full_name = base_name + '::' + name_type if name_type else base_name
    return (full_name or None, name_type, dtype)
return a tuple with the name of the Series, the type deduced from the name and the dtype
644 @staticmethod 645 def unic(srs): 646 ''' return simple value if the Series contains a single value''' 647 if str(srs.dtype) == 'category': 648 return srs 649 return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
return a one-element Series if all the values of the Series are equal