ntv-pandas.ntv_pandas.pandas_ntv_connector
Created on Feb 27 2023
@author: Philippe@loco-labs.io
The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
(see the specification document).
A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV
It contains:
- functions `read_json` and `to_json` to convert JSON data and pandas entities
- the child classes of the `NTV.json_ntv.ntv.NtvConnector` abstract class:
    - `DataFrameConnec`: 'tab' connector
    - `SeriesConnec`: 'field' connector
- a utility class with static methods: `PdUtil`
# -*- coding: utf-8 -*-
"""
Created on Feb 27 2023

@author: Philippe@loco-labs.io

The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
([specification document](
https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).

A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV

It contains :

- functions `read_json` and `to_json` to convert JSON data and pandas entities

- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
    - `DataFrameConnec`: 'tab' connector
    - `SeriesConnec`: 'field' connector

- a utility class with static methods : `PdUtil`
"""
import datetime
import io
import json
import configparser
from pathlib import Path
import pandas as pd
import numpy as np

import ntv_pandas
from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
from json_ntv.ntv_util import NtvUtil
from json_ntv.ntv_connector import ShapelyConnec


def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    - **table** : boolean (default False) - if True return TableSchema format
    (in that case no header is added, whatever the value of **header**)
    '''
    option = {'encoded': False, 'header': True, 'table': False} | kwargs
    option['header'] = False if option['table'] else option['header']
    if isinstance(pd_array, pd.Series):
        jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0]
        head = ':field'
    else:
        jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'])[0]
        head = ':tab'
    if option['header']:
        jsn = {head: jsn}
    if option['encoded']:
        return json.dumps(jsn)
    return jsn


def read_json(js, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or Dataframe.

    *parameters*

    - **js** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(js) if isinstance(js, str) else js
    # Only a JSON object can carry a 'schema' key: the isinstance guard avoids
    # a TypeError on scalars and accidental substring / membership matches on
    # JSON strings and arrays.
    if isinstance(jso, dict) and 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)


def as_def_type(pd_array):
    '''convert a Series or DataFrame with default dtype'''
    if isinstance(pd_array, pd.Series):
        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})


class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **ntv_value** : NTV value of the tab (one NTV field per column)
        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        *Returns* : DataFrame (a column named 'index', if any, becomes the index)'''
        series = SeriesConnec.to_series

        ntv = Ntv.fast(ntv_value)
        # each entry: [name, type, codec, parent, keys, coef, leng]
        lidx = [list(PdUtil.decode_ntv_tab(ntvf)) for ntvf in ntv]
        # default=0 : a tab without any field gives an empty DataFrame
        leng = max((idx[6] for idx in lidx), default=0)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, field in enumerate(lidx):
            # snapshot taken before init_ntv_keys, which receives lidx and may
            # modify it
            no_keys.append(not field[3] and not field[4] and not field[5])
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            codec = lidx[ind][2]
            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=lidx[ind][1],
                                                single=len(codec) == 1))
        list_series = [series(field[2], field[0],
                              None if skip else field[4], **option)
                       for field, skip in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format

        *Returns* : tuple (json-value, name, type) where type is 'tab' if **typ**
        is not defined'''

        table = kwargs.get('table', False)
        ntv_typ = DataFrameConnec.clas_typ if not typ else typ
        if not table:
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, ntv_typ)
        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
            value[col]) for col in value.columns})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        # the loop variable must not be called 'name': it would overwrite the
        # 'name' parameter returned below
        for col in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[col], table=True, no_val=True)
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))


class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # conversion tables are loaded from the .ini files located in the
    # ntv_pandas package directory
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None if ntv_value is None or if the field has a
        parent and no extkeys is given
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            PdUtil.decode_ntv_tab(ntv)
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and table is True)
        return (ntv_name, ntv_type)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # Timestamp categories are returned as UTC python datetimes
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the categorical Series (if empty or
        None, the Series is built from the codec values only)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string - name of the Series including ntv_type'''
        # read_json is given a file-like object: passing a literal JSON string
        # is deprecated in recent pandas versions
        srs = pd.read_json(io.StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, to_json=False)


class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **decode_ntv_tab**: Generate a tuple data from a NTVvalue
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series
        (a Series is returned if the data contains a single non index field)'''
        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
                    for nam, ntvtyp in zip(name, ntv_type)]
        # read_json is given a file-like object: passing a literal JSON string
        # is deprecated in recent pandas versions
        dfr = pd.read_json(io.StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col],
                                                to_json=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind] for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

        - name (None or string): name of the Field
        - dtype (None or string): type of data
        - codec (list): list of Field codec values
        - parent (None or int): Field parent or None
        - keys (None or list): Field keys
        - coef (None or int): coef if primary Field else None
        - leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
            return (nam, typ, val, None, None, None, len(ntv))

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
            return (nam, typc, valc, ntv[1].val, None, None, leng)
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
            leng = leng * ntv[1][0].val
            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
        return (nam, typ, val, None, None, None, len(ntv))

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                                 field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name' (the schema is updated in place and
        returned)'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, to_json=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **to_json** : boolean (default True) - apply to json function'''
        if to_json:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return 'json'
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index (the column named 'index', if any,
        is used as unnamed index)'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        # replace the dtype by its 'deftype' equivalent when one is defined
        dtype = SeriesConnec.deftype.get(dtype, dtype)
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value
        (an empty Series is returned unchanged)'''
        if len(srs) == 0:
            # srs.values[0] below would raise IndexError on an empty Series
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    - **table** : boolean (default False) - if True return TableSchema format
    (in that case no header is added)
    '''
    settings = {'encoded': False, 'header': True, 'table': False} | kwargs
    as_table = settings['table']
    # TableSchema output is never wrapped in a header object
    with_header = settings['header'] if not as_table else False
    if isinstance(pd_array, pd.Series):
        connector, key = SeriesConnec, ':field'
    else:
        connector, key = DataFrameConnec, ':tab'
    payload = connector.to_json_ntv(pd_array, table=as_table)[0]
    if with_header:
        payload = {key: payload}
    return json.dumps(payload) if settings['encoded'] else payload
convert pandas Series or Dataframe to JSON text or JSON Value.
parameters
- pd_array : Series or Dataframe to convert
- encoded : boolean (default: False) - if True return a JSON text else a JSON value
- header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
- table : boolean (default False) - if True return TableSchema format
def read_json(js, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or Dataframe.

    *parameters*

    - **js** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(js) if isinstance(js, str) else js
    # Only a JSON object can carry a 'schema' key: the isinstance guard avoids
    # a TypeError on scalars and accidental substring / membership matches on
    # JSON strings and arrays.
    if isinstance(jso, dict) and 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    # no header: the 'series' option selects the kind of object to build
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
convert JSON text or JSON Value to pandas Series or Dataframe.
parameters
- js : JSON text or JSON value to convert
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
- series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def as_def_type(pd_array):
    '''convert a Series or DataFrame with default dtype

    A DataFrame is rebuilt column by column; a Series is cast to the dtype
    found in `SeriesConnec.deftype` (unchanged if its dtype is not listed).'''
    if not isinstance(pd_array, pd.Series):
        converted = {col: as_def_type(pd_array[col]) for col in pd_array.columns}
        return pd.DataFrame(converted)
    current = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(current, current))
convert a Series or DataFrame with default dtype
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **ntv_value** : NTV value of the tab (one NTV field per column)
        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        *Returns* : DataFrame (a column named 'index', if any, becomes the index)'''
        series = SeriesConnec.to_series

        ntv = Ntv.fast(ntv_value)
        # each entry: [name, type, codec, parent, keys, coef, leng]
        lidx = [list(PdUtil.decode_ntv_tab(ntvf)) for ntvf in ntv]
        # default=0 : a tab without any field gives an empty DataFrame
        leng = max((idx[6] for idx in lidx), default=0)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, field in enumerate(lidx):
            # snapshot taken before init_ntv_keys, which receives lidx and may
            # modify it
            no_keys.append(not field[3] and not field[4] and not field[5])
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            codec = lidx[ind][2]
            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=lidx[ind][1],
                                                single=len(codec) == 1))
        list_series = [series(field[2], field[0],
                              None if skip else field[4], **option)
                       for field, skip in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format

        *Returns* : tuple (json-value, name, type) where type is 'tab' if **typ**
        is not defined'''

        table = kwargs.get('table', False)
        ntv_typ = DataFrameConnec.clas_typ if not typ else typ
        if not table:
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, ntv_typ)
        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
            value[col]) for col in value.columns})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        # the loop variable must not be called 'name': it would overwrite the
        # 'name' parameter returned below
        for col in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[col], table=True, no_val=True)
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))
NTV connector for pandas DataFrame.
One static method is included:
- to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    ''' convert json ntv_value into a DataFrame.

    *Parameters*

    - **index** : list (default None) - list of index values,
    - **alias** : boolean (default False) - if True, alias dtype else default dtype
    - **annotated** : boolean (default False) - if True, NTV names are not included.'''
    # each entry: [name, type, codec, parent, keys, coef, leng]
    fields = [list(PdUtil.decode_ntv_tab(ntv_field))
              for ntv_field in Ntv.fast(ntv_value)]
    length = max(field[6] for field in fields)
    options = kwargs | {'leng': length}
    without_keys = []
    for pos, field in enumerate(fields):
        # the flag is taken before init_ntv_keys is given the chance to
        # fill in the keys of the field
        without_keys.append(not (field[3] or field[4] or field[5]))
        NtvConnector.init_ntv_keys(pos, fields, length)
        codec = fields[pos][2]
        fields[pos][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=fields[pos][1],
                                              single=len(codec) == 1))
    build = SeriesConnec.to_series
    columns = [build(field[2], field[0],
                     None if no_key else field[4], **options)
               for field, no_key in zip(fields, without_keys)]
    dfr = pd.DataFrame({ser.name: ser for ser in columns})
    return PdUtil.pd_index(dfr)
convert json ntv_value into a DataFrame.
Parameters
- index : list (default None) - list of index values,
- alias : boolean (default False) - if True, alias dtype else default dtype
- annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : DataFrame values
    - **table** : boolean (default False) - if True return TableSchema format

    *Returns* : tuple (json-value, name, type) where type is `typ` if
    given, else `DataFrameConnec.clas_typ`.'''
    ntv_typ = typ if typ else DataFrameConnec.clas_typ
    if not kwargs.get('table', False):
        df2 = value.reset_index()
        jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                       for col in df2.columns]).to_obj()
        return (jsn, name, ntv_typ)

    # (ntv_name, ntv_type) of every column, computed once and reused for
    # both the data conversion and the schema update
    col_types = {col: SeriesConnec.to_json_ntv(value[col], table=True,
                                               no_val=True)
                 for col in value.columns}
    df2 = pd.DataFrame({
        NtvUtil.from_obj_name(col)[0]: PdUtil.convert(col_types[col][1],
                                                      value[col])
        for col in value.columns})
    table_val = json.loads(df2.to_json(orient='table',
                                       date_format='iso',
                                       default_handler=str))
    # the loop variable must not be called `name`: it would overwrite the
    # `name` parameter returned below
    for col in value.columns:
        ntv_name, ntv_type = col_types[col]
        table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                  ntv_name, ntv_type)
    return (table_val, name, ntv_typ)
convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : DataFrame values
- table : boolean (default False) - if True return TableSchema format
@staticmethod
def to_listidx(dtf):
    ''' convert a DataFrame in categorical data

    *Return: tuple with:*

    - **list** of dict (keys : 'codec', 'name', 'keys') for each column
    - **length** of the DataFrame'''
    categorical = [SeriesConnec.to_idx(column) for _, column in dtf.items()]
    return (categorical, len(dtf))
convert a DataFrame in categorical data
Return: tuple with:
- list of dict (keys : 'codec', 'name', 'keys') for each column
- length of the DataFrame
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # conversion tables loaded from the 'ntv_pandas.ini' file of the package
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    deftype = {val: key for key, val in astype.items()}
    # TableSchema mapping loaded from the 'ntv_table.ini' file of the package
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None if ntv_value is None or if the field has
        a parent and no extkeys is given.
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        field = Ntv.obj(ntv_value, decode_str=option['decode_str'])
        (ntv_name, typ, codec, parent,
         ntv_keys, coef, leng_field) = PdUtil.decode_ntv_tab(field)
        ext_keys = option['extkeys']
        if parent and not ext_keys:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field // coef, option['leng'])
        elif ext_keys:
            if parent:
                ntv_keys = NtvConnector.keysfromderkeys(ext_keys, ntv_keys)
            else:
                ntv_keys = ext_keys
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and table is True)
        return (ntv_name, ntv_type)'''
        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(value.dtype.name,
                                                   value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            if no_val:
                # the TableSchema value is not needed: do not build it
                return (ntv_name, ntv_type)
            return (PdUtil.table_val(ntv_type, ntv_name, srs),
                    ntv_name, ntv_type)

        if srs.dtype.name == 'category':
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)

        clas_typ = typ if typ else SeriesConnec.clas_typ
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(),
                    name, clas_typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(),
                name, clas_typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # pd.Timestamp is the public name of the pandas timestamp class
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values in the codec (if empty or
        None, the codec values are the Series values)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        known_types = SeriesConnec.types.set_index('ntv_type').index
        alias_of = SeriesConnec.astype

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        repeat = option['leng'] if len(ntv_codec) == 1 and option['leng'] else 1
        pd_convert = ntv_type in known_types

        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
        values = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                option['annotated'], pd_convert)
        if not ntv_keys:
            data = values * repeat
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)
        else:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(values, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(alias_of.get(cat_type, cat_type))
            else:
                categories = pd.Series(values, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')

        target = alias_of if option['alias'] else SeriesConnec.deftype
        return srs.astype(target.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string - name of the Series including ntv_type'''
        from io import StringIO

        # pandas deprecates literal JSON strings in read_json: wrap the
        # text in a file-like object
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, to_json=False)
NTV connector for pandas Series
Two static methods are included:
- to_idx: convert a Series in categorical data
- to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    '''Generate a Series Object from a Ntv field object

    *Parameters*

    - **ntv_value**: Ntv object or Ntv value - value to convert in Series

    *parameters (kwargs)*

    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **index**: list (default None) - if present, add the index in Series
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

    *Returns* : Series, or None if ntv_value is None or if the field has
    a parent and no extkeys is given.
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None,
              'annotated': False} | kwargs
    if ntv_value is None:
        return None
    field = Ntv.obj(ntv_value, decode_str=option['decode_str'])
    (ntv_name, typ, codec, parent,
     ntv_keys, coef, leng_field) = PdUtil.decode_ntv_tab(field)
    ext_keys = option['extkeys']
    if parent and not ext_keys:
        return None
    if coef:
        ntv_keys = NtvConnector.keysfromcoef(
            coef, leng_field // coef, option['leng'])
    elif ext_keys:
        if parent:
            ntv_keys = NtvConnector.keysfromderkeys(ext_keys, ntv_keys)
        else:
            ntv_keys = ext_keys
    ntv_codec = Ntv.fast(Ntv.obj_ntv(
        codec, typ=typ, single=len(codec) == 1))
    return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)
Generate a Series Object from a Ntv field object
Parameters
- ntv_value: Ntv object or Ntv value - value to convert in Series
parameters (kwargs)
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
    ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object,
    - **name** : string (default None) - name of the NTV object
    - **value** : Series values
    - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
    - **no_val** : boolean (default False) - if True (and table is True)
    return (ntv_name, ntv_type)'''
    table = kwargs.get('table', False)
    no_val = kwargs.get('no_val', False)
    srs = value.astype(SeriesConnec.astype.get(value.dtype.name,
                                               value.dtype.name))
    sr_name = srs.name if srs.name else ''
    ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

    if table:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
        if no_val:
            # the TableSchema value is not needed: do not build it
            return (ntv_name, ntv_type)
        return (PdUtil.table_val(ntv_type, ntv_name, srs),
                ntv_name, ntv_type)

    if srs.dtype.name == 'category':
        cdc = pd.Series(srs.cat.categories)
        ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
        cat_value = PdUtil.ntv_val(ntv_type, cdc)
        cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
        cod_value = list(srs.cat.codes)
        coef = NtvConnector.encode_coef(cod_value)
        ntv_value = [cat_value, [coef] if coef else cod_value]
        ntv_type = 'json'
    else:
        ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
        ntv_value = PdUtil.ntv_val(ntv_type, srs)

    clas_typ = typ if typ else SeriesConnec.clas_typ
    if len(ntv_value) == 1:
        return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(),
                name, clas_typ)
    return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(),
            name, clas_typ)
convert a Series (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : Series values
- table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
- no_val : boolean (default False) - if True return (ntv_name, ntv_type)
286 @staticmethod 287 def to_idx(ser): 288 ''' convert a Series in categorical data 289 290 *return (dict)* 291 292 { 'codec': 'list of pandas categories', 293 'name': 'name of the series', 294 'keys': 'list of pandas codes' } 295 ''' 296 idx = ser.astype('category') 297 lis = list(idx.cat.categories) 298 if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp): 299 lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) 300 for ts in lis] 301 return {'codec': lis, 'name': ser .name, 'keys': list(idx.cat.codes)}
convert a Series in categorical data
return (dict)
{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }
@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
    ''' return a pd.Series from Field data (codec, name, keys)

    *Parameters*

    - **ntv_codec**: Ntv object - codec value to convert in Series values
    - **ntv_name**: string - name of the Series
    - **ntv_keys**: list - codes of the values in the codec (if empty or
    None, the codec values are the Series values)

    *parameters (kwargs)*

    - **index**: list (default None) - if present, add the index in Series
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    '''
    option = {'index': None, 'leng': None, 'alias': False,
              'annotated': False} | kwargs
    known_types = SeriesConnec.types.set_index('ntv_type').index
    alias_of = SeriesConnec.astype

    ntv_type = ntv_codec.type_str
    # a single codec value is repeated 'leng' times
    repeat = option['leng'] if len(ntv_codec) == 1 and option['leng'] else 1
    pd_convert = ntv_type in known_types

    pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
    values = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                            option['annotated'], pd_convert)
    if not ntv_keys:
        data = values * repeat
        if pd_convert:
            srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
        else:
            srs = pd.Series(data, name=pd_name, dtype=dtype)
    else:
        if pd_convert and name_type != 'array':
            categ = SeriesConnec._from_json(values, dtype, ntv_type)
            cat_type = categ.dtype.name
            categories = categ.astype(alias_of.get(cat_type, cat_type))
        else:
            categories = pd.Series(values, dtype='object')
        cat = pd.CategoricalDtype(categories=categories)
        data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
        srs = pd.Series(data, name=pd_name,
                        index=option['index'], dtype='category')

    target = alias_of if option['alias'] else SeriesConnec.deftype
    return srs.astype(target.get(srs.dtype.name, srs.dtype.name))
return a pd.Series from Field data (codec, name, keys)
Parameters
- ntv_codec: Ntv object - codec value to convert in Series values
- ntv_type: string - default type to apply to convert in dtype
- ntv_name: string - name of the Series
parameters (kwargs)
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - leng of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **decode_ntv_tab**: Generate a tuple data from a NTVvalue
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series

        *Parameters*

        - **jsn** : dict - TableSchema data (keys 'schema' and 'data')

        *Returns* : DataFrame, or Series if the data has a single non index
        field. The keyword arguments are accepted but not used.'''
        from io import StringIO

        fields = jsn['schema']['fields']
        ntv_type = PdUtil.ntvtype_table(fields)
        name = PdUtil.name_table(fields)
        pd_name = []
        pd_dtype = []
        for nam, ntvtyp in zip(name, ntv_type):
            col_name, _, col_dtype = PdUtil.pd_name(nam, ntvtyp, table=True)
            pd_name.append(col_name)
            pd_dtype.append(col_dtype)
        # pandas deprecates literal JSON strings in read_json: wrap the
        # text in a file-like object ('records' is the documented orient)
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col],
                                                to_json=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                          for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

        - name (None or string): name of the Field
        - dtype (None or string): type of data
        - codec (list): list of Field codec values
        - parent (None, int or string): Field parent or None
        - keys (None or list): Field keys
        - coef (None or int): coef if primary Field else None
        - leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        size = len(ntv)
        # result used when the value is a plain list of values
        plain = (nam, typ, val, None, None, None, size)
        if size < 2 or size > 3 or isinstance(ntv[0], NtvSingle):
            return plain

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        if size == 3:
            # [codec, parent, keys]
            if isinstance(ntv[1], NtvSingle) and \
                    isinstance(ntv[1].val, (int, str)) and \
                    not isinstance(ntv[2], NtvSingle) and \
                    isinstance(ntv[2][0].val, int):
                return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
            return plain
        second = ntv[1]
        second_len = len(second)
        if second_len == 1:
            if isinstance(second.val, (int, str)):
                # [codec, parent]
                return (nam, typc, valc, second.val, None, None, leng)
            if isinstance(second.val, list):
                # [codec, [coef]]
                coef = second[0].val
                return (nam, typc, valc, None, None, coef, leng * coef)
        elif second_len > 1 and isinstance(second[0].val, int):
            # [codec, keys]
            return (nam, typc, valc, None, second.to_obj(), None, leng)
        return plain

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table

        The name 'values' is replaced by None.'''
        return [None if field_name == 'values' else field_name
                for field_name in (field.get('name') for field in fields)
                if field_name != 'index']

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        ntv_types = []
        for field in fields:
            if field.get('name') == 'index':
                continue
            ntv_types.append(PdUtil.ntv_table(field.get('format', 'default'),
                                              field.get('type')))
        return ntv_types

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name' (the schema is updated in place and
        returned)'''
        names = [field['name'] for field in schema['fields']]
        target = schema['fields'][names.index(name)]
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            target.pop('format', None)
        else:
            target['format'] = tabletype['format']
        target['type'] = tabletype['type']
        target.pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        converted = PdUtil.convert(ntv_type, srs)
        converted.name = ntv_name
        tab_val = json.loads(converted.to_json(orient='table',
                                               date_format='iso',
                                               default_handler=str))
        # 'values' is the field name used below when the Series has no name
        field_name = converted.name if converted.name is not None else 'values'
        tab_val['schema'] = PdUtil.table_schema(tab_val['schema'], field_name,
                                                ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, to_json=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **to_json** : boolean (default True) - apply to json function'''
        geometries = ['point', 'line', 'polygon', 'geometry']
        if to_json:
            if ntv_type in geometries:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in geometries:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if name_type:
            return name_type
        types_none = SeriesConnec.types.set_index('name_type').loc[None]
        if dtype in types_none.dtype.values:
            return types_none.set_index('dtype').loc[dtype].ntv_type
        if not table:
            return 'json'
        typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
        return typtab.set_index('dtype').loc[dtype.lower()].ntv_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index (the 'index' column, if present,
        becomes the unnamed index)'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value

        An empty Series is returned unchanged.'''
        if srs.empty:
            # there is no first value to compare with
            return srs
        if np.array_equal(srs.values, [srs.values[0]] * len(srs)):
            return srs[:1]
        return srs
ntv-pandas utilities.
This class includes static methods:
Ntv and pandas
- decode_ntv_tab: Generate a tuple data from a NTVvalue
- ntv_type: return NTVtype from name_type and dtype of a Series
- convert: convert Series with external NTVtype
- ntv_val: convert a simple Series into NTV json-value
- ntv_obj: return a list of values to convert in a Series
- pd_name: return a tuple with the name of the Series and the type deduced from the name
- pd_index: return a DataFrame with index
- unic: return simple value if the Series contains a single value
TableSchema
- to_obj_table: convert json TableSchema data into a DataFrame or a Series
- name_table: return a list of non index field's names from a json Table
- ntvtype_table: return a list of non index field's ntv_type from a json Table
- table_schema: add 'format' and 'type' keys in a Json TableSchema
- table_val: convert a Series into TableSchema json-value
- ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
    ''' convert json TableSchema data into a DataFrame or a Series

    *Parameters*

    - **jsn** : dict - TableSchema data (keys 'schema' and 'data')

    *Returns* : DataFrame, or Series if the data has a single non index
    field. The keyword arguments are accepted but not used.'''
    from io import StringIO

    fields = jsn['schema']['fields']
    ntv_type = PdUtil.ntvtype_table(fields)
    name = PdUtil.name_table(fields)
    pd_name = []
    pd_dtype = []
    for nam, ntvtyp in zip(name, ntv_type):
        col_name, _, col_dtype = PdUtil.pd_name(nam, ntvtyp, table=True)
        pd_name.append(col_name)
        pd_dtype.append(col_dtype)
    # pandas deprecates literal JSON strings in read_json: wrap the
    # text in a file-like object ('records' is the documented orient)
    dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
    dfr = PdUtil.pd_index(dfr)
    dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col],
                                            to_json=False)
                        for ind, col in enumerate(dfr.columns)})
    dfr = dfr.astype({col: pd_dtype[ind]
                      for ind, col in enumerate(dfr.columns)})
    dfr.columns = pd_name
    if len(dfr.columns) == 1:
        return dfr[dfr.columns[0]]
    return dfr
convert json TableSchema data into a DataFrame or a Series
@staticmethod
def decode_ntv_tab(field):
    '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

    *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

    - name (None or string): name of the Field
    - dtype (None or string): type of data
    - codec (list): list of Field codec values
    - parent (None, int or string): Field parent or None
    - keys (None or list): Field keys
    - coef (None or int): coef if primary Field else None
    - leng (int): length of the Field
    '''
    ntv = Ntv.obj(field)
    typ = ntv.type_str if ntv.ntv_type else None
    nam = ntv.name
    if isinstance(ntv, NtvSingle):
        return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
    val = [ntv_val.to_obj() for ntv_val in ntv]
    size = len(ntv)
    # result used when the value is a plain list of values
    plain = (nam, typ, val, None, None, None, size)
    if size < 2 or size > 3 or isinstance(ntv[0], NtvSingle):
        return plain

    ntvc = ntv[0]
    leng = max(len(ind) for ind in ntv)
    typc = ntvc.type_str if ntvc.ntv_type else None
    valc = ntvc.to_obj(simpleval=True)
    if size == 3:
        # [codec, parent, keys]
        if isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and \
                not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        return plain
    second = ntv[1]
    second_len = len(second)
    if second_len == 1:
        if isinstance(second.val, (int, str)):
            # [codec, parent]
            return (nam, typc, valc, second.val, None, None, leng)
        if isinstance(second.val, list):
            # [codec, [coef]]
            coef = second[0].val
            return (nam, typc, valc, None, None, coef, leng * coef)
    elif second_len > 1 and isinstance(second[0].val, int):
        # [codec, keys]
        return (nam, typc, valc, None, second.to_obj(), None, leng)
    return plain
Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
Returns tuple: (name, dtype, codec, parent, keys, coef, leng)
- name (None or string): name of the Field
- dtype (None or string): type of data
- codec (list): list of Field codec values
- parent (None or int): Field parent or None
- keys (None or list): Field keys
- coef (None or int): coef if primary Field else None
- leng (int): length of the Field
@staticmethod
def name_table(fields):
    '''return the list of field names of a json Table, skipping the field
    named 'index' and replacing the name 'values' by None.

    *Parameters*

    - **fields** : list of dict - 'fields' value of a TableSchema'''
    result = []
    for field in fields:
        label = field.get('name')
        if label != 'index':
            result.append(None if label == 'values' else label)
    return result
return a list of the non-index fields' names from a JSON Table
@staticmethod
def ntvtype_table(fields):
    '''return the list of ntv_type of the fields of a json Table, skipping
    the field named 'index'.

    *Parameters*

    - **fields** : list of dict - 'fields' value of a TableSchema. A missing
    'format' key is read as 'default' and a missing 'type' key as None.'''
    ntv_types = []
    for field in fields:
        if field.get('name') != 'index':
            table_format = field.get('format', 'default')
            table_type = field.get('type')
            ntv_types.append(PdUtil.ntv_table(table_format, table_type))
    return ntv_types
return a list of the non-index fields' ntv_type from a JSON Table
@staticmethod
def table_schema(schema, name, ntv_type):
    '''set the 'format' and 'type' keys of a Json TableSchema field from a
    'ntv_type'. The schema is updated in place and returned.

    *Parameters*

    - **schema** : dict - TableSchema with a 'fields' list,
    - **name** : string - name of the field to update,
    - **ntv_type** : string - NTVtype looked up in SeriesConnec.table'''
    field_names = [field['name'] for field in schema['fields']]
    target = schema['fields'][field_names.index(name)]
    tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
    table_format = tabletype['format']
    if table_format != 'default':
        target['format'] = table_format
    else:
        # the 'default' format is not written in the schema
        target.pop('format', None)
    target['type'] = tabletype['type']
    target.pop('extDtype', None)
    return schema
convert 'ntv_type' into the 'format' and 'type' keys of a JSON TableSchema for the field defined by 'name'
@staticmethod
def table_val(ntv_type, ntv_name, srs):
    '''build the TableSchema json-value of a Series.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **ntv_name**: string - name of the Series
    - **srs** : Series to be converted.'''
    converted = PdUtil.convert(ntv_type, srs)
    converted.name = ntv_name
    json_text = converted.to_json(orient='table', date_format='iso',
                                  default_handler=str)
    tab_val = json.loads(json_text)
    # the field of an unnamed Series is looked up under the name 'values'
    field_name = 'values' if converted.name is None else converted.name
    tab_val['schema'] = PdUtil.table_schema(tab_val['schema'], field_name,
                                            ntv_type)
    return tab_val
convert a Series into TableSchema json-value.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- ntv_name: string - name of the Series
- srs : Series to be converted.
@staticmethod
def convert(ntv_type, srs, to_json=True):
    ''' apply the conversion associated with an external NTVtype to a Series.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **srs** : Series to be converted.
    - **to_json** : boolean (default True) - if True, convert towards json
    values, else convert json values back to pandas values.

    The Series is returned unchanged when no conversion applies.'''
    geo_types = ('point', 'line', 'polygon', 'geometry')
    if not to_json:
        # json values -> pandas values
        if ntv_type in geo_types:
            result = srs.apply(ShapelyConnec.to_geometry)
        elif ntv_type == 'geojson':
            result = srs.apply(ShapelyConnec.from_geojson)
        elif ntv_type == 'datetime':
            result = pd.to_datetime(srs)
        elif ntv_type == 'date':
            result = pd.to_datetime(srs).dt.date
        elif ntv_type == 'time':
            result = pd.to_datetime(srs).dt.time
        else:
            result = srs
        return result
    # pandas values -> json values
    if ntv_type in geo_types:
        result = srs.apply(ShapelyConnec.to_coord)
    elif ntv_type == 'geojson':
        result = srs.apply(ShapelyConnec.to_geojson)
    elif ntv_type == 'date':
        result = srs.astype(str)
    else:
        result = srs
    return result
convert Series with external NTVtype.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- srs : Series to be converted.
- to_json : boolean (default True) - apply to json function
@staticmethod
def ntv_type(name_type, dtype, table=False):
    ''' return the NTVtype associated with the name_type and dtype of a Series.

    *Parameters*

    - **name_type** : string - type included in the Series name,
    - **dtype** : string - dtype of the Series.
    - **table** : boolean (default False) - True if Table Schema conversion

    A non empty name_type is returned as is. Otherwise the dtype is looked up
    in SeriesConnec.types, then (only if table is True) in SeriesConnec.typtab
    with the lowercase dtype; 'json' is returned when table is False and the
    dtype is unknown.
    '''
    if name_type:
        return name_type
    untyped = SeriesConnec.types.set_index('name_type').loc[None]
    if dtype in untyped.dtype.values:
        return untyped.set_index('dtype').loc[dtype].ntv_type
    if table:
        untyped_tab = SeriesConnec.typtab.set_index('name_type').loc[None]
        return untyped_tab.set_index('dtype').loc[dtype.lower()].ntv_type
    return 'json'
return NTVtype from name_type and dtype of a Series.
Parameters
- name_type : string - type included in the Series name,
- dtype : string - dtype of the Series.
- table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
    ''' build the NTV json-value (list) of a simple Series.

    *Parameters*

    - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
    - **srs** : Series to be converted.'''
    converted = PdUtil.convert(ntv_type, srs)
    is_geo = ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']
    # geographic and object Series are returned as a plain list of values
    if is_geo or converted.dtype.name == 'object':
        return converted.to_list()
    json_text = converted.to_json(orient='records', date_format='iso',
                                  default_handler=str)
    return json.loads(json_text)
convert a simple Series into NTV json-value.
Parameters
- ntv_type : string - NTVtype deduced from the Series name_type and dtype,
- srs : Series to be converted.
@staticmethod
def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
    '''return the list of values used to build a Series.

    *Parameters*

    - **ntv_codec** : Ntv object holding the values,
    - **name_type** : string - type included in the Series name,
    - **annotated** : boolean - passed as 'simpleval' to obj_value,
    - **pd_convert** : boolean - if False, the values are obtained with
    to_obj using name_type as default type'''
    if not pd_convert:
        return ntv_codec.to_obj(format='obj', simpleval=True,
                                def_type=name_type)
    if name_type == 'array':
        return ntv_codec.to_obj(format='obj', simpleval=True)
    values = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                 def_type=ntv_codec.type_str, fast=True)
    # a single value is wrapped to always return a list
    if isinstance(values, list):
        return values
    return [values]
return a list of values to convert in a Series
@staticmethod
def ntv_table(table_format, table_type):
    ''' return the NTVtype associated with TableSchema data.

    *Parameters*

    - **table_format** : string - TableSchema format,
    - **table_type** : string - TableSchema type

    The value is the first one of the SeriesConnec.table row selected by
    the (type, format) pair.'''
    by_type_format = SeriesConnec.table.set_index(['type', 'format'])
    row = by_type_format.loc[(table_type, table_format)]
    return row.values[0]
return NTVtype from the TableSchema data.
Parameters
- table_format : string - TableSchema format,
- table_type : string - TableSchema type
@staticmethod
def pd_index(dfr):
    '''return the DataFrame with its 'index' column (if present) moved to an
    unnamed index; the DataFrame is returned as is when it has no 'index'
    column.'''
    if 'index' not in dfr.columns:
        return dfr
    indexed = dfr.set_index('index')
    indexed.index.name = None
    return indexed
return a DataFrame with index
@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
    '''return the tuple (name of the Series, type deduced from the name, dtype).

    *Parameters*

    - **ntv_name** : string or None - name of the NTV entity (None is read as ''),
    - **ntv_type** : string - NTVtype of the NTV entity,
    - **pd_convert** : boolean (default True) - if False and table is False,
    the name is 'ntv_name::ntv_type' and the dtype is 'object',
    - **table** : boolean (default False) - if True, SeriesConnec.typtab is
    searched first with the lowercase ntv_type'''
    label = '' if ntv_name is None else ntv_name
    by_type_tab = SeriesConnec.typtab.set_index('ntv_type')
    by_type = SeriesConnec.types.set_index('ntv_type')
    if table and ntv_type.lower() in by_type_tab.index:
        row = by_type_tab.loc[ntv_type.lower()]
        name_type = row['name_type']
        dtype = row['dtype']
    elif pd_convert or table:
        name_type = by_type.loc[ntv_type]['name_type'] if ntv_type != '' else ''
        dtype = by_type.loc[ntv_type]['dtype']
    else:
        return (label + '::' + ntv_type, ntv_type, 'object')
    dtype = SeriesConnec.deftype.get(dtype, dtype)  # added
    full_name = label + '::' + name_type if name_type else label
    # an empty name is returned as None
    return (full_name or None, name_type, dtype)
return a tuple with the name of the Series, the type deduced from the name and the dtype