ntv-pandas.ntv_pandas.pandas_ntv_connector
Created on Feb 27 2023
@author: Philippe@loco-labs.io
The pandas_ntv_connector module is part of the ntv-pandas.ntv_pandas package
(specification document: https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm).
A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV
It contains:
- the functions read_json and to_json, which convert between JSON data and pandas entities
- the child classes of the NTV.json_ntv.ntv.NtvConnector abstract class:
  - DataFrameConnec: 'tab' connector
  - SeriesConnec: 'field' connector
# -*- coding: utf-8 -*-
"""
Created on Feb 27 2023

@author: Philippe@loco-labs.io

The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
([specification document](
https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).

A NtvConnector is defined by:
- clas_obj: str - define the class name of the object to convert
- clas_typ: str - define the NTVtype of the converted object
- to_obj_ntv: method - converter from JsonNTV to the object
- to_json_ntv: method - converter from the object to JsonNTV

It contains :

- functions `read_json` and `to_json` to convert JSON data and pandas entities

- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
    - `DataFrameConnec`: 'tab' connector
    - `SeriesConnec`: 'field' connector

"""
import configparser
import datetime
import io
import json
from pathlib import Path

import numpy as np
import pandas as pd

import ntv_pandas
from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
from json_ntv.ntv_connector import ShapelyConnec


def to_json(pd_array, **kwargs):
    ''' convert pandas Series or DataFrame to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or DataFrame to convert
    - **text** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    '''
    option = {'text': False, 'header': True} | kwargs
    # Anything that is not a Series is handed to the DataFrame connector.
    if isinstance(pd_array, pd.Series):
        jsn = SeriesConnec.to_json_ntv(pd_array)[0]
        head = ':field'
    else:
        jsn = DataFrameConnec.to_json_ntv(pd_array)[0]
        head = ':tab'
    if option['header']:
        jsn = {head: jsn}
    if option['text']:
        return json.dumps(jsn)
    return jsn


def read_json(js, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or DataFrame.

    *parameters*

    - **js** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(js) if isinstance(js, str) else js
    ntv = Ntv.from_obj(jso)
    # A ':field' / ':tab' header decides the target; 'series' is only the
    # fallback used when the data carries neither type.
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)


def as_def_type(pd_array):
    '''convert a Series or DataFrame with default dtype

    A Series is cast with the `SeriesConnec.deftype` mapping (dtypes absent
    from the mapping are kept). Any other input is treated as a DataFrame and
    rebuilt column by column.'''
    if isinstance(pd_array, pd.Series):
        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})


class DataFrameConnec(NtvConnector):
    '''NTV connector for pandas DataFrame.

    Two static methods are included:

    - to_listidx: convert a DataFrame in categorical data
    - decode_ntv_tab: Generate a tuple data from a NTVvalue
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        *Returns* : DataFrame built from one Series per field. A column named
        'index' is used as the (unnamed) index of the DataFrame.'''
        series = SeriesConnec.to_series

        ntv = Ntv.fast(ntv_value)
        lidx = [list(DataFrameConnec.decode_ntv_tab(ntvf)) for ntvf in ntv]
        # default=0 keeps a tab without any field from raising ValueError.
        leng = max((idx[6] for idx in lidx), default=0)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, field in enumerate(lidx):
            # Recorded BEFORE init_ntv_keys, which is handed lidx and may fill
            # in the keys: fields without parent, keys and coef are built as
            # plain (non categorical) Series below.
            no_keys.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            # Re-read through lidx[ind] in case init_ntv_keys replaced the entry.
            codec = lidx[ind][2]
            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=lidx[ind][1],
                                                single=len(codec) == 1))
        list_series = [series(field[2], field[0],
                              None if keyless else field[4], **option)
                       for field, keyless in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def to_json_ntv(value, name=None, typ=None):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values'''
        # reset_index turns the index into an ordinary column so that it is
        # serialized like every other field.
        df2 = value.reset_index()
        jsn = Ntv.obj([SeriesConnec.to_json_ntv(DataFrameConnec._unic(df2[col]))[0]
                       for col in df2.columns]).to_obj()
        return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data (list of dict for each column
        with keys : 'codec', 'name', 'keys' and length of the DataFrame)'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))

    @staticmethod
    def _unic(srs):
        ''' return simple value if the Series contains a single value'''
        # An empty Series has no first value to compare against.
        if len(srs) == 0:
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns*

        - **tuple** : name, dtype, codec, parent, keys, coef, leng
            name (None or string): name of the Field
            dtype (None or string): type of data
            codec (list): list of Field codec values
            parent (None or int): Field parent or None
            keys (None or list): Field keys
            coef (None or int): coef if primary Field else None
            leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        # Single value: codec of one element, length 1.
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        # Only lists of 2 or 3 elements whose first element is itself a list
        # are examined further; everything else is returned as a plain codec.
        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
            return (nam, typ, val, None, None, None, len(ntv))

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        # The order of the tests below is significant.
        # [codec, parent, keys]
        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        # [codec, parent]
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
            return (nam, typc, valc, ntv[1].val, None, None, leng)
        # [codec, [coef]]: the length is multiplied by the coefficient
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
            leng = leng * ntv[1][0].val
            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
        # [codec, keys]
        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
        return (nam, typ, val, None, None, None, len(ntv))


class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Three static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    - read_json: return a Series from a NTVvalue
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # Type mapping tables are loaded once, at class creation, from the
    # 'ntv_pandas.ini' file located next to the ntv_pandas package.
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # deftype is the reverse mapping of astype.
    deftype = {val: key for key, val in astype.items()}

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None when ntv_value is None or when the field
        has a parent but no extkeys are supplied.
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            DataFrameConnec.decode_ntv_tab(ntv)
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **kwargs)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values'''

        astype = SeriesConnec.astype
        ntv_type_val = SeriesConnec._ntv_type_val
        srs = value.astype(astype.get(value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = Ntv.from_obj_name(sr_name)[:2]

        if srs.dtype.name == 'category':
            # Categorical data are serialized as [categories, codes], or as
            # [categories, [coef]] when encode_coef returns a non-zero value.
            cdc = pd.Series(srs.cat.categories)
            ntv_type, cat_value = ntv_type_val(name_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type, ntv_value = ntv_type_val(name_type, srs)
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        - 'codec': list of pandas categories
        - 'name': name of the series
        - 'keys': list of pandas codes
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # pd.Timestamp is the public name of the class previously reached
        # through the private pd._libs path.
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the codec values; if empty or None the
        Series is not categorical

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype

        ntv_type = ntv_codec.type_str
        # A single codec value is repeated 'leng' times when leng is given.
        len_unique = option['leng'] if len(
            ntv_codec) == 1 and option['leng'] else 1
        pd_convert = ntv_type in types.index

        dtype = types.loc[ntv_type]['dtype'] if pd_convert else 'object'
        ntv_obj, pd_name, name_type = SeriesConnec._val_nam_typ(
            ntv_codec, ntv_type, ntv_name, pd_convert, option['annotated'])

        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec.read_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec.read_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def read_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a NTVvalue

        *Parameters*

        - **data**: json-value to convert
        - **dtype**: dtype passed to pandas.read_json
        - **ntv_type**: string - NTV type; 'date', 'time' and the geometry
        types get a dedicated post-conversion
        - **pd_name**: string (default None) - name given to the Series'''
        # The JSON text is wrapped in StringIO: recent pandas versions
        # deprecate passing a literal JSON string to read_json.
        srs = pd.read_json(io.StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        if ntv_type in ['point', 'polygon', 'line', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        return srs

    @staticmethod
    def _val_nam_typ(ntv_codec, ntv_type, ntv_name, pd_convert, annotated):
        ''' return Series data from ntv data

        *parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_type**: string - default type to apply to convert in dtype
        - **ntv_name**: string - name of the Series
        - **pd_convert**: boolean - if True, use pandas json conversion
        - **annotated**: boolean - if True, ntv_codec names are ignored

        *return (tuple)*

        - ntv_obj : list with ntv_codec json values converted to object values
        - pd_name : string with the Series name
        - name_type : string - pandas types to be converted in 'json' Ntv-type
        '''
        types = SeriesConnec.types.set_index('ntv_type')
        if pd_convert:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            pd_name = ntv_name + '::' + name_type if name_type else ntv_name
            pd_name = pd_name if pd_name else None
            if name_type == 'array':
                ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True)
            else:
                ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                              def_type=ntv_codec.type_str, fast=True)
                ntv_obj = ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
            return (ntv_obj, pd_name, name_type)
        ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True, def_type=ntv_type)
        return (ntv_obj, ntv_name + '::' + ntv_type, ntv_type)

    @staticmethod
    def _ntv_type_val(name_type, srs):
        ''' convert a simple Series into NTV data (NTV type, NTV json-value).

        *Parameters*

        - **name_type** : string - default NTV type to be used. If None, dtype is
        converted in NTV type ('json' when the dtype has no NTV equivalent),
        - **srs** : Series to be converted.'''
        types = SeriesConnec.types.set_index('name_type')
        dtype = srs.dtype.name
        if not name_type:
            types_none = types.loc[None]
            if dtype in types_none.dtype.values:
                ntv_type = types_none.set_index('dtype').loc[dtype].ntv_type
            else:
                ntv_type = 'json'
            return (ntv_type, json.loads(srs.to_json(orient='records',
                                                     date_format='iso', default_handler=str)))
        ntv_type = name_type
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return (ntv_type, srs.apply(ShapelyConnec.to_coord).to_list())
        if ntv_type == 'geojson':
            return (ntv_type, srs.apply(ShapelyConnec.to_geojson).to_list())
        if ntv_type == 'date':
            srs = srs.astype(str)
        if dtype == 'object':
            return (ntv_type, srs.to_list())
        return (ntv_type, json.loads(srs.to_json(orient='records',
                                                 date_format='iso', default_handler=str)))
def to_json(pd_array, **kwargs):
    ''' Serialize a pandas Series or DataFrame as a JSON text or a JSON value.

    *parameters*

    - **pd_array** : Series or DataFrame to convert (anything that is not a
    Series is handed to the DataFrame connector)
    - **text** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    '''
    settings = {'text': False, 'header': True} | kwargs
    is_series = isinstance(pd_array, pd.Series)
    connector = SeriesConnec if is_series else DataFrameConnec
    # to_json_ntv returns (json-value, name, type); only the value is kept.
    payload = connector.to_json_ntv(pd_array)[0]
    if settings['header']:
        payload = {':field' if is_series else ':tab': payload}
    return json.dumps(payload) if settings['text'] else payload
convert pandas Series or Dataframe to JSON text or JSON Value.
parameters
- pd_array : Series or Dataframe to convert
- text : boolean (default: False) - if True return a JSON text else a JSON value
- header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
def read_json(js, **kwargs):
    ''' Build a pandas Series or DataFrame from a JSON text or a JSON value.

    *parameters*

    - **js** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    settings = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
                'annotated': False, 'series': False} | kwargs
    parsed = js
    if isinstance(js, str):
        parsed = json.loads(js)
    ntv = Ntv.from_obj(parsed)
    kind = ntv.type_str
    if kind == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **settings)
    # Without a 'field'/'tab' header the 'series' option selects the target;
    # in that case the Ntv object itself (not its value) is converted.
    if kind != 'tab' and settings['series']:
        return SeriesConnec.to_obj_ntv(ntv, **settings)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **settings)
convert JSON text or JSON Value to pandas Series or Dataframe.
parameters
- js : JSON text or JSON value to convert
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted into object values
- leng: integer (default None) - length of the Series (used with a single codec value)
- alias: boolean (default False) - if True, convert the dtype to its alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
- series: boolean (default False) - used only when the JSON data has no header. If True, the JSON data is converted into a Series, otherwise into a DataFrame
def as_def_type(pd_array):
    '''Return a copy of a Series or DataFrame cast to the default dtypes.

    A Series is cast with the `SeriesConnec.deftype` mapping (a dtype absent
    from the mapping is kept as is). Any other input is treated as a
    DataFrame and rebuilt column by column.'''
    if not isinstance(pd_array, pd.Series):
        converted = {}
        for col in pd_array.columns:
            converted[col] = as_def_type(pd_array[col])
        return pd.DataFrame(converted)
    current = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(current, current))
convert a Series or DataFrame with default dtype
class DataFrameConnec(NtvConnector):
    '''NTV connector for pandas DataFrame.

    Two static methods are included:

    - to_listidx: convert a DataFrame in categorical data
    - decode_ntv_tab: Generate a tuple data from a NTVvalue
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        *Returns* : DataFrame built from one Series per field. A column named
        'index' is used as the (unnamed) index of the DataFrame.'''
        series = SeriesConnec.to_series

        ntv = Ntv.fast(ntv_value)
        lidx = [list(DataFrameConnec.decode_ntv_tab(ntvf)) for ntvf in ntv]
        # default=0 keeps a tab without any field from raising ValueError.
        leng = max((idx[6] for idx in lidx), default=0)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, field in enumerate(lidx):
            # Recorded BEFORE init_ntv_keys, which is handed lidx and may fill
            # in the keys: fields without parent, keys and coef are built as
            # plain (non categorical) Series below.
            no_keys.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            # Re-read through lidx[ind] in case init_ntv_keys replaced the entry.
            codec = lidx[ind][2]
            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=lidx[ind][1],
                                                single=len(codec) == 1))
        list_series = [series(field[2], field[0],
                              None if keyless else field[4], **option)
                       for field, keyless in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def to_json_ntv(value, name=None, typ=None):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values'''
        # reset_index turns the index into an ordinary column so that it is
        # serialized like every other field.
        df2 = value.reset_index()
        jsn = Ntv.obj([SeriesConnec.to_json_ntv(DataFrameConnec._unic(df2[col]))[0]
                       for col in df2.columns]).to_obj()
        return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data (list of dict for each column
        with keys : 'codec', 'name', 'keys' and length of the DataFrame)'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))

    @staticmethod
    def _unic(srs):
        ''' return simple value if the Series contains a single value'''
        # An empty Series has no first value to compare against.
        if len(srs) == 0:
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns*

        - **tuple** : name, dtype, codec, parent, keys, coef, leng
            name (None or string): name of the Field
            dtype (None or string): type of data
            codec (list): list of Field codec values
            parent (None or int): Field parent or None
            keys (None or list): Field keys
            coef (None or int): coef if primary Field else None
            leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        # Single value: codec of one element, length 1.
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        # Only lists of 2 or 3 elements whose first element is itself a list
        # are examined further; everything else is returned as a plain codec.
        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
            return (nam, typ, val, None, None, None, len(ntv))

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        # The order of the tests below is significant.
        # [codec, parent, keys]
        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        # [codec, parent]
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
            return (nam, typc, valc, ntv[1].val, None, None, leng)
        # [codec, [coef]]: the length is multiplied by the coefficient
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
            leng = leng * ntv[1][0].val
            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
        # [codec, keys]
        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
        return (nam, typ, val, None, None, None, len(ntv))
NTV connector for pandas DataFrame.
Two static methods are included:
- to_listidx: convert a DataFrame in categorical data
- decode_ntv_tab: Generate a tuple data from a NTVvalue
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
    ''' convert json ntv_value into a DataFrame.

    *Parameters*

    - **index** : list (default None) - list of index values,
    - **alias** : boolean (default False) - if True, alias dtype else default dtype
    - **annotated** : boolean (default False) - if True, NTV names are not included.

    *Returns* : DataFrame built from one Series per field. A column named
    'index' is used as the (unnamed) index of the DataFrame.'''
    series = SeriesConnec.to_series

    ntv = Ntv.fast(ntv_value)
    lidx = [list(DataFrameConnec.decode_ntv_tab(ntvf)) for ntvf in ntv]
    # default=0 keeps a tab without any field from raising ValueError.
    leng = max((idx[6] for idx in lidx), default=0)
    option = kwargs | {'leng': leng}
    no_keys = []
    for ind, field in enumerate(lidx):
        # Recorded BEFORE init_ntv_keys, which is handed lidx and may fill in
        # the keys: fields without parent, keys and coef are built as plain
        # (non categorical) Series below.
        no_keys.append(not (field[3] or field[4] or field[5]))
        NtvConnector.init_ntv_keys(ind, lidx, leng)
        # Re-read through lidx[ind] in case init_ntv_keys replaced the entry.
        codec = lidx[ind][2]
        lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=lidx[ind][1],
                                            single=len(codec) == 1))
    list_series = [series(field[2], field[0],
                          None if keyless else field[4], **option)
                   for field, keyless in zip(lidx, no_keys)]
    dfr = pd.DataFrame({ser.name: ser for ser in list_series})
    if 'index' in dfr.columns:
        dfr = dfr.set_index('index')
        dfr.index.rename(None, inplace=True)
    return dfr
convert json ntv_value into a DataFrame.
Parameters
- index : list (default None) - list of index values,
- alias : boolean (default False) - if True, alias dtype else default dtype
- annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None):
    ''' Serialize a DataFrame (value, name, type) as NTV json (json-value, name, type).

    *Parameters*

    - **typ** : string (default None) - type of the NTV object
    (DataFrameConnec.clas_typ is used when it is empty),
    - **name** : string (default None) - name of the NTV object
    - **value** : DataFrame values'''
    # The index is moved into the columns so that it is serialized as a field.
    flat = value.reset_index()
    fields = []
    for col in flat.columns:
        column = DataFrameConnec._unic(flat[col])
        # to_json_ntv returns (json-value, name, type); only the value is kept.
        fields.append(SeriesConnec.to_json_ntv(column)[0])
    jsn = Ntv.obj(fields).to_obj()
    return (jsn, name, typ if typ else DataFrameConnec.clas_typ)
convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : DataFrame values
@staticmethod
def to_listidx(dtf):
    ''' Convert a DataFrame into categorical data.

    Returns a tuple: the list of `SeriesConnec.to_idx` results (one per
    column, in column order) and the length of the DataFrame.'''
    idx_list = [SeriesConnec.to_idx(column) for _label, column in dtf.items()]
    row_count = len(dtf)
    return (idx_list, row_count)
convert a DataFrame into categorical data (a list with one dict per column, with keys 'codec', 'name' and 'keys', together with the length of the DataFrame)
@staticmethod
def decode_ntv_tab(field):
    '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

    The value is converted with `Ntv.obj`, then its layout is matched against
    the cases below, in this order (the first match wins):

    - single value: codec of one element, length 1
    - list that has not 2 or 3 elements, or whose first element is a single
      value: the whole list is the codec
    - 3 elements (codec, single int/str, list starting with an int):
      codec, parent and keys
    - 2 elements, second of length 1 holding an int/str: codec and parent
    - 2 elements, second of length 1 holding a list: codec and coef
      (leng is multiplied by coef)
    - 2 elements, second longer than 1 and starting with an int: codec and keys
    - anything else: the whole list is the codec

    *Returns*

    - **tuple** : name, dtype, codec, parent, keys, coef, leng
        name (None or string): name of the Field
        dtype (None or string): type of data
        codec (list): list of Field codec values
        parent (None or int): Field parent or None
        keys (None or list): Field keys
        coef (None or int): coef if primary Field else None
        leng (int): length of the Field
    '''
    ntv = Ntv.obj(field)
    typ = ntv.type_str if ntv.ntv_type else None
    nam = ntv.name
    # Single value: wrapped in a one-element codec.
    if isinstance(ntv, NtvSingle):
        return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
    val = [ntv_val.to_obj() for ntv_val in ntv]
    # Not a (codec, ...) structure: the list itself is the codec.
    if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
        return (nam, typ, val, None, None, None, len(ntv))

    # From here on the first element is the codec; type and values are taken
    # from it rather than from the enclosing list.
    ntvc = ntv[0]
    leng = max(len(ind) for ind in ntv)
    typc = ntvc.type_str if ntvc.ntv_type else None
    valc = ntvc.to_obj(simpleval=True)
    # (codec, parent, keys)
    if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
            isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
            isinstance(ntv[2][0].val, int):
        return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
    # (codec, parent)
    if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
        return (nam, typc, valc, ntv[1].val, None, None, leng)
    # (codec, [coef]): the field length is the codec length times coef
    if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
        leng = leng * ntv[1][0].val
        return (nam, typc, valc, None, None, ntv[1][0].val, leng)
    # (codec, keys)
    if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
        return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
    # No known structure matched: the list itself is the codec.
    return (nam, typ, val, None, None, None, len(ntv))
Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
Returns
- tuple : name, dtype, codec, parent, keys, coef, leng
  - name (None or string): name of the Field
  - dtype (None or string): type of data
  - codec (list): list of Field codec values
  - parent (None or int): Field parent or None
  - keys (None or list): Field keys
  - coef (None or int): coef if primary Field else None
  - leng (int): length of the Field
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    *Class attributes*

    - **clas_obj**: 'Series' - class name of the object to convert
    - **clas_typ**: 'field' - NTVtype of the converted object
    - **types**: DataFrame built from the 'type' and 'column' entries of the
    'data' section of `ntv_pandas.ini` (the columns 'ntv_type', 'name_type'
    and 'dtype' are used by this class)
    - **astype**: dict loaded from the 'astype' entry of the same section
    (dtype name -> alias dtype name)
    - **deftype**: inverse mapping of `astype` (alias dtype name -> dtype name)

    *Static methods*

    - to_obj_ntv: return a Series from a Ntv field object
    - to_json_ntv: return the NTV json representation of a Series
    - to_idx: convert a Series into categorical data
    - to_series: return a Series from Field data
    - read_json: return a Series from a NTVvalue
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # the configuration file is read once, when the class is created
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    deftype = {val: key for key, val in astype.items()}

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *return*

        - a Series, or None if ntv_value is None or if the decoded field has
        a parent and no extkeys is given
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            DataFrameConnec.decode_ntv_tab(ntv)
        # a field with a parent cannot be rebuilt without external keys
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        # the raw kwargs are forwarded: to_series applies its own defaults
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **kwargs)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values

        *return (tuple)*

        - json-value of the Series, `name` unchanged, and `typ` (or 'field'
        if `typ` is empty)
        '''
        astype = SeriesConnec.astype
        ntv_type_val = SeriesConnec._ntv_type_val
        srs = value.astype(astype.get(value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = Ntv.from_obj_name(sr_name)[:2]

        if srs.dtype.name == 'category':
            # categorical Series: json-value is [codec, keys] where keys is
            # replaced by [coef] when encode_coef returns a non-zero value
            cdc = pd.Series(srs.cat.categories)
            ntv_type, cat_value = ntv_type_val(name_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type, ntv_value = ntv_type_val(name_type, srs)
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series into categorical data

        *return (dict)*

        - 'codec': list of pandas categories
        - 'name': name of the series
        - 'keys': list of pandas codes
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # Timestamp categories are returned as datetime objects expressed in UTC
        # NOTE(review): astimezone() on a naive datetime assumes local time - confirm
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the categorical Series (if empty or None,
        the Series is built directly from the codec values)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (only applied when ntv_keys is provided)
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = option['leng'] if len(
            ntv_codec) == 1 and option['leng'] else 1
        # only the NTV types listed in the configuration use pandas conversion
        pd_convert = ntv_type in types.index

        dtype = types.loc[ntv_type]['dtype'] if pd_convert else 'object'
        ntv_obj, pd_name, name_type = SeriesConnec._val_nam_typ(
            ntv_codec, ntv_type, ntv_name, pd_convert, option['annotated'])

        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec.read_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec.read_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def read_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a NTVvalue

        *Parameters*

        - **data**: json-value - values of the Series
        - **dtype**: string - dtype passed to `pandas.read_json`
        - **ntv_type**: string - NTV type used to select the post-conversion
        ('date', 'time', 'point', 'polygon', 'line', 'geometry', 'geojson')
        - **pd_name**: string (default None) - if not None, name of the Series
        '''
        # function-scope import: the module-level imports are left unchanged
        from io import StringIO

        # pandas deprecates literal json strings in read_json (since 2.1):
        # the text is wrapped in a file-like object
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        if ntv_type in ['point', 'polygon', 'line', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        return srs

    @staticmethod
    def _val_nam_typ(ntv_codec, ntv_type, ntv_name, pd_convert, annotated):
        ''' return Series data from ntv data

        *parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_type**: string - default type to apply to convert in dtype
        - **ntv_name**: string - name of the Series
        - **pd_convert**: boolean - if True, use pandas json conversion
        - **annotated**: boolean - if True, ntv_codec names are ignored

        *return (tuple)*

        - ntv_obj : list with ntv_codec json values converted to object values
        - pd_name : string with the Series name
        - name_type : string - pandas types to be converted in 'json' Ntv-type
        '''
        types = SeriesConnec.types.set_index('ntv_type')
        if pd_convert:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            # the Series name is suffixed with '::name_type' when name_type is not empty
            pd_name = ntv_name + '::' + name_type if name_type else ntv_name
            pd_name = pd_name if pd_name else None
            if name_type == 'array':
                ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True)
            else:
                ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                              def_type=ntv_codec.type_str, fast=True)
                ntv_obj = ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
            return (ntv_obj, pd_name, name_type)
        ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True, def_type=ntv_type)
        return (ntv_obj, ntv_name + '::' + ntv_type, ntv_type)

    @staticmethod
    def _ntv_type_val(name_type, srs):
        ''' convert a simple Series into NTV data (NTV type, NTV json-value). If name_type is None and
        dtype is 'object', the NTV value is the srs values.

        *Parameters*

        - **name_type** : string - default NTV type to be used. If None, dtype is converted in NTV type,
        - **srs** : Series to be converted.'''
        types = SeriesConnec.types.set_index('name_type')
        dtype = srs.dtype.name
        if not name_type:
            # no NTV type in the Series name: the type is deduced from the dtype
            types_none = types.loc[None]
            if dtype in types_none.dtype.values:
                ntv_type = types_none.set_index('dtype').loc[dtype].ntv_type
            else:
                ntv_type = 'json'
            return (ntv_type, json.loads(srs.to_json(orient='records',
                                                     date_format='iso', default_handler=str)))
        ntv_type = name_type
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return (ntv_type, srs.apply(ShapelyConnec.to_coord).to_list())
        if ntv_type == 'geojson':
            return (ntv_type, srs.apply(ShapelyConnec.to_geojson).to_list())
        if ntv_type == 'date':
            srs = srs.astype(str)
        if dtype == 'object':
            return (ntv_type, srs.to_list())
        return (ntv_type, json.loads(srs.to_json(orient='records',
                                                 date_format='iso', default_handler=str)))
NTV connector for pandas Series
Three static methods are included:
- to_idx: convert a Series into categorical data
- to_series: return a Series from Field data
- read_json: return a Series from a NTVvalue
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
    '''Build a pandas Series from a Ntv field object

    *Parameters*

    - **ntv_value**: Ntv object or Ntv value - field to convert in Series

    *parameters (kwargs)*

    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **index**: list (default None) - if present, add the index in Series
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

    *return*

    - a Series, or None when ntv_value is None or when the decoded field has
    a parent and no extkeys is supplied
    '''
    settings = {'extkeys': None, 'decode_str': False, 'leng': None,
                'annotated': False} | kwargs
    if ntv_value is None:
        return None

    ext_keys = settings['extkeys']
    field = Ntv.obj(ntv_value, decode_str=settings['decode_str'])
    (ntv_name, typ, codec, parent, ntv_keys, coef,
     leng_field) = DataFrameConnec.decode_ntv_tab(field)

    # a field with a parent needs external keys to be rebuilt
    if parent and not ext_keys:
        return None

    if coef:
        ntv_keys = NtvConnector.keysfromcoef(
            coef, leng_field // coef, settings['leng'])
    elif ext_keys:
        if parent:
            ntv_keys = NtvConnector.keysfromderkeys(ext_keys, ntv_keys)
        else:
            ntv_keys = ext_keys

    is_single = len(codec) == 1
    ntv_codec = Ntv.fast(Ntv.obj_ntv(codec, typ=typ, single=is_single))
    # the untouched kwargs are forwarded, to_series sets its own defaults
    return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **kwargs)
Generate a Series Object from a Ntv field object
Parameters
- ntv_value: Ntv object or Ntv value - value to convert in Series
parameters (kwargs)
- extkeys: list (default None) - keys to use if not present in ntv_value
- decode_str: boolean (default False) - if True, string values are converted in object values
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - length of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None):
    ''' return the NTV json data (json-value, name, type) of a Series.

    *Parameters*

    - **typ** : string (default None) - type of the NTV object ('field' if empty),
    - **name** : string (default None) - name of the NTV object (returned unchanged)
    - **value** : Series to convert'''
    alias = SeriesConnec.astype
    source_dtype = value.dtype.name
    srs = value.astype(alias.get(source_dtype, source_dtype))
    ntv_name, name_type = Ntv.from_obj_name(srs.name or '')[:2]
    out_typ = typ if typ else SeriesConnec.clas_typ

    if srs.dtype.name != 'category':
        ntv_type, ntv_value = SeriesConnec._ntv_type_val(name_type, srs)
    else:
        # categorical Series: json-value is [codec, keys], the keys being
        # replaced by [coef] when encode_coef returns a non-zero value
        categories = pd.Series(srs.cat.categories)
        ntv_type, codec = SeriesConnec._ntv_type_val(name_type, categories)
        codec_json = NtvList(codec, ntv_type=ntv_type).to_obj()
        codes = list(srs.cat.codes)
        coef = NtvConnector.encode_coef(codes)
        ntv_value = [codec_json, [coef] if coef else codes]
        ntv_type = 'json'

    if len(ntv_value) == 1:
        ntv = NtvSingle(ntv_value[0], ntv_name, ntv_type)
    else:
        ntv = NtvList(ntv_value, ntv_name, ntv_type)
    return (ntv.to_obj(), name, out_typ)
convert a Series (value, name, type) into NTV json (json-value, name, type).
Parameters
- typ : string (default None) - type of the NTV object,
- name : string (default None) - name of the NTV object
- value : Series values
@staticmethod
def to_idx(ser):
    ''' return the categorical representation of a Series

    *return (dict)*

    - 'codec': list of pandas categories (Timestamp categories are returned
    as datetime objects converted to UTC)
    - 'name': name of the series
    - 'keys': list of pandas codes
    '''
    cat_accessor = ser.astype('category').cat
    codec = list(cat_accessor.categories)
    if codec and isinstance(codec[0], pd.Timestamp):
        utc = datetime.timezone.utc
        codec = [stamp.to_pydatetime().astimezone(utc) for stamp in codec]
    return {'codec': codec, 'name': ser.name,
            'keys': list(cat_accessor.codes)}
convert a Series into categorical data
return (dict)
- 'codec': list of pandas categories
- 'name': name of the series
- 'keys': list of pandas codes
@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
    ''' build a pd.Series from Field data (codec, name, keys)

    *Parameters*

    - **ntv_codec**: Ntv object - codec value to convert in Series values
    - **ntv_name**: string - name of the Series
    - **ntv_keys**: list - codes of the categorical Series (if empty or None,
    the Series is built directly from the codec values)

    *parameters (kwargs)*

    - **index**: list (default None) - if present, add the index in Series
    (only applied when ntv_keys is provided)
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    '''
    settings = {'index': None, 'leng': None, 'alias': False,
                'annotated': False} | kwargs
    by_ntv_type = SeriesConnec.types.set_index('ntv_type')
    alias = SeriesConnec.astype

    ntv_type = ntv_codec.type_str
    # a single codec value is repeated 'leng' times
    repeat = 1
    if len(ntv_codec) == 1 and settings['leng']:
        repeat = settings['leng']
    # only the NTV types listed in the configuration use pandas conversion
    pd_convert = ntv_type in by_ntv_type.index
    dtype = by_ntv_type.loc[ntv_type]['dtype'] if pd_convert else 'object'

    ntv_obj, pd_name, name_type = SeriesConnec._val_nam_typ(
        ntv_codec, ntv_type, ntv_name, pd_convert, settings['annotated'])

    if not ntv_keys:
        values = ntv_obj * repeat
        if pd_convert:
            srs = SeriesConnec.read_json(values, dtype, ntv_type, pd_name)
        else:
            srs = pd.Series(values, name=pd_name, dtype=dtype)
    else:
        if pd_convert and name_type != 'array':
            converted = SeriesConnec.read_json(ntv_obj, dtype, ntv_type)
            converted_dtype = converted.dtype.name
            categories = converted.astype(
                alias.get(converted_dtype, converted_dtype))
        else:
            categories = pd.Series(ntv_obj, dtype='object')
        cat_dtype = pd.CategoricalDtype(categories=categories)
        coded = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat_dtype)
        srs = pd.Series(coded, name=pd_name,
                        index=settings['index'], dtype='category')

    # last step: dtype -> alias dtype (alias option) or alias dtype -> dtype
    mapping = alias if settings['alias'] else SeriesConnec.deftype
    final_dtype = srs.dtype.name
    return srs.astype(mapping.get(final_dtype, final_dtype))
return a pd.Series from Field data (codec, name, keys)
Parameters
- ntv_codec: Ntv object - codec value to convert in Series values
- ntv_type: string - default type to apply to convert in dtype
- ntv_name: string - name of the Series
parameters (kwargs)
- index: list (default None) - if present, add the index in Series
- leng: integer (default None) - length of the Series (used with single codec value)
- alias: boolean (default False) - if True, convert dtype in alias dtype
- annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def read_json(data, dtype, ntv_type, pd_name=None):
    '''return a Series from a NTVvalue

    *Parameters*

    - **data**: json-value - values of the Series
    - **dtype**: string - dtype passed to `pandas.read_json`
    - **ntv_type**: string - NTV type used to select the post-conversion
    ('date', 'time', 'point', 'polygon', 'line', 'geometry', 'geojson')
    - **pd_name**: string (default None) - if not None, name of the Series
    '''
    # function-scope import: the module-level imports are left unchanged
    from io import StringIO

    # pandas deprecates literal json strings in read_json (since 2.1):
    # the text is wrapped in a file-like object
    srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                       typ='series')
    if pd_name is not None:
        srs = srs.rename(pd_name)
    if ntv_type == 'date':
        return pd.to_datetime(srs).dt.date
    if ntv_type == 'time':
        return pd.to_datetime(srs).dt.time
    if ntv_type in ['point', 'polygon', 'line', 'geometry']:
        return srs.apply(ShapelyConnec.to_geometry)
    if ntv_type == 'geojson':
        return srs.apply(ShapelyConnec.from_geojson)
    return srs
return a Series from a NTVvalue
Inherited Members
- json_ntv.ntv_util.NtvConnector
- castable
- dic_obj
- dic_type
- connector
- dic_connec
- cast
- uncast
- is_json_class
- is_json
- keysfromderkeys
- encode_coef
- keysfromcoef
- init_ntv_keys