ntv-pandas.ntv_pandas.pandas_ntv_connector

Created on Feb 27 2023

@author: Philippe@loco-labs.io

The pandas_ntv_connector module is part of the ntv-pandas.ntv_pandas package (specification document: https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm).

A NtvConnector is defined by:

  • clas_obj: str - defines the class name of the object to convert
  • clas_typ: str - defines the NTVtype of the converted object
  • to_obj_ntv: method - converter from JsonNTV to the object
  • to_json_ntv: method - converter from the object to JsonNTV

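It contains:

  • functions read_json and to_json to convert JSON data and pandas entities
  • the child classes of the NTV.json_ntv.ntv.NtvConnector abstract class:
      • DataFrameConnec: 'tab' connector
      • SeriesConnec: 'field' connector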

  1# -*- coding: utf-8 -*-
  2"""
  3Created on Feb 27 2023
  4
  5@author: Philippe@loco-labs.io
  6
  7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package 
  8([specification document](
  9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
 10
 11A NtvConnector is defined by:
 12- clas_obj: str - define the class name of the object to convert
 13- clas_typ: str - define the NTVtype of the converted object
 14- to_obj_ntv: method - converter from JsonNTV to the object
 15- to_json_ntv: method - converter from the object to JsonNTV
 16
 17It contains :
 18
 19- functions `read_json` and `to_json` to convert JSON data and pandas entities
 20
 21- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
 22    - `DataFrameConnec`: 'tab'   connector
 23    - `SeriesConnec`:    'field' connector
 24    
 25"""
 26import datetime
 27import json
 28import configparser
 29from pathlib import Path
 30import pandas as pd
 31import numpy as np
 32
 33import ntv_pandas
 34from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
 35from json_ntv.ntv_connector import ShapelyConnec
 36
 37def to_json(pd_array, **kwargs):
 38    ''' convert pandas Series or Dataframe to JSON text or JSON Value.
 39    
 40    *parameters*
 41    
 42    - **pd_array** : Series or Dataframe to convert
 43    - **text** : boolean (default: False) - if True return a JSON text else a JSON value
 44    - **header** : boolean (default: True) - if True the JSON data is included as
 45    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
 46    ''' 
 47    option = {'text': False, 'header': True} | kwargs
 48    if isinstance(pd_array, pd.Series):
 49        jsn = SeriesConnec.to_json_ntv(pd_array)[0]
 50        head = ':field'
 51    else:
 52        jsn = DataFrameConnec.to_json_ntv(pd_array)[0]
 53        head = ':tab'
 54    if option['header']:      
 55        jsn = { head: jsn}
 56    if option['text']:
 57        return json.dumps(jsn)
 58    return jsn
 59    
 60def read_json(js, **kwargs):
 61    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
 62    
 63    *parameters*
 64    
 65    - **js** : JSON text or JSON value to convert
 66    - **extkeys**: list (default None) - keys to use if not present in ntv_value
 67    - **decode_str**: boolean (default False) - if True, string values are converted
 68    in object values
 69    - **leng**: integer (default None) - leng of the Series (used with single codec value)
 70    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
 71    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
 72    - **series**: boolean (default False) - used only without header. If True 
 73    JSON data is converted into Series else DataFrame
 74    ''' 
 75    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
 76              'annotated':False, 'series':False} | kwargs
 77    jso = json.loads(js) if isinstance(js, str) else js
 78    ntv = Ntv.from_obj(jso)
 79    if ntv.type_str == 'field':
 80        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
 81    if ntv.type_str == 'tab':
 82        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
 83    if option['series']:
 84        return SeriesConnec.to_obj_ntv(ntv, **option)
 85    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
 86
 87def as_def_type(pd_array):
 88    '''convert a Series or DataFrame with default dtype'''
 89    if isinstance(pd_array, pd.Series):
 90        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
 91    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})
 92        
 93class DataFrameConnec(NtvConnector):
 94    '''NTV connector for pandas DataFrame.
 95    
 96    Two static methods are included:
 97    
 98    - to_listidx: convert a DataFrame in categorical data 
 99    - decode_ntv_tab: Generate a tuple data from a NTVvalue
100    '''
101
102    clas_obj = 'DataFrame'
103    clas_typ = 'tab'
104
105    @staticmethod
106    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
107        ''' convert json ntv_value into a DataFrame.
108
109        *Parameters*
110
111        - **index** : list (default None) - list of index values,
112        - **alias** : boolean (default False) - if True, alias dtype else default dtype
113        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
114        series = SeriesConnec.to_series
115
116        ntv = Ntv.fast(ntv_value)
117        lidx = [list(DataFrameConnec.decode_ntv_tab(ntvf))
118                for ntvf in ntv]
119        leng = max([idx[6] for idx in lidx])
120        option = kwargs | {'leng': leng}
121        no_keys = []
122        for ind in range(len(lidx)):
123            no_keys.append(not lidx[ind][3]
124                           and not lidx[ind][4] and not lidx[ind][5])
125            NtvConnector.init_ntv_keys(ind, lidx, leng)
126            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(lidx[ind][2], typ=lidx[ind][1],
127                                                single=len(lidx[ind][2]) == 1))
128        list_series = [series(lidx[ind][2], lidx[ind][0],
129                              None if no_keys[ind] else lidx[ind][4], **option)
130                       for ind in range(len(lidx))]
131        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
132        if 'index' in dfr.columns:
133            dfr = dfr.set_index('index')
134            dfr.index.rename(None, inplace=True)
135        return dfr
136
137    @staticmethod
138    def to_json_ntv(value, name=None, typ=None):
139        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
140
141        *Parameters*
142
143        - **typ** : string (default None) - type of the NTV object,
144        - **name** : string (default None) - name of the NTV object
145        - **value** : DataFrame values'''
146        df2 = value.reset_index()
147        jsn = Ntv.obj([SeriesConnec.to_json_ntv(DataFrameConnec._unic(df2[col]))[0]
148                       for col in df2.columns]).to_obj()
149        return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
150
151    @staticmethod
152    def to_listidx(dtf):
153        ''' convert a DataFrame in categorical data (list of dict for each column
154        with keys : 'codec', 'name, 'keys' and length of the DataFrame)'''
155        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))
156
157    @staticmethod
158    def _unic(srs):
159        ''' return simple value if the Series contains a single value'''
160        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
161
162    @staticmethod
163    def decode_ntv_tab(field):
164        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
165
166        *Returns*
167
168        - **tuple** : name, dtype, codec, parent, keys, coef, leng
169            name (None or string): name of the Field
170            dtype (None or string): type of data
171            codec (list): list of Field codec values
172            parent (None or int): Field parent or None
173            keys (None or list): Field keys
174            coef (None or int): coef if primary Field else None
175            leng (int): length of the Field
176        '''
177        ntv = Ntv.obj(field)
178        typ = ntv.type_str if ntv.ntv_type else None
179        nam = ntv.name
180        if isinstance(ntv, NtvSingle):
181            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
182        val = [ntv_val.to_obj() for ntv_val in ntv]
183        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
184            return (nam, typ, val, None, None, None, len(ntv))
185
186        ntvc = ntv[0]
187        leng = max(len(ind) for ind in ntv)
188        typc = ntvc.type_str if ntvc.ntv_type else None
189        valc = ntvc.to_obj(simpleval=True)
190        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
191                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
192                isinstance(ntv[2][0].val, int):
193            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
194        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
195            return (nam, typc, valc, ntv[1].val, None, None, leng)
196        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
197            leng = leng * ntv[1][0].val
198            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
199        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
200            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
201        return (nam, typ, val, None, None, None, len(ntv))
202
203
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Three static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    - read_json: return a Series from a NTVvalue
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # the conversion tables are loaded once, when the class is created, from the
    # 'ntv_pandas.ini' file located in the directory of the ntv_pandas package
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None if ntv_value is None or if the field has a
        parent and no 'extkeys' is given.
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            DataFrameConnec.decode_ntv_tab(ntv)
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **kwargs)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values'''

        astype = SeriesConnec.astype
        ntv_type_val = SeriesConnec._ntv_type_val
        srs = value.astype(astype.get(value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = Ntv.from_obj_name(sr_name)[:2]

        if srs.dtype.name == 'category':
            # a categorical Series is sent as [categories, codes] or
            # [categories, [coef]] when encode_coef returns a non-zero coef
            cdc = pd.Series(srs.cat.categories)
            ntv_type, cat_value = ntv_type_val(name_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type, ntv_value = ntv_type_val(name_type, srs)
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        - 'codec': list of pandas categories (Timestamp categories are converted
        to UTC datetime objects)
        - 'name': name of the series
        - 'keys': list of pandas codes
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # only the first category is tested to detect Timestamp categories
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values in the codec. If present, a
        categorical Series is built; if None or empty, the codec holds the values

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (used only with ntv_keys)
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = option['leng'] if len(
            ntv_codec) == 1 and option['leng'] else 1
        pd_convert = ntv_type in types.index

        dtype = types.loc[ntv_type]['dtype'] if pd_convert else 'object'
        ntv_obj, pd_name, name_type = SeriesConnec._val_nam_typ(
            ntv_codec, ntv_type, ntv_name, pd_convert, option['annotated'])

        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec.read_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec.read_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def read_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a NTVvalue

        *Parameters*

        - **data**: json-value - dumped with `json.dumps` and read with `pd.read_json`
        - **dtype**: dtype given to `pd.read_json`
        - **ntv_type**: string - 'date', 'time', 'point', 'polygon', 'line',
        'geometry' and 'geojson' values are post-processed, other values are not
        - **pd_name**: string (default None) - if not None, name of the Series
        '''
        from io import StringIO

        # passing a literal JSON string to pd.read_json is deprecated
        # (pandas 2.1): the text is wrapped in a file-like object
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        if ntv_type in ['point', 'polygon', 'line', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        return srs

    @staticmethod
    def _val_nam_typ(ntv_codec, ntv_type, ntv_name, pd_convert, annotated):
        ''' return Series data from ntv data

        *parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_type**: string - default type to apply to convert in dtype
        - **ntv_name**: string - name of the Series
        - **pd_convert**: boolean - if True, use pandas json conversion
        - **annotated**: boolean - if True, ntv_codec names are ignored

        *return (tuple)*

        - ntv_obj : list with ntv_codec json values converted to object values
        - pd_name : string with the Serie name
        - name_type : string - pandas types to be converted in 'json' Ntv-type
        '''
        types = SeriesConnec.types.set_index('ntv_type')
        if pd_convert:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            # the name_type, if any, is appended to the name: 'name::name_type'
            pd_name = ntv_name + '::' + name_type if name_type else ntv_name
            pd_name = pd_name if pd_name else None
            if name_type == 'array':
                ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True)
            else:
                ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                              def_type=ntv_codec.type_str, fast=True)
                ntv_obj = ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
            return (ntv_obj, pd_name, name_type)
        ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True, def_type=ntv_type)
        return (ntv_obj, ntv_name + '::' + ntv_type, ntv_type)

    @staticmethod
    def _ntv_type_val(name_type, srs):
        ''' convert a simple Series into NTV data (NTV type, NTV json-value). If name_type is None and
        dtype is 'object', the NTV value is the srs values.

        *Parameters*

        - **name_type** : string - default NTV type to be used. If None, dtype is converted in NTV type,
        - **srs** : Series to be converted.'''
        types = SeriesConnec.types.set_index('name_type')
        dtype = srs.dtype.name
        if not name_type:
            # the NTV type is searched among the rows without name_type;
            # a dtype without row gives the 'json' type
            types_none = types.loc[None]
            if dtype in types_none.dtype.values:
                ntv_type = types_none.set_index('dtype').loc[dtype].ntv_type
            else:
                ntv_type = 'json'
            return (ntv_type, json.loads(srs.to_json(orient='records',
                        date_format='iso', default_handler=str)))
        ntv_type = name_type
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return (ntv_type, srs.apply(ShapelyConnec.to_coord).to_list())
        if ntv_type == 'geojson':
            return (ntv_type, srs.apply(ShapelyConnec.to_geojson).to_list())
        if ntv_type == 'date':
            srs = srs.astype(str)
        if dtype == 'object':
            return (ntv_type, srs.to_list())
        return (ntv_type, json.loads(srs.to_json(orient='records',
                        date_format='iso', default_handler=str)))
def to_json(pd_array, **kwargs):
def to_json(pd_array, **kwargs):
    ''' serialize a pandas Series or DataFrame as a JSON value or a JSON text.

    *parameters*

    - **pd_array** : Series or DataFrame to convert (anything that is not a
    Series is handed to the DataFrame connector)
    - **text** : boolean (default: False) - if True return a JSON text
    (`json.dumps`) else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    '''
    settings = {'text': False, 'header': True} | kwargs
    is_series = isinstance(pd_array, pd.Series)
    connector = SeriesConnec if is_series else DataFrameConnec
    # to_json_ntv returns (json-value, name, type): only the json-value is kept
    payload = connector.to_json_ntv(pd_array)[0]
    if settings['header']:
        payload = {':field' if is_series else ':tab': payload}
    return json.dumps(payload) if settings['text'] else payload

Convert a pandas Series or DataFrame to a JSON text or a JSON value.

parameters

  • pd_array : Series or Dataframe to convert
  • text : boolean (default: False) - if True return a JSON text else a JSON value
  • header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
def read_json(js, **kwargs):
def read_json(js, **kwargs):
    ''' build a pandas Series or DataFrame from a JSON text or a JSON value.

    *parameters*

    - **js** : JSON text or JSON value to convert (a `str` is parsed with `json.loads`)
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    settings = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
                'annotated': False, 'series': False} | kwargs
    parsed = js if not isinstance(js, str) else json.loads(js)
    ntv = Ntv.from_obj(parsed)
    header = ntv.type_str
    # a ':field' header selects the Series connector
    if header == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **settings)
    # without ':tab' header the 'series' option decides; the whole Ntv object is handed over
    if header != 'tab' and settings['series']:
        return SeriesConnec.to_obj_ntv(ntv, **settings)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **settings)

Convert a JSON text or a JSON value to a pandas Series or DataFrame.

parameters

  • js : JSON text or JSON value to convert
  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
  • series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def as_def_type(pd_array):
def as_def_type(pd_array):
    '''return a copy of a Series or DataFrame converted to the default dtype.

    The dtype name of a Series is looked up in `SeriesConnec.deftype`; a dtype
    without entry is left as it is. A DataFrame is rebuilt column by column.'''
    if not isinstance(pd_array, pd.Series):
        return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})
    target = SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name)
    return pd_array.astype(target)

convert a Series or DataFrame with default dtype

class DataFrameConnec(json_ntv.ntv_util.NtvConnector):
 94class DataFrameConnec(NtvConnector):
 95    '''NTV connector for pandas DataFrame.
 96    
 97    Two static methods are included:
 98    
 99    - to_listidx: convert a DataFrame in categorical data 
100    - decode_ntv_tab: Generate a tuple data from a NTVvalue
101    '''
102
103    clas_obj = 'DataFrame'
104    clas_typ = 'tab'
105
106    @staticmethod
107    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
108        ''' convert json ntv_value into a DataFrame.
109
110        *Parameters*
111
112        - **index** : list (default None) - list of index values,
113        - **alias** : boolean (default False) - if True, alias dtype else default dtype
114        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
115        series = SeriesConnec.to_series
116
117        ntv = Ntv.fast(ntv_value)
118        lidx = [list(DataFrameConnec.decode_ntv_tab(ntvf))
119                for ntvf in ntv]
120        leng = max([idx[6] for idx in lidx])
121        option = kwargs | {'leng': leng}
122        no_keys = []
123        for ind in range(len(lidx)):
124            no_keys.append(not lidx[ind][3]
125                           and not lidx[ind][4] and not lidx[ind][5])
126            NtvConnector.init_ntv_keys(ind, lidx, leng)
127            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(lidx[ind][2], typ=lidx[ind][1],
128                                                single=len(lidx[ind][2]) == 1))
129        list_series = [series(lidx[ind][2], lidx[ind][0],
130                              None if no_keys[ind] else lidx[ind][4], **option)
131                       for ind in range(len(lidx))]
132        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
133        if 'index' in dfr.columns:
134            dfr = dfr.set_index('index')
135            dfr.index.rename(None, inplace=True)
136        return dfr
137
138    @staticmethod
139    def to_json_ntv(value, name=None, typ=None):
140        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
141
142        *Parameters*
143
144        - **typ** : string (default None) - type of the NTV object,
145        - **name** : string (default None) - name of the NTV object
146        - **value** : DataFrame values'''
147        df2 = value.reset_index()
148        jsn = Ntv.obj([SeriesConnec.to_json_ntv(DataFrameConnec._unic(df2[col]))[0]
149                       for col in df2.columns]).to_obj()
150        return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
151
152    @staticmethod
153    def to_listidx(dtf):
154        ''' convert a DataFrame in categorical data (list of dict for each column
155        with keys : 'codec', 'name, 'keys' and length of the DataFrame)'''
156        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))
157
158    @staticmethod
159    def _unic(srs):
160        ''' return simple value if the Series contains a single value'''
161        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
162
163    @staticmethod
164    def decode_ntv_tab(field):
165        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
166
167        *Returns*
168
169        - **tuple** : name, dtype, codec, parent, keys, coef, leng
170            name (None or string): name of the Field
171            dtype (None or string): type of data
172            codec (list): list of Field codec values
173            parent (None or int): Field parent or None
174            keys (None or list): Field keys
175            coef (None or int): coef if primary Field else None
176            leng (int): length of the Field
177        '''
178        ntv = Ntv.obj(field)
179        typ = ntv.type_str if ntv.ntv_type else None
180        nam = ntv.name
181        if isinstance(ntv, NtvSingle):
182            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
183        val = [ntv_val.to_obj() for ntv_val in ntv]
184        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
185            return (nam, typ, val, None, None, None, len(ntv))
186
187        ntvc = ntv[0]
188        leng = max(len(ind) for ind in ntv)
189        typc = ntvc.type_str if ntvc.ntv_type else None
190        valc = ntvc.to_obj(simpleval=True)
191        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
192                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
193                isinstance(ntv[2][0].val, int):
194            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
195        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
196            return (nam, typc, valc, ntv[1].val, None, None, leng)
197        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
198            leng = leng * ntv[1][0].val
199            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
200        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
201            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
202        return (nam, typ, val, None, None, None, len(ntv))

NTV connector for pandas DataFrame.

Two static methods are included:

  • to_listidx: convert a DataFrame in categorical data
  • decode_ntv_tab: Generate a tuple data from a NTVvalue
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
106    @staticmethod
107    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
108        ''' convert json ntv_value into a DataFrame.
109
110        *Parameters*
111
112        - **index** : list (default None) - list of index values,
113        - **alias** : boolean (default False) - if True, alias dtype else default dtype
114        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
115        series = SeriesConnec.to_series
116
117        ntv = Ntv.fast(ntv_value)
118        lidx = [list(DataFrameConnec.decode_ntv_tab(ntvf))
119                for ntvf in ntv]
120        leng = max([idx[6] for idx in lidx])
121        option = kwargs | {'leng': leng}
122        no_keys = []
123        for ind in range(len(lidx)):
124            no_keys.append(not lidx[ind][3]
125                           and not lidx[ind][4] and not lidx[ind][5])
126            NtvConnector.init_ntv_keys(ind, lidx, leng)
127            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(lidx[ind][2], typ=lidx[ind][1],
128                                                single=len(lidx[ind][2]) == 1))
129        list_series = [series(lidx[ind][2], lidx[ind][0],
130                              None if no_keys[ind] else lidx[ind][4], **option)
131                       for ind in range(len(lidx))]
132        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
133        if 'index' in dfr.columns:
134            dfr = dfr.set_index('index')
135            dfr.index.rename(None, inplace=True)
136        return dfr

convert json ntv_value into a DataFrame.

Parameters

  • index : list (default None) - list of index values,
  • alias : boolean (default False) - if True, alias dtype else default dtype
  • annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None):
138    @staticmethod
139    def to_json_ntv(value, name=None, typ=None):
140        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
141
142        *Parameters*
143
144        - **typ** : string (default None) - type of the NTV object (clas_typ if None),
145        - **name** : string (default None) - name of the NTV object
146        - **value** : DataFrame to convert'''
147        df2 = value.reset_index()  # the index is converted like any other column
148        jsn = Ntv.obj([SeriesConnec.to_json_ntv(DataFrameConnec._unic(df2[col]))[0]  # [0]: json-value only
149                       for col in df2.columns]).to_obj()
150        return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)

convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : DataFrame values
@staticmethod
def to_listidx(dtf):
152    @staticmethod
153    def to_listidx(dtf):
154        ''' convert a DataFrame into categorical data: a tuple with the list of dicts
155        (keys: 'codec', 'name', 'keys'), one per column, and the length of the DataFrame'''
156        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))

convert a DataFrame into categorical data: a tuple with the list of dicts (keys: 'codec', 'name', 'keys'), one per column, and the length of the DataFrame

@staticmethod
def decode_ntv_tab(field):
163    @staticmethod
164    def decode_ntv_tab(field):
165        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
166
167        *Returns*
168
169        - **tuple** : name, dtype, codec, parent, keys, coef, leng
170            name (None or string): name of the Field
171            dtype (None or string): type of data
172            codec (list): list of Field codec values
173            parent (None, int or string): Field parent or None
174            keys (None or list): Field keys
175            coef (None or int): coef if primary Field else None
176            leng (int): length of the Field
177        '''
178        ntv = Ntv.obj(field)
179        typ = ntv.type_str if ntv.ntv_type else None
180        nam = ntv.name
181        if isinstance(ntv, NtvSingle):  # single value: codec with one item, leng 1
182            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
183        val = [ntv_val.to_obj() for ntv_val in ntv]
184        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
185            return (nam, typ, val, None, None, None, len(ntv))  # plain list of values
186
187        ntvc = ntv[0]  # first item is used as the codec in the cases below
188        leng = max(len(ind) for ind in ntv)
189        typc = ntvc.type_str if ntvc.ntv_type else None
190        valc = ntvc.to_obj(simpleval=True)
191        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
192                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
193                isinstance(ntv[2][0].val, int):  # codec + parent + keys
194            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
195        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):  # codec + parent
196            return (nam, typc, valc, ntv[1].val, None, None, leng)
197        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):  # codec + [coef]
198            leng = leng * ntv[1][0].val  # leng is scaled by the coef
199            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
200        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):  # codec + keys
201            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
202        return (nam, typ, val, None, None, None, len(ntv))  # fallback: plain list of values

Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

Returns

  • tuple : name, dtype, codec, parent, keys, coef, leng
      ◦ name (None or string): name of the Field
      ◦ dtype (None or string): type of data
      ◦ codec (list): list of Field codec values
      ◦ parent (None, int or string): Field parent or None
      ◦ keys (None or list): Field keys
      ◦ coef (None or int): coef if primary Field else None
      ◦ leng (int): length of the Field
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class SeriesConnec(json_ntv.ntv_util.NtvConnector):
205class SeriesConnec(NtvConnector):
206    '''NTV connector for pandas Series
207    
208    Three static methods are included:
209    
210    - to_idx: convert a Series in categorical data 
211    - to_series: return a Series from Field data
212    - read_json: return a Series from a NTVvalue
213    '''
214    clas_obj = 'Series'
215    clas_typ = 'field'
216    config = configparser.ConfigParser()  # settings are read from ntv_pandas.ini in the package directory
217    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
218    types = pd.DataFrame(json.loads(config['data']['type']), columns=json.loads(config['data']['column']))
219    astype = json.loads(config['data']['astype'])
220    deftype = {val: key for key, val in astype.items()}  # reverse mapping of astype
221
222    @staticmethod
223    def to_obj_ntv(ntv_value, **kwargs):
224        '''Generate a Series Object from a Ntv field object
225
226        *Parameters*
227
228        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
229
230        *parameters (kwargs)*
231
232        - **extkeys**: list (default None) - keys to use if not present in ntv_value
233        - **decode_str**: boolean (default False) - if True, string values are converted
234        in object values
235        - **index**: list (default None) - if present, add the index in Series
236        - **leng**: integer (default None) - length of the Series (used with single codec value)
237        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
238        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
239        '''
240        option = {'extkeys': None, 'decode_str': False, 'leng': None,
241                  'annotated':False} | kwargs
242        if ntv_value is None:
243            return None
244        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
245
246        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
247            DataFrameConnec.decode_ntv_tab(ntv)
248        if parent and not option['extkeys']:  # a field with a parent is not built without extkeys
249            return None
250        if coef:
251            ntv_keys = NtvConnector.keysfromcoef(
252                coef, leng_field//coef, option['leng'])
253        elif option['extkeys'] and parent:
254            ntv_keys = NtvConnector.keysfromderkeys(
255                option['extkeys'], ntv_keys)
256        elif option['extkeys'] and not parent:
257            ntv_keys = option['extkeys']
258        ntv_codec = Ntv.fast(Ntv.obj_ntv(
259            codec, typ=typ, single=len(codec) == 1))
260        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **kwargs)  # kwargs, not option, is forwarded
261
262    @staticmethod
263    def to_json_ntv(value, name=None, typ=None):
264        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).
265
266        *Parameters*
267
268        - **typ** : string (default None) - type of the NTV object (clas_typ if None),
269        - **name** : string (default None) - name of the NTV object
270        - **value** : Series values'''
271
272        astype = SeriesConnec.astype
273        ntv_type_val = SeriesConnec._ntv_type_val
274        srs = value.astype(astype.get(value.dtype.name, value.dtype.name))
275        sr_name = srs.name if srs.name else ''
276        ntv_name, name_type = Ntv.from_obj_name(sr_name)[:2]
277
278        if srs.dtype.name == 'category':
279            cdc = pd.Series(srs.cat.categories)
280            ntv_type, cat_value = ntv_type_val(name_type, cdc)
281            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
282            cod_value = list(srs.cat.codes)
283            coef = NtvConnector.encode_coef(cod_value)
284            ntv_value = [cat_value, [coef] if coef else cod_value]  # [codec, [coef]] or [codec, keys]
285            ntv_type = 'json'
286        else:
287            ntv_type, ntv_value = ntv_type_val(name_type, srs)
288        if len(ntv_value) == 1:  # a single value is returned as NtvSingle instead of NtvList
289            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
290                    SeriesConnec.clas_typ if not typ else typ)
291        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
292                SeriesConnec.clas_typ if not typ else typ)
293
294    @staticmethod
295    def to_idx(ser):
296        ''' convert a Series in categorical data
297
298        *return (dict)*
299
300        - 'codec': list of pandas categories
301        - 'name': name of the series
302        - 'keys': list of pandas codes
303        '''
304        idx = ser.astype('category')
305        lis = list(idx.cat.categories)
306        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):  # Timestamp -> UTC datetime
307            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
308                   for ts in lis]
309        return {'codec': lis, 'name': ser .name, 'keys': list(idx.cat.codes)}
310
311    @staticmethod
312    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
313        ''' return a pd.Series from Field data (codec, name, keys)
314
315        *Parameters*
316
317        - **ntv_codec**: Ntv object - codec value to convert in Series values
318        - **ntv_name**: string - name of the Series
319        - **ntv_keys**: list (or None) - codes of the values in ntv_codec; if present, a categorical Series is built
320
321        *parameters (kwargs)*
322
323        - **index**: list (default None) - if present, add the index in Series
324        - **leng**: integer (default None) - length of the Series (used with single codec value)
325        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
326        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
327        '''
328        option = {'index': None, 'leng': None, 'alias': False,
329                  'annotated': False} | kwargs
330        types = SeriesConnec.types.set_index('ntv_type')
331        astype = SeriesConnec.astype
332
333        ntv_type = ntv_codec.type_str
334        len_unique = option['leng'] if len(
335            ntv_codec) == 1 and option['leng'] else 1
336        pd_convert = ntv_type in types.index
337
338        dtype = types.loc[ntv_type]['dtype'] if pd_convert else 'object'
339        ntv_obj, pd_name, name_type = SeriesConnec._val_nam_typ(
340            ntv_codec, ntv_type, ntv_name, pd_convert, option['annotated'])
341
342        if ntv_keys:
343            if pd_convert and name_type != 'array':
344                categ = SeriesConnec.read_json(ntv_obj, dtype, ntv_type)
345                cat_type = categ.dtype.name
346                categories = categ.astype(astype.get(cat_type, cat_type))
347            else:
348                categories = pd.Series(ntv_obj, dtype='object')
349            cat = pd.CategoricalDtype(categories=categories)
350            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
351            srs = pd.Series(data, name=pd_name,
352                            index=option['index'], dtype='category')
353        else:
354            data = ntv_obj * len_unique  # a single codec value is repeated 'leng' times
355            if pd_convert:
356                srs = SeriesConnec.read_json(data, dtype, ntv_type, pd_name)
357            else:
358                srs = pd.Series(data, name=pd_name, dtype=dtype)
359        
360        if option['alias']:
361            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
362        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))  # deftype: reverse of astype
363
364    @staticmethod 
365    def read_json(data, dtype, ntv_type, pd_name=None):
366        '''return a Series from a list of json values (date, time and geometry Ntv types are converted)'''
367        srs = pd.read_json(json.dumps(data), dtype=dtype,
368                           typ='series')
369        if not pd_name is None:
370            srs = srs.rename(pd_name)
371        if ntv_type == 'date':
372            return pd.to_datetime(srs).dt.date
373        if ntv_type == 'time':
374            return pd.to_datetime(srs).dt.time
375        if ntv_type in ['point', 'polygon', 'line', 'geometry']:
376            return srs.apply(ShapelyConnec.to_geometry)
377        if ntv_type == 'geojson':
378            return srs.apply(ShapelyConnec.from_geojson)
379        return srs
380
381    @staticmethod
382    def _val_nam_typ(ntv_codec, ntv_type, ntv_name, pd_convert, annotated):
383        ''' return Series data from ntv data
384
385        *parameters*
386
387        - **ntv_codec**: Ntv object - codec value to convert in Series values
388        - **ntv_type**: string - default type to apply to convert in dtype
389        - **ntv_name**: string - name of the Series
390        - **pd_convert**: boolean - if True, use pandas json conversion
391        - **annotated**: boolean - if True, ntv_codec names are ignored
392
393        *return (tuple)*
394
395        - ntv_obj : list with ntv_codec json values converted to object values
396        - pd_name : string with the Series name
397        - name_type : string - pandas types to be converted in 'json' Ntv-type
398        '''
399        types = SeriesConnec.types.set_index('ntv_type')
400        if pd_convert:
401            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
402            pd_name = ntv_name + '::' + name_type if name_type else ntv_name
403            pd_name = pd_name if pd_name else None
404            if name_type == 'array':
405                ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True)
406            else:
407                ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
408                                              def_type=ntv_codec.type_str, fast=True)
409                ntv_obj = ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
410            return (ntv_obj, pd_name, name_type)
411        ntv_obj = ntv_codec.to_obj(format='obj', simpleval=True, def_type=ntv_type)
412        return (ntv_obj, ntv_name + '::' + ntv_type, ntv_type)  # no pandas conversion: the Ntv type is appended to the name
413
414    @staticmethod
415    def _ntv_type_val(name_type, srs):
416        ''' convert a simple Series into NTV data (NTV type, NTV json-value). If name_type is None and
417        dtype is 'object', the NTV value is the srs values.
418
419        *Parameters*
420
421        - **name_type** : string - default NTV type to be used. If None, dtype is converted in NTV type,
422        - **srs** : Series to be converted.'''
423        types = SeriesConnec.types.set_index('name_type')
424        dtype = srs.dtype.name
425        if not name_type:
426            types_none = types.loc[None]  # rows of the type table without name_type
427            if dtype in types_none.dtype.values:
428                ntv_type = types_none.set_index('dtype').loc[dtype].ntv_type
429            else:
430                ntv_type = 'json'
431            return (ntv_type, json.loads(srs.to_json(orient='records',
432                        date_format='iso', default_handler=str)))
433        ntv_type = name_type
434        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
435            return (ntv_type, srs.apply(ShapelyConnec.to_coord).to_list())
436        if ntv_type == 'geojson':
437            return (ntv_type, srs.apply(ShapelyConnec.to_geojson).to_list())
438        if ntv_type == 'date':
439            srs = srs.astype(str)
440        if dtype == 'object':
441            return (ntv_type, srs.to_list())
442        return (ntv_type, json.loads(srs.to_json(orient='records',
443                        date_format='iso', default_handler=str)))
444
445    """@staticmethod 
446    def _encode_coef(lis):
447        '''Generate a repetition coefficient for periodic list'''
448        if len(lis) < 2:
449            return 0
450        coef = 1
451        while coef != len(lis):
452            if lis[coef-1] != lis[coef]:
453                break
454            coef += 1
455        if (not len(lis) % (coef * (max(lis) + 1)) and 
456            lis == SeriesConnec._keys_from_coef(coef, max(lis) + 1, len(lis))):
457            return coef
458        return 0
459
460    @staticmethod 
461    def _keys_from_coef(coef, period, leng=None):
462        ''' return a list of keys with periodic structure'''
463        if not leng:
464            leng = coef * period
465        return None if not (coef and period) else [(ind % (coef * period)) // coef 
466                                                   for ind in range(leng)]"""

NTV connector for pandas Series

Three static methods are included:

  • to_idx: convert a Series in categorical data
  • to_series: return a Series from Field data
  • read_json: return a Series from a NTVvalue
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
222    @staticmethod
223    def to_obj_ntv(ntv_value, **kwargs):
224        '''Generate a Series Object from a Ntv field object
225
226        *Parameters*
227
228        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
229
230        *parameters (kwargs)*
231
232        - **extkeys**: list (default None) - keys to use if not present in ntv_value
233        - **decode_str**: boolean (default False) - if True, string values are converted
234        in object values
235        - **index**: list (default None) - if present, add the index in Series
236        - **leng**: integer (default None) - length of the Series (used with single codec value)
237        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
238        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
239        '''
240        option = {'extkeys': None, 'decode_str': False, 'leng': None,
241                  'annotated':False} | kwargs
242        if ntv_value is None:
243            return None
244        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
245
246        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
247            DataFrameConnec.decode_ntv_tab(ntv)
248        if parent and not option['extkeys']:  # a field with a parent is not built without extkeys
249            return None
250        if coef:
251            ntv_keys = NtvConnector.keysfromcoef(
252                coef, leng_field//coef, option['leng'])
253        elif option['extkeys'] and parent:
254            ntv_keys = NtvConnector.keysfromderkeys(
255                option['extkeys'], ntv_keys)
256        elif option['extkeys'] and not parent:
257            ntv_keys = option['extkeys']
258        ntv_codec = Ntv.fast(Ntv.obj_ntv(
259            codec, typ=typ, single=len(codec) == 1))
260        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **kwargs)  # kwargs, not option, is forwarded

Generate a Series Object from a Ntv field object

Parameters

  • ntv_value: Ntv object or Ntv value - value to convert in Series

parameters (kwargs)

  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None):
262    @staticmethod
263    def to_json_ntv(value, name=None, typ=None):
264        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).
265
266        *Parameters*
267
268        - **typ** : string (default None) - type of the NTV object (clas_typ if None),
269        - **name** : string (default None) - name of the NTV object
270        - **value** : Series values'''
271
272        astype = SeriesConnec.astype
273        ntv_type_val = SeriesConnec._ntv_type_val
274        srs = value.astype(astype.get(value.dtype.name, value.dtype.name))
275        sr_name = srs.name if srs.name else ''
276        ntv_name, name_type = Ntv.from_obj_name(sr_name)[:2]
277
278        if srs.dtype.name == 'category':
279            cdc = pd.Series(srs.cat.categories)
280            ntv_type, cat_value = ntv_type_val(name_type, cdc)
281            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
282            cod_value = list(srs.cat.codes)
283            coef = NtvConnector.encode_coef(cod_value)
284            ntv_value = [cat_value, [coef] if coef else cod_value]  # [codec, [coef]] or [codec, keys]
285            ntv_type = 'json'
286        else:
287            ntv_type, ntv_value = ntv_type_val(name_type, srs)
288        if len(ntv_value) == 1:  # a single value is returned as NtvSingle instead of NtvList
289            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
290                    SeriesConnec.clas_typ if not typ else typ)
291        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
292                SeriesConnec.clas_typ if not typ else typ)

convert a Series (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : Series values
@staticmethod
def to_idx(ser):
294    @staticmethod
295    def to_idx(ser):
296        ''' convert a Series into categorical data
297
298        *return (dict)*
299
300        - 'codec': list of pandas categories
301        - 'name': name of the series
302        - 'keys': list of pandas codes
303        '''
304        idx = ser.astype('category')
305        lis = list(idx.cat.categories)
306        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):  # Timestamp -> UTC datetime
307            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
308                   for ts in lis]
309        return {'codec': lis, 'name': ser .name, 'keys': list(idx.cat.codes)}

convert a Series in categorical data

return (dict)

  • 'codec': list of pandas categories
  • 'name': name of the series
  • 'keys': list of pandas codes
@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
311    @staticmethod
312    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
313        ''' return a pd.Series from Field data (codec, name, keys)
314
315        *Parameters*
316
317        - **ntv_codec**: Ntv object - codec value to convert in Series values
318        - **ntv_name**: string - name of the Series
319        - **ntv_keys**: list (or None) - codes of the values in ntv_codec; if present, a categorical Series is built
320
321        *parameters (kwargs)*
322
323        - **index**: list (default None) - if present, add the index in Series
324        - **leng**: integer (default None) - length of the Series (used with single codec value)
325        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
326        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
327        '''
328        option = {'index': None, 'leng': None, 'alias': False,
329                  'annotated': False} | kwargs
330        types = SeriesConnec.types.set_index('ntv_type')
331        astype = SeriesConnec.astype
332
333        ntv_type = ntv_codec.type_str
334        len_unique = option['leng'] if len(
335            ntv_codec) == 1 and option['leng'] else 1
336        pd_convert = ntv_type in types.index
337
338        dtype = types.loc[ntv_type]['dtype'] if pd_convert else 'object'
339        ntv_obj, pd_name, name_type = SeriesConnec._val_nam_typ(
340            ntv_codec, ntv_type, ntv_name, pd_convert, option['annotated'])
341
342        if ntv_keys:
343            if pd_convert and name_type != 'array':
344                categ = SeriesConnec.read_json(ntv_obj, dtype, ntv_type)
345                cat_type = categ.dtype.name
346                categories = categ.astype(astype.get(cat_type, cat_type))
347            else:
348                categories = pd.Series(ntv_obj, dtype='object')
349            cat = pd.CategoricalDtype(categories=categories)
350            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
351            srs = pd.Series(data, name=pd_name,
352                            index=option['index'], dtype='category')
353        else:
354            data = ntv_obj * len_unique  # a single codec value is repeated 'leng' times
355            if pd_convert:
356                srs = SeriesConnec.read_json(data, dtype, ntv_type, pd_name)
357            else:
358                srs = pd.Series(data, name=pd_name, dtype=dtype)
359        
360        if option['alias']:
361            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
362        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))  # deftype: reverse of astype

return a pd.Series from Field data (codec, name, keys)

Parameters

  • ntv_codec: Ntv object - codec value to convert in Series values
  • ntv_name: string - name of the Series
  • ntv_keys: list (or None) - codes of the values in ntv_codec; if present, a categorical Series is built

parameters (kwargs)

  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def read_json(data, dtype, ntv_type, pd_name=None):
364    @staticmethod 
365    def read_json(data, dtype, ntv_type, pd_name=None):
366        '''return a Series from a list of json values (date, time and geometry Ntv types are converted)'''
367        srs = pd.read_json(json.dumps(data), dtype=dtype,
368                           typ='series')
369        if not pd_name is None:
370            srs = srs.rename(pd_name)
371        if ntv_type == 'date':
372            return pd.to_datetime(srs).dt.date
373        if ntv_type == 'time':
374            return pd.to_datetime(srs).dt.time
375        if ntv_type in ['point', 'polygon', 'line', 'geometry']:
376            return srs.apply(ShapelyConnec.to_geometry)
377        if ntv_type == 'geojson':
378            return srs.apply(ShapelyConnec.from_geojson)
379        return srs

return a Series from a NTVvalue

Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys