ntv-pandas.ntv_pandas.pandas_ntv_connector

Created on Feb 27 2023

@author: Philippe@loco-labs.io

The pandas_ntv_connector module is part of the ntv-pandas.ntv_pandas package (specification document).

A NtvConnector is defined by:

  • clas_obj: str - define the class name of the object to convert
  • clas_typ: str - define the NTVtype of the converted object
  • to_obj_ntv: method - converter from JsonNTV to the object
  • to_json_ntv: method - converter from the object to JsonNTV

It contains :

  • functions read_json and to_json to convert between JSON data and pandas entities

  • the child classes of NTV.json_ntv.ntv.NtvConnector abstract class: DataFrameConnec ('tab' connector) and SeriesConnec ('field' connector)

  • a utility class with static methods : PdUtil

  1# -*- coding: utf-8 -*-
  2"""
  3Created on Feb 27 2023
  4
  5@author: Philippe@loco-labs.io
  6
  7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package 
  8([specification document](
  9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
 10
 11A NtvConnector is defined by:
 12- clas_obj: str - define the class name of the object to convert
 13- clas_typ: str - define the NTVtype of the converted object
 14- to_obj_ntv: method - converter from JsonNTV to the object
 15- to_json_ntv: method - converter from the object to JsonNTV
 16
 17It contains :
 18
 19- functions `read_json` and `to_json` to convert JSON data and pandas entities
 20
 21- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
 22    - `DataFrameConnec`: 'tab'   connector
 23    - `SeriesConnec`:    'field' connector
 24    
 25- an utility class with static methods : `PdUtil`    
 26"""
 27import datetime
 28import json
 29import configparser
 30from pathlib import Path
 31import pandas as pd
 32import numpy as np
 33
 34import ntv_pandas
 35from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
 36from json_ntv.ntv_util import NtvUtil
 37from json_ntv.ntv_connector import ShapelyConnec
 38
 39def to_json(pd_array, **kwargs):
 40    ''' convert pandas Series or Dataframe to JSON text or JSON Value.
 41    
 42    *parameters*
 43    
 44    - **pd_array** : Series or Dataframe to convert
 45    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
 46    - **header** : boolean (default: True) - if True the JSON data is included as
 47    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
 48    - **table** : boolean (default False) - if True return TableSchema format
 49    ''' 
 50    option = {'encoded': False, 'header': True, 'table': False} | kwargs
 51    option['header'] = False if option['table'] else option['header']
 52    if isinstance(pd_array, pd.Series):
 53        jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0]
 54        head = ':field'
 55    else:
 56        jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'])[0]
 57        head = ':tab'
 58    if option['header']:      
 59        jsn = { head: jsn}
 60    if option['encoded']:
 61        return json.dumps(jsn)
 62    return jsn
 63    
 64def read_json(js, **kwargs):
 65    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
 66    
 67    *parameters*
 68    
 69    - **js** : JSON text or JSON value to convert
 70    - **extkeys**: list (default None) - keys to use if not present in ntv_value
 71    - **decode_str**: boolean (default False) - if True, string values are converted
 72    in object values
 73    - **leng**: integer (default None) - leng of the Series (used with single codec value)
 74    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
 75    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
 76    - **series**: boolean (default False) - used only without header. If True 
 77    JSON data is converted into Series else DataFrame
 78    ''' 
 79    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
 80              'annotated':False, 'series':False} | kwargs
 81    jso = json.loads(js) if isinstance(js, str) else js
 82    if 'schema' in jso:
 83        return PdUtil.to_obj_table(jso, **option)
 84    ntv = Ntv.from_obj(jso)
 85    if ntv.type_str == 'field':
 86        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
 87    if ntv.type_str == 'tab':
 88        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
 89    if option['series']:
 90        return SeriesConnec.to_obj_ntv(ntv, **option)
 91    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
 92
 93def as_def_type(pd_array):
 94    '''convert a Series or DataFrame with default dtype'''
 95    if isinstance(pd_array, pd.Series):
 96        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
 97    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})
 98        
 99class DataFrameConnec(NtvConnector):
100    
101    '''NTV connector for pandas DataFrame.
102    
103    One static methods is included:
104    
105    - to_listidx: convert a DataFrame in categorical data 
106    '''
107
108    clas_obj = 'DataFrame'
109    clas_typ = 'tab'
110
111    @staticmethod
112    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
113        ''' convert json ntv_value into a DataFrame.
114
115        *Parameters*
116
117        - **index** : list (default None) - list of index values,
118        - **alias** : boolean (default False) - if True, alias dtype else default dtype
119        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
120        series = SeriesConnec.to_series
121
122        ntv = Ntv.fast(ntv_value)
123        lidx = [list(PdUtil.decode_ntv_tab(ntvf))
124                for ntvf in ntv]
125        leng = max([idx[6] for idx in lidx])
126        option = kwargs | {'leng': leng}
127        no_keys = []
128        for ind in range(len(lidx)):
129            no_keys.append(not lidx[ind][3]
130                           and not lidx[ind][4] and not lidx[ind][5])
131            NtvConnector.init_ntv_keys(ind, lidx, leng)
132            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(lidx[ind][2], typ=lidx[ind][1],
133                                                single=len(lidx[ind][2]) == 1))
134        list_series = [series(lidx[ind][2], lidx[ind][0],
135                              None if no_keys[ind] else lidx[ind][4], **option)
136                       for ind in range(len(lidx))]
137        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
138        return PdUtil.pd_index(dfr)
139
140    @staticmethod
141    def to_json_ntv(value, name=None, typ=None, **kwargs):
142        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
143
144        *Parameters*
145
146        - **typ** : string (default None) - type of the NTV object,
147        - **name** : string (default None) - name of the NTV object
148        - **value** : DataFrame values
149        - **table** : boolean (default False) - if True return TableSchema format'''
150
151        table = kwargs.get('table', False)
152        if not table:
153            df2 = value.reset_index()
154            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
155                           for col in df2.columns]).to_obj()
156            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
157        df2 = pd.DataFrame({ NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
158            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
159            value[col]) for col in value.columns})
160        table_val = json.loads(df2.to_json(orient='table',
161                        date_format='iso', default_handler=str))
162        for name in value.columns:
163            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
164                                        value[name], table=True, no_val=True)
165            table_val['schema'] = PdUtil.table_schema(table_val['schema'], 
166                                                         ntv_name, ntv_type)
167        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)
168          
169    @staticmethod
170    def to_listidx(dtf):
171        ''' convert a DataFrame in categorical data 
172        
173        *Return: tuple with:*
174
175        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
176        - **lenght** of the DataFrame'''
177        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))
178
179
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # conversion tables are loaded once, at class creation, from the .ini
    # files located next to the ntv_pandas package
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None if ntv_value is None or if the field has
        a parent and no extkeys is given
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            PdUtil.decode_ntv_tab(ntv)
        # a derived field cannot be rebuilt without the keys of its parent
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)
        (used only with table=True)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            # the TableSchema value is built only when it is returned
            if no_val:
                return (ntv_name, ntv_type)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # categorical data: [list of categories, list of codes (or coef)]
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as UTC `datetime` objects.
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # pd.Timestamp is the public name of the class tested here
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        (its type is the default type used to choose the dtype)
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values in the codec; if empty or
        None the codec holds the values themselves

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        # True when the NTV type has an entry in the pandas conversion table
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            # keys are present: build a categorical Series
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type; the Series is not renamed if None
        '''
        from io import StringIO

        # pandas deprecates literal JSON strings in read_json (since 2.1):
        # the text is passed as a file-like object
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, to_json=False)
371
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **decode_ntv_tab**: Generate a tuple data from a NTVvalue
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series

        *Parameters*

        - **jsn** : dict - TableSchema data with 'schema' and 'data' keys

        *Returns* : Series if the data has a single non index column,
        else DataFrame'''
        from io import StringIO

        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        # (pd_name, name_type, dtype) of each non index field
        names = [PdUtil.pd_name(nam, ntvtyp, table=True)
                 for nam, ntvtyp in zip(name, ntv_type)]
        pd_name = [item[0] for item in names]
        pd_dtype = [item[2] for item in names]
        # read_json: literal JSON strings are deprecated (pandas 2.1) and
        # 'records' is the documented spelling of the orient
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col],
                        to_json=False) for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind] for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

        - name (None or string): name of the Field
        - dtype (None or string): type of data
        - codec (list): list of Field codec values
        - parent (None or int): Field parent or None
        - keys (None or list): Field keys
        - coef (None or int): coef if primary Field else None
        - leng (int): length of the Field
        '''
        ntv = Ntv.obj(field)
        typ = ntv.type_str if ntv.ntv_type else None
        nam = ntv.name
        if isinstance(ntv, NtvSingle):
            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        val = [ntv_val.to_obj() for ntv_val in ntv]
        # only lists of 2 or 3 elements starting with a list can hold a
        # codec followed by parent / keys / coef
        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
            return (nam, typ, val, None, None, None, len(ntv))

        ntvc = ntv[0]
        leng = max(len(ind) for ind in ntv)
        typc = ntvc.type_str if ntvc.ntv_type else None
        valc = ntvc.to_obj(simpleval=True)
        # [codec, parent, keys]
        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
                isinstance(ntv[2][0].val, int):
            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
        # [codec, parent]
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
            return (nam, typc, valc, ntv[1].val, None, None, leng)
        # [codec, [coef]]
        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
            leng = leng * ntv[1][0].val
            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
        # [codec, keys]
        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
        return (nam, typ, val, None, None, None, len(ntv))

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'. The schema is updated in place and
        returned.'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                        date_format='iso', default_handler=str))
        # the field is looked up under 'values' when the Series has no name
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, to_json=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **to_json** : boolean (default True) - if True convert the values
        for json output, else convert json values into objects

        The Series is returned unchanged for the other NTVtype.'''
        if to_json:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name
        (returned as is if not empty),
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return 'json'
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                        date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index: a column named 'index' becomes the
        (unnamed) index, else the DataFrame is returned as is'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            # no pandas conversion: the NTV type is kept in the name
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value

        The first row (`srs[:1]`) is returned when every value equals the
        first one, else the Series itself. An empty Series is returned
        unchanged.'''
        # guard: srs.values[0] does not exist for an empty Series
        if len(srs) == 0:
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def to_json(pd_array, **kwargs):
def to_json(pd_array, **kwargs):
    ''' convert a pandas Series or DataFrame to JSON text or JSON value.

    *parameters*

    - **pd_array** : Series or DataFrame to convert
    - **encoded** : boolean (default: False) - if True return a JSON text
    (`json.dumps`) else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is returned
    as value of a {key: value} object where key is ':field' for a Series or
    ':tab' for a DataFrame (ignored when `table` is True)
    - **table** : boolean (default False) - if True return TableSchema format
    '''
    settings = {'encoded': False, 'header': True, 'table': False} | kwargs
    as_table = settings['table']
    # anything that is not a Series is handled by the DataFrame connector
    if isinstance(pd_array, pd.Series):
        connector, key = SeriesConnec, ':field'
    else:
        connector, key = DataFrameConnec, ':tab'
    payload = connector.to_json_ntv(pd_array, table=as_table)[0]
    # a TableSchema result is never wrapped in a header object
    if settings['header'] and not as_table:
        payload = {key: payload}
    return json.dumps(payload) if settings['encoded'] else payload

convert pandas Series or Dataframe to JSON text or JSON Value.

parameters

  • pd_array : Series or Dataframe to convert
  • encoded : boolean (default: False) - if True return a JSON text else a JSON value
  • header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
  • table : boolean (default False) - if True return TableSchema format
def read_json(js, **kwargs):
def read_json(js, **kwargs):
    '''Build a pandas Series or DataFrame from a JSON text or a JSON value.

    *parameters*

    - **js** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    defaults = {'extkeys': None, 'decode_str': False, 'leng': None,
                'alias': False, 'annotated': False, 'series': False}
    option = defaults | kwargs
    jso = json.loads(js) if isinstance(js, str) else js
    # data holding a 'schema' member is handled as TableSchema
    if 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    kind = ntv.type_str
    if kind == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    # without a ':field' / ':tab' header the 'series' option picks the target
    if kind != 'tab' and option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)

convert JSON text or JSON Value to pandas Series or Dataframe.

parameters

  • js : JSON text or JSON value to convert
  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
  • series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def as_def_type(pd_array):
def as_def_type(pd_array):
    '''Return a copy of a Series or DataFrame cast to the default dtypes.

    The target dtype is looked up in `SeriesConnec.deftype` by dtype name;
    a dtype with no entry is kept. A DataFrame is rebuilt column by column.'''
    if not isinstance(pd_array, pd.Series):
        return pd.DataFrame({col: as_def_type(pd_array[col])
                             for col in pd_array.columns})
    current = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(current, current))

convert a Series or DataFrame with default dtype

class DataFrameConnec(json_ntv.ntv_util.NtvConnector):
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    Besides the two converters (`to_obj_ntv`, `to_json_ntv`), one static
    method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **ntv_value** : json-value of the 'tab' entity (one NTV field per column)
        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        All keyword arguments are forwarded to `SeriesConnec.to_series`; the
        **leng** entry is always replaced by the largest field length.'''
        # each item: [name, type, codec, parent, keys, coef, length]
        fields = [list(PdUtil.decode_ntv_tab(ntv_field))
                  for ntv_field in Ntv.fast(ntv_value)]
        leng = max(field[6] for field in fields)
        option = kwargs | {'leng': leng}
        without_keys = []
        for ind in range(len(fields)):
            # flag evaluated BEFORE init_ntv_keys is applied to the field
            # (that helper receives the whole list - presumably it completes
            # the keys; confirm in NtvConnector)
            parent, keys, coef = fields[ind][3:6]
            without_keys.append(not (parent or keys or coef))
            NtvConnector.init_ntv_keys(ind, fields, leng)
            codec = fields[ind][2]
            fields[ind][2] = Ntv.fast(Ntv.obj_ntv(codec, typ=fields[ind][1],
                                                  single=len(codec) == 1))
        columns = {}
        for field, no_key in zip(fields, without_keys):
            ser = SeriesConnec.to_series(field[2], field[0],
                                         None if no_key else field[4], **option)
            columns[ser.name] = ser
        return PdUtil.pd_index(pd.DataFrame(columns))

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        (`DataFrameConnec.clas_typ` is returned if empty),
        - **name** : string (default None) - name of the NTV object,
        returned unchanged
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format'''
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not kwargs.get('table', False):
            # the index is serialized as ordinary leading column(s)
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, ntv_typ)
        # (ntv_name, ntv_type) of each column, computed once and used both
        # for the data conversion and for the schema
        fields = [SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)
                  for col in value.columns]
        df2 = pd.DataFrame({
            NtvUtil.from_obj_name(col)[0]: PdUtil.convert(field[1], value[col])
            for col, field in zip(value.columns, fields)})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        # the loop variables must not reuse `name`: it is part of the result
        for ntv_name, ntv_type in fields:
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))

NTV connector for pandas DataFrame.

One static method is included:

  • to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
112    @staticmethod
113    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
114        ''' convert json ntv_value into a DataFrame.
115
116        *Parameters*
117
118        - **index** : list (default None) - list of index values,
119        - **alias** : boolean (default False) - if True, alias dtype else default dtype
120        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
121        series = SeriesConnec.to_series
122
123        ntv = Ntv.fast(ntv_value)
124        lidx = [list(PdUtil.decode_ntv_tab(ntvf))
125                for ntvf in ntv]
126        leng = max([idx[6] for idx in lidx])
127        option = kwargs | {'leng': leng}
128        no_keys = []
129        for ind in range(len(lidx)):
130            no_keys.append(not lidx[ind][3]
131                           and not lidx[ind][4] and not lidx[ind][5])
132            NtvConnector.init_ntv_keys(ind, lidx, leng)
133            lidx[ind][2] = Ntv.fast(Ntv.obj_ntv(lidx[ind][2], typ=lidx[ind][1],
134                                                single=len(lidx[ind][2]) == 1))
135        list_series = [series(lidx[ind][2], lidx[ind][0],
136                              None if no_keys[ind] else lidx[ind][4], **option)
137                       for ind in range(len(lidx))]
138        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
139        return PdUtil.pd_index(dfr)

convert json ntv_value into a DataFrame.

Parameters

  • index : list (default None) - list of index values,
  • alias : boolean (default False) - if True, alias dtype else default dtype
  • annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
141    @staticmethod
142    def to_json_ntv(value, name=None, typ=None, **kwargs):
143        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
144
145        *Parameters*
146
147        - **typ** : string (default None) - type of the NTV object,
148        - **name** : string (default None) - name of the NTV object
149        - **value** : DataFrame values
150        - **table** : boolean (default False) - if True return TableSchema format'''
151
152        table = kwargs.get('table', False)
153        if not table:
154            df2 = value.reset_index()
155            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
156                           for col in df2.columns]).to_obj()
157            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
158        df2 = pd.DataFrame({ NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
159            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
160            value[col]) for col in value.columns})
161        table_val = json.loads(df2.to_json(orient='table',
162                        date_format='iso', default_handler=str))
163        for name in value.columns:
164            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
165                                        value[name], table=True, no_val=True)
166            table_val['schema'] = PdUtil.table_schema(table_val['schema'], 
167                                                         ntv_name, ntv_type)
168        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)

convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : DataFrame values
  • table : boolean (default False) - if True return TableSchema format
@staticmethod
def to_listidx(dtf):
170    @staticmethod
171    def to_listidx(dtf):
172        ''' convert a DataFrame in categorical data 
173        
174        *Return: tuple with:*
175
176        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
177        - **lenght** of the DataFrame'''
178        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))

convert a DataFrame in categorical data

Return: tuple with:

  • list of dict (keys : 'codec', 'name', 'keys') for each column
  • length of the DataFrame
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class SeriesConnec(json_ntv.ntv_util.NtvConnector):
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # conversion tables, loaded when the class is created from
    # 'ntv_pandas.ini' stored in the directory of the ntv_pandas package
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of `astype`
    deftype = {val: key for key, val in astype.items()}
    # `config` is rebound to a second parser, dedicated to 'ntv_table.ini'
    config = configparser.ConfigParser()
    config.read(Path(ntv_pandas.__file__).parent.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Return*

        - a Series, or None if `ntv_value` is None or if the field has a
        parent and no **extkeys** is given
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
        (ntv_name, typ, codec, parent,
         ntv_keys, coef, leng_field) = PdUtil.decode_ntv_tab(ntv)
        extkeys = option['extkeys']
        if parent and not extkeys:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field // coef, option['leng'])
        elif extkeys:
            ntv_keys = (NtvConnector.keysfromderkeys(extkeys, ntv_keys)
                        if parent else extkeys)
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - used only with table=True,
        if True return (ntv_name, ntv_type)'''
        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        dtype_name = value.dtype.name
        srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
        ntv_name, name_type = NtvUtil.from_obj_name(srs.name or '')[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            return (ntv_name, ntv_type) if no_val else (ntv_value, ntv_name, ntv_type)

        if srs.dtype.name == 'category':
            # categorical data : [codec, keys], keys being either the list
            # of codes or a single-item list with the coefficient found by
            # encode_coef
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = NtvList(PdUtil.ntv_val(ntv_type, cdc),
                                ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)

        out_typ = typ if typ else SeriesConnec.clas_typ
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(),
                    name, out_typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, out_typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as `datetime` objects converted
        to UTC.
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # only the first category is tested to detect a datetime codec
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values in the codec. If empty or
        None the Series is built directly from the codec values

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (applied only when **ntv_keys** is used)
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        alias_of = SeriesConnec.astype
        known_types = SeriesConnec.types.set_index('ntv_type').index
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated `leng` times when leng is given
        repeat = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in known_types

        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec,
                                 name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(alias_of.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * repeat
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        mapping = alias_of if option['alias'] else SeriesConnec.deftype
        return srs.astype(mapping.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - NTVtype passed to `PdUtil.convert`
        - **pd_name**: string (default None) - name of the Series including
        ntv_type. The Series is not renamed if None'''
        # local import: keeps this block self-contained
        from io import StringIO

        # the JSON text is wrapped in a StringIO: passing a literal JSON
        # string to read_json is deprecated in recent pandas releases
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, to_json=False)

NTV connector for pandas Series

Two static methods are included:

  • to_idx: convert a Series in categorical data
  • to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
204    @staticmethod
205    def to_obj_ntv(ntv_value, **kwargs):
206        '''Generate a Series Object from a Ntv field object
207
208        *Parameters*
209
210        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
211
212        *parameters (kwargs)*
213
214        - **extkeys**: list (default None) - keys to use if not present in ntv_value
215        - **decode_str**: boolean (default False) - if True, string values are converted
216        in object values
217        - **index**: list (default None) - if present, add the index in Series
218        - **leng**: integer (default None) - leng of the Series (used with single codec value)
219        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
220        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
221        '''
222        option = {'extkeys': None, 'decode_str': False, 'leng': None,
223                  'annotated':False} | kwargs
224        if ntv_value is None:
225            return None
226        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
227
228        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
229            PdUtil.decode_ntv_tab(ntv)
230        if parent and not option['extkeys']:
231            return None
232        if coef:
233            ntv_keys = NtvConnector.keysfromcoef(
234                coef, leng_field//coef, option['leng'])
235        elif option['extkeys'] and parent:
236            ntv_keys = NtvConnector.keysfromderkeys(
237                option['extkeys'], ntv_keys)
238        elif option['extkeys'] and not parent:
239            ntv_keys = option['extkeys']
240        ntv_codec = Ntv.fast(Ntv.obj_ntv(
241            codec, typ=typ, single=len(codec) == 1))
242        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

Generate a Series Object from a Ntv field object

Parameters

  • ntv_value: Ntv object or Ntv value - value to convert in Series

parameters (kwargs)

  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
244    @staticmethod
245    def to_json_ntv(value, name=None, typ=None, **kwargs):
246        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).
247
248        *Parameters*
249
250        - **typ** : string (default None) - type of the NTV object,
251        - **name** : string (default None) - name of the NTV object
252        - **value** : Series values
253        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
254        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)'''
255
256        table = kwargs.get('table', False)
257        no_val = kwargs.get('no_val', False)
258        srs = value.astype(SeriesConnec.astype.get(value.dtype.name, value.dtype.name))
259        sr_name = srs.name if srs.name else ''
260        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]
261
262        if table:
263            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
264            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
265            if no_val:
266                return (ntv_name, ntv_type)
267            return (ntv_value, ntv_name, ntv_type)
268        if srs.dtype.name == 'category':
269            cdc = pd.Series(srs.cat.categories)
270            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
271            cat_value = PdUtil.ntv_val(ntv_type, cdc)
272            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
273            cod_value = list(srs.cat.codes)
274            coef = NtvConnector.encode_coef(cod_value)
275            ntv_value = [cat_value, [coef] if coef else cod_value]
276            ntv_type = 'json'
277        else:
278            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
279            ntv_value = PdUtil.ntv_val(ntv_type, srs)
280        if len(ntv_value) == 1:
281            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
282                    SeriesConnec.clas_typ if not typ else typ)
283        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
284                SeriesConnec.clas_typ if not typ else typ)

convert a Series (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : Series values
  • table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
  • no_val : boolean (default False) - if True return (ntv_name, ntv_type)
@staticmethod
def to_idx(ser):
286    @staticmethod
287    def to_idx(ser):
288        ''' convert a Series in categorical data
289
290        *return (dict)*
291
292        { 'codec': 'list of pandas categories',
293          'name': 'name of the series',
294          'keys': 'list of pandas codes' }
295        '''
296        idx = ser.astype('category')
297        lis = list(idx.cat.categories)
298        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):
299            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
300                   for ts in lis]
301        return {'codec': lis, 'name': ser .name, 'keys': list(idx.cat.codes)}

convert a Series in categorical data

return (dict)

{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }

@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
303    @staticmethod
304    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
305        ''' return a pd.Series from Field data (codec, name, keys)
306
307        *Parameters*
308
309        - **ntv_codec**: Ntv object - codec value to convert in Series values
310        - **ntv_type**: string - default type to apply to convert in dtype
311        - **ntv_name**: string - name of the Series
312
313        *parameters (kwargs)*
314
315        - **index**: list (default None) - if present, add the index in Series
316        - **leng**: integer (default None) - leng of the Series (used with single codec value)
317        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
318        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
319        '''
320        option = {'index': None, 'leng': None, 'alias': False,
321                  'annotated': False} | kwargs
322        types = SeriesConnec.types.set_index('ntv_type')
323        astype = SeriesConnec.astype
324        leng = option['leng']
325
326        ntv_type = ntv_codec.type_str
327        len_unique = leng if len(ntv_codec) == 1 and leng else 1
328        pd_convert = ntv_type in types.index
329        
330        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
331        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type, 
332                                 option['annotated'], pd_convert)
333        if ntv_keys:
334            if pd_convert and name_type != 'array':
335                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
336                cat_type = categ.dtype.name
337                categories = categ.astype(astype.get(cat_type, cat_type))
338            else:
339                categories = pd.Series(ntv_obj, dtype='object')
340            cat = pd.CategoricalDtype(categories=categories)
341            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
342            srs = pd.Series(data, name=pd_name,
343                            index=option['index'], dtype='category')
344        else:
345            data = ntv_obj * len_unique
346            if pd_convert:
347                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
348            else:
349                srs = pd.Series(data, name=pd_name, dtype=dtype)
350        
351        if option['alias']:
352            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
353        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

return a pd.Series from Field data (codec, name, keys)

Parameters

  • ntv_codec: Ntv object - codec value to convert in Series values
  • ntv_type: string - default type to apply to convert in dtype
  • ntv_name: string - name of the Series

parameters (kwargs)

  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class PdUtil:
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **decode_ntv_tab**: Generate a tuple data from a NTVvalue
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series, the type
    deduced from the name and the dtype
    - **pd_index**: return a DataFrame with index
    - **unic**: return a one-element Series if all the values of the Series
    are equal

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        '''Convert json TableSchema data into a DataFrame or a Series.

        *Parameters*

        - **jsn** : dict - TableSchema json-value with a 'schema' key (holding
        the 'fields' list) and a 'data' key (rows read with the 'records'
        orientation),
        - **kwargs** : accepted for signature compatibility, not used here.

        *Returns* : the Series of the single column if the result has exactly
        one column, otherwise the DataFrame.'''
        # Local import: pandas >= 2.1 deprecates literal JSON strings in
        # read_json, a file-like wrapper is the supported form.
        from io import StringIO

        fields = jsn['schema']['fields']
        ntv_types = PdUtil.ntvtype_table(fields)
        names = PdUtil.name_table(fields)
        # pd_name returns (pd_name, name_type, dtype): evaluate it once per field
        pd_infos = [PdUtil.pd_name(nam, ntvtyp, table=True)
                    for nam, ntvtyp in zip(names, ntv_types)]
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_types[ind], dfr[col],
                                                to_json=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_infos[ind][2]
                          for ind, col in enumerate(dfr.columns)})
        dfr.columns = [info[0] for info in pd_infos]
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_tab(field):
        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*

        - name (None or string): name of the Field
        - dtype (None or string): type of data
        - codec (list): list of Field codec values
        - parent (None or int): Field parent or None
        - keys (None or list): Field keys
        - coef (None or int): coef if primary Field else None
        - leng (int): length of the Field

        When the structure of the Ntv entity matches none of the recognized
        layouts, the whole value is returned as codec (parent, keys and coef
        are None).
        '''
        ntv = Ntv.obj(field)
        name = ntv.name
        ntv_typ = ntv.type_str if ntv.ntv_type else None
        if isinstance(ntv, NtvSingle):
            return (name, ntv_typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
        values = [child.to_obj() for child in ntv]
        # fallback answer: the complete list is the codec
        full = (name, ntv_typ, values, None, None, None, len(ntv))
        if not 2 <= len(ntv) <= 3 or isinstance(ntv[0], NtvSingle):
            return full

        codec = ntv[0]
        leng = max(len(child) for child in ntv)
        codec_typ = codec.type_str if codec.ntv_type else None
        codec_val = codec.to_obj(simpleval=True)
        second = ntv[1]
        if len(ntv) == 3:
            third = ntv[2]
            # [codec, parent, keys]
            if isinstance(second, NtvSingle) and \
                    isinstance(second.val, (int, str)) and \
                    not isinstance(third, NtvSingle) and \
                    isinstance(third[0].val, int):
                return (name, codec_typ, codec_val, second.val,
                        third.to_obj(), None, leng)
            return full
        if len(second) == 1:
            # [codec, parent]
            if isinstance(second.val, (int, str)):
                return (name, codec_typ, codec_val, second.val, None, None, leng)
            # [codec, [coef]]
            if isinstance(second.val, list):
                coef = second[0].val
                return (name, codec_typ, codec_val, None, None, coef, leng * coef)
        elif len(second) > 1 and isinstance(second[0].val, int):
            # [codec, keys]
            return (name, codec_typ, codec_val, None, second.to_obj(), None, leng)
        return full

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table.

        The field named 'index' is skipped and the name 'values' is replaced
        by None. A field without 'name' key gives None.'''
        kept = []
        for field in fields:
            name = field.get('name', None)
            if name == 'index':
                continue
            kept.append(None if name == 'values' else name)
        return kept

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table.

        A missing 'format' key is read as 'default' and a missing 'type' key
        as None before the lookup done by `ntv_table`.'''
        ntv_types = []
        for field in fields:
            if field.get('name', None) == 'index':
                continue
            ntv_types.append(PdUtil.ntv_table(field.get('format', 'default'),
                                              field.get('type', None)))
        return ntv_types

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'.

        The schema is updated in place and returned. The 'format' key is
        removed when the format is 'default' and the 'extDtype' key is always
        removed. A ValueError is raised if no field is named 'name'.'''
        fields = schema['fields']
        field = fields[[fld['name'] for fld in fields].index(name)]
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        table_format = tabletype['format']
        if table_format == 'default':
            field.pop('format', None)
        else:
            field['format'] = table_format
        field['type'] = tabletype['type']
        field.pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.

        Note: the name is assigned to the Series returned by `convert`; when
        no conversion applies this is the input Series itself.'''
        converted = PdUtil.convert(ntv_type, srs)
        converted.name = ntv_name
        raw = converted.to_json(orient='table', date_format='iso',
                                default_handler=str)
        tab_val = json.loads(raw)
        # field name given to the schema for an unnamed Series
        field_name = 'values' if converted.name is None else converted.name
        tab_val['schema'] = PdUtil.table_schema(tab_val['schema'], field_name,
                                                ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, to_json=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **to_json** : boolean (default True) - apply to json function

        The Series is returned unchanged when no conversion is defined for
        the NTVtype.'''
        is_geometry = ntv_type in ('point', 'line', 'polygon', 'geometry')
        if to_json:
            if is_geometry:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            return srs.astype(str) if ntv_type == 'date' else srs
        if is_geometry:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type in ('datetime', 'date', 'time'):
            stamps = pd.to_datetime(srs)
            if ntv_type == 'datetime':
                return stamps
            return stamps.dt.date if ntv_type == 'date' else stamps.dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion

        A non empty name_type is returned as it is. Otherwise the NTVtype is
        searched in `SeriesConnec.types` (then in `SeriesConnec.typtab` if
        table is True); 'json' is returned when nothing is found and table
        is False.
        '''
        if name_type:
            return name_type
        types_none = SeriesConnec.types.set_index('name_type').loc[None]
        if dtype in types_none.dtype.values:
            return types_none.set_index('dtype').loc[dtype].ntv_type
        if table:
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return 'json'

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.

        Geographic types and 'object' Series are returned as a plain list,
        the other Series go through the pandas JSON export.'''
        converted = PdUtil.convert(ntv_type, srs)
        is_geo = ntv_type in ('point', 'line', 'polygon', 'geometry', 'geojson')
        if is_geo or converted.dtype.name == 'object':
            return converted.to_list()
        raw = converted.to_json(orient='records', date_format='iso',
                                default_handler=str)
        return json.loads(raw)

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series.

        *Parameters*

        - **ntv_codec** : Ntv object - codec to convert,
        - **name_type** : string - used as default type without pandas
        conversion, 'array' selects the direct `to_obj` export,
        - **annotated** : boolean - passed as 'simpleval' to `obj_value`,
        - **pd_convert** : boolean - True if pandas conversion is applied.'''
        if not pd_convert:
            return ntv_codec.to_obj(format='obj', simpleval=True,
                                    def_type=name_type)
        if name_type == 'array':
            return ntv_codec.to_obj(format='obj', simpleval=True)
        values = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                     def_type=ntv_codec.type_str, fast=True)
        # a single value is wrapped to always return a list
        if isinstance(values, list):
            return values
        return [values]

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type

        The value is read in `SeriesConnec.table` indexed by (type, format).'''
        lookup = SeriesConnec.table.set_index(['type', 'format'])
        return lookup.loc[(table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index.

        If a column is named 'index', it becomes the (unnamed) index of a new
        DataFrame. Otherwise the DataFrame is returned as it is.'''
        if 'index' not in dfr.columns:
            return dfr
        indexed = dfr.set_index('index')
        indexed.index.name = None
        return indexed

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype.

        *Parameters*

        - **ntv_name** : string or None - name of the Field,
        - **ntv_type** : string - NTVtype of the Field,
        - **pd_convert** : boolean (default True) - if False (and table is
        False) the name is 'ntv_name::ntv_type' and the dtype is 'object',
        - **table** : boolean (default False) - if True, the type is first
        searched (lower case) in `SeriesConnec.typtab`.'''
        base = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if not table and not pd_convert:
            return (base + '::' + ntv_type, ntv_type, 'object')
        key = ntv_type.lower() if table else None
        if table and key in typtab.index:
            row = typtab.loc[key]
            name_type, dtype = row['name_type'], row['dtype']
        else:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        # added: replace the dtype by its default dtype when one is defined
        dtype = SeriesConnec.deftype.get(dtype, dtype)
        full_name = base + '::' + name_type if name_type else base
        return (full_name if full_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return a one-element Series if all the values of the Series are
        equal, otherwise the Series itself.

        An empty Series is returned as it is.'''
        if len(srs) == 0:
            # nothing to compare (values[0] would raise IndexError)
            return srs
        values = srs.values
        return srs[:1] if np.array_equal(values, [values[0]] * len(srs)) else srs

ntv-pandas utilities.

This class includes static methods:

Ntv and pandas

  • decode_ntv_tab: Generate a tuple data from a NTVvalue
  • ntv_type: return NTVtype from name_type and dtype of a Series
  • convert: convert Series with external NTVtype
  • ntv_val: convert a simple Series into NTV json-value
  • ntv_obj: return a list of values to convert in a Series
  • pd_name: return a tuple with the name of the Series, the type deduced from the name and the dtype
  • pd_index: return a DataFrame with index
  • unic: return a one-element Series if all the values of the Series are equal, otherwise the Series itself

TableSchema

  • to_obj_table: convert json TableSchema data into a DataFrame or a Series
  • name_table: return a list of non index field's names from a json Table
  • ntvtype_table: return a list of non index field's ntv_type from a json Table
  • table_schema: add 'format' and 'type' keys in a Json TableSchema
  • table_val: convert a Series into TableSchema json-value
  • ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
396    @staticmethod
397    def to_obj_table(jsn, **kwargs):
398        ''' convert json TableSchema data into a DataFrame or a Series'''
399        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
400        name = PdUtil.name_table(jsn['schema']['fields'])
401        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0] 
402                   for nam, ntvtyp in zip(name, ntv_type)]
403        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2] 
404                   for nam, ntvtyp in zip(name, ntv_type)]
405        dfr = pd.read_json(json.dumps(jsn['data']), orient='record')
406        dfr = PdUtil.pd_index(dfr)
407        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], 
408                        to_json=False) for ind, col in enumerate(dfr.columns)})
409        dfr = dfr.astype({col: pd_dtype[ind] for ind, col in enumerate(dfr.columns)})
410        dfr.columns = pd_name 
411        if len(dfr.columns) == 1:
412            return dfr[dfr.columns[0]]
413        return dfr

convert json TableSchema data into a DataFrame or a Series

@staticmethod
def decode_ntv_tab(field):
415    @staticmethod
416    def decode_ntv_tab(field):
417        '''Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)
418
419        *Returns tuple: (name, dtype, codec, parent, keys, coef, leng)*
420        
421        - name (None or string): name of the Field
422        - dtype (None or string): type of data
423        - codec (list): list of Field codec values
424        - parent (None or int): Field parent or None
425        - keys (None or list): Field keys
426        - coef (None or int): coef if primary Field else None
427        - leng (int): length of the Field
428        '''
429        ntv = Ntv.obj(field)
430        typ = ntv.type_str if ntv.ntv_type else None
431        nam = ntv.name
432        if isinstance(ntv, NtvSingle):
433            return (nam, typ, [ntv.to_obj(simpleval=True)], None, None, None, 1)
434        val = [ntv_val.to_obj() for ntv_val in ntv]
435        if len(ntv) < 2 or len(ntv) > 3 or isinstance(ntv[0], NtvSingle):
436            return (nam, typ, val, None, None, None, len(ntv))
437
438        ntvc = ntv[0]
439        leng = max(len(ind) for ind in ntv)
440        typc = ntvc.type_str if ntvc.ntv_type else None
441        valc = ntvc.to_obj(simpleval=True)
442        if len(ntv) == 3 and isinstance(ntv[1], NtvSingle) and \
443                isinstance(ntv[1].val, (int, str)) and not isinstance(ntv[2], NtvSingle) and \
444                isinstance(ntv[2][0].val, int):
445            return (nam, typc, valc, ntv[1].val, ntv[2].to_obj(), None, leng)
446        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, (int, str)):
447            return (nam, typc, valc, ntv[1].val, None, None, leng)
448        if len(ntv) == 2 and len(ntv[1]) == 1 and isinstance(ntv[1].val, list):
449            leng = leng * ntv[1][0].val
450            return (nam, typc, valc, None, None, ntv[1][0].val, leng)
451        if len(ntv) == 2 and len(ntv[1]) > 1 and isinstance(ntv[1][0].val, int):
452            return (nam, typc, valc, None, ntv[1].to_obj(), None, leng)
453        return (nam, typ, val, None, None, None, len(ntv))

Generate a tuple data from a Ntv tab value (bytes, string, json, Ntv object)

Returns tuple: (name, dtype, codec, parent, keys, coef, leng)

  • name (None or string): name of the Field
  • dtype (None or string): type of data
  • codec (list): list of Field codec values
  • parent (None or int): Field parent or None
  • keys (None or list): Field keys
  • coef (None or int): coef if primary Field else None
  • leng (int): length of the Field
@staticmethod
def name_table(fields):
455    @staticmethod 
456    def name_table(fields):
457        '''return a list of non index field's names from a json Table'''
458        names = [field.get('name', None) for field in fields
459                if field.get('name', None) != 'index']
460        return [ None if name == 'values' else name for name in names]

return a list of non index field's names from a json Table

@staticmethod
def ntvtype_table(fields):
462    @staticmethod 
463    def ntvtype_table(fields):
464        '''return a list of non index field's ntv_type from a json Table'''
465        return [PdUtil.ntv_table(field.get('format', 'default'),
466                field.get('type', None)) for field in fields
467                if field.get('name', None) != 'index']

return a list of non index field's ntv_type from a json Table

@staticmethod
def table_schema(schema, name, ntv_type):
469    @staticmethod 
470    def table_schema(schema, name, ntv_type):
471        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema 
472        for the field defined by 'name' '''
473        ind = [field['name'] for field in schema['fields']].index(name)
474        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
475        if tabletype['format'] == 'default':
476            schema['fields'][ind].pop('format', None)        
477        else:    
478            schema['fields'][ind]['format'] = tabletype['format']
479        schema['fields'][ind]['type'] = tabletype['type']
480        schema['fields'][ind].pop('extDtype', None)        
481        return schema

convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema for the field defined by 'name'

@staticmethod
def table_val(ntv_type, ntv_name, srs):
483    @staticmethod
484    def table_val(ntv_type, ntv_name, srs):
485        '''convert a Series into TableSchema json-value.
486
487        *Parameters*
488
489        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
490        - **ntv_name**: string - name of the Series
491        - **srs** : Series to be converted.'''
492        srs = PdUtil.convert(ntv_type, srs)
493        srs.name = ntv_name
494        tab_val = json.loads(srs.to_json(orient='table',
495                        date_format='iso', default_handler=str))
496        name = 'values' if srs.name is None else srs.name 
497        tab_val['schema'] = PdUtil.table_schema(tab_val['schema'], name, ntv_type)
498        return tab_val    

convert a Series into TableSchema json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • ntv_name: string - name of the Series
  • srs : Series to be converted.
@staticmethod
def convert(ntv_type, srs, to_json=True):
500    @staticmethod
501    def convert(ntv_type, srs, to_json=True):
502        ''' convert Series with external NTVtype.
503
504        *Parameters*
505
506        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
507        - **srs** : Series to be converted.
508        - **to_json** : boolean (default True) - apply to json function'''
509        if to_json:
510            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
511                return srs.apply(ShapelyConnec.to_coord)
512            if ntv_type == 'geojson':
513                return srs.apply(ShapelyConnec.to_geojson)
514            if ntv_type == 'date':
515                return srs.astype(str)
516            return srs
517        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
518            return srs.apply(ShapelyConnec.to_geometry)
519        if ntv_type == 'geojson':
520            return srs.apply(ShapelyConnec.from_geojson)
521        if ntv_type == 'datetime':
522            return pd.to_datetime(srs)
523        if ntv_type == 'date':
524            return pd.to_datetime(srs).dt.date
525        if ntv_type == 'time':
526            return pd.to_datetime(srs).dt.time
527        return srs

convert Series with external NTVtype.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
  • to_json : boolean (default True) - apply to json function
@staticmethod
def ntv_type(name_type, dtype, table=False):
529    @staticmethod
530    def ntv_type(name_type, dtype, table=False):
531        ''' return NTVtype from name_type and dtype of a Series .
532
533        *Parameters*
534
535        - **name_type** : string - type included in the Series name,
536        - **dtype** : string - dtype of the Series.
537        - **table** : boolean (default False) - True if Table Schema conversion 
538        '''
539        if not name_type:
540            types_none = SeriesConnec.types.set_index('name_type').loc[None]
541            if dtype in types_none.dtype.values:
542                return types_none.set_index('dtype').loc[dtype].ntv_type
543            if not table:
544                return 'json'
545            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
546            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type            
547        return name_type

return NTVtype from name_type and dtype of a Series .

Parameters

  • name_type : string - type included in the Series name,
  • dtype : string - dtype of the Series.
  • table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
549    @staticmethod
550    def ntv_val(ntv_type, srs):
551        ''' convert a simple Series into NTV json-value.
552
553        *Parameters*
554
555        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
556        - **srs** : Series to be *converted.'''
557        srs = PdUtil.convert(ntv_type, srs)
558        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
559            return srs.to_list()
560        if srs.dtype.name == 'object':
561            return srs.to_list()
562        return json.loads(srs.to_json(orient='records',
563                        date_format='iso', default_handler=str))

convert a simple Series into NTV json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
@staticmethod
def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
565    @staticmethod 
566    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
567        '''return a list of values to convert in a Series'''
568        if pd_convert:
569            if name_type == 'array':
570                return ntv_codec.to_obj(format='obj', simpleval=True)
571            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
572                                          def_type=ntv_codec.type_str, fast=True)
573            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
574        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

return a list of values to convert in a Series

@staticmethod
def ntv_table(table_format, table_type):
576    @staticmethod
577    def ntv_table(table_format, table_type):
578        ''' return NTVtype from the TableSchema data.
579
580        *Parameters*
581
582        - **table_format** : string - TableSchema format,
583        - **table_type** : string - TableSchema type'''
584        return SeriesConnec.table.set_index(['type', 'format']).loc[
585            (table_type, table_format)].values[0]

return NTVtype from the TableSchema data.

Parameters

  • table_format : string - TableSchema format,
  • table_type : string - TableSchema type
@staticmethod
def pd_index(dfr):
587    @staticmethod
588    def pd_index(dfr):
589        '''return a DataFrame with index'''
590        if 'index' in dfr.columns:
591            dfr = dfr.set_index('index')
592            dfr.index.rename(None, inplace=True)
593        return dfr

return a DataFrame with index

@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
595    @staticmethod 
596    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
597        '''return a tuple with the name of the Series, the type deduced from 
598        the name and the dtype'''
599        ntv_name = '' if ntv_name is None else ntv_name
600        typtab = SeriesConnec.typtab.set_index('ntv_type')
601        types = SeriesConnec.types.set_index('ntv_type')
602        if table and ntv_type.lower() in typtab.index:
603            name_type = typtab.loc[ntv_type.lower()]['name_type']
604            dtype = typtab.loc[ntv_type.lower()]['dtype']
605        elif pd_convert or table:
606            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
607            dtype = types.loc[ntv_type]['dtype']
608        else:
609            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
610        dtype = SeriesConnec.deftype.get(dtype, dtype) # ajout
611        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
612        return (pd_name if pd_name else None, name_type, dtype)

return a tuple with the name of the Series, the type deduced from the name and the dtype

@staticmethod
def unic(srs):
614    @staticmethod
615    def unic(srs):
616        ''' return simple value if the Series contains a single value'''
617        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

return a one-element Series if all the values of the Series are equal, otherwise the Series itself