ntv-pandas.ntv_pandas.pandas_ntv_connector

Created on Feb 27 2023

@author: Philippe@loco-labs.io

The pandas_ntv_connector module is part of the ntv-pandas.ntv_pandas package (specification document).

A NtvConnector is defined by:

  • clas_obj: str - define the class name of the object to convert
  • clas_typ: str - define the NTVtype of the converted object
  • to_obj_ntv: method - converter from JsonNTV to the object
  • to_json_ntv: method - converter from the object to JsonNTV

It contains :

The functions to_json, to_analysis, check_relation, as_def_type and equals are used with the npd accessor.

  1# -*- coding: utf-8 -*-
  2"""
  3Created on Feb 27 2023
  4
  5@author: Philippe@loco-labs.io
  6
  7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
  8([specification document](
  9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
 10
 11A NtvConnector is defined by:
 12- clas_obj: str - define the class name of the object to convert
 13- clas_typ: str - define the NTVtype of the converted object
 14- to_obj_ntv: method - converter from JsonNTV to the object
 15- to_json_ntv: method - converter from the object to JsonNTV
 16
 17It contains :
 18
 19- functions `read_json` and `to_json` to convert JSON data and pandas entities
 20- function `to_analysis` to create data used by the `tab_analysis` module
 21- function `check_relation` to identify rows with inconsistent relationships (`tab_dataset` function)
 22- functions `as_def_type` and `equals` 
 23
 24- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
 25    - `DataFrameConnec`: 'tab'   connector
 26    - `SeriesConnec`:    'field' connector
 27
 28- an utility class with static methods : `PdUtil`
 29
 30The functions `to_json`, `to_analysis`, `check_relation`, `as_def_type` and 
 31`equals` are used with the `npd` accessor.
 32
 33"""
 34import os
 35import datetime
 36import json
 37import configparser
 38from pathlib import Path
 39import pandas as pd
 40import numpy as np
 41
 42
 43from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
 44from json_ntv.ntv_util import NtvUtil
 45from json_ntv.ntv_connector import ShapelyConnec
 46from tab_dataset.cfield import Cfield
 47
 48path_ntv_pandas = Path(os.path.abspath(__file__)).parent
 49
 50def as_def_type(pd_array):
 51    '''convert a Series or DataFrame with default dtype'''
 52    if isinstance(pd_array, (pd.Series, pd.Index)):
 53        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
 54    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})
 55
 56def check_relation(pd_df, parent, child, typecoupl, value=True):
 57    ''' Accessor for method `cdataset.Cdataset.check_relation` invoket as 
 58    `pd.DataFrame.npd.check_relation`.
 59    Get the inconsistent records for a relationship.
 60
 61     *Parameters*
 62
 63    - **child** : str - name of the child Series involved in the relation
 64    - **parent**: str - name of the parent Series involved in the relation
 65    - **typecoupl**: str - relationship to check ('derived' or 'coupled')
 66    - **value**: boolean (default True) - if True return a dict with inconsistent
 67    values of the Series, else a tuple with index of records)
 68
 69    *Returns* :
 70
 71    - dict with inconsistent values of the Series
 72    - or a tuple with row of records'''
 73    parent_idx = SeriesConnec.to_idx(pd_df[parent])
 74    parent_field = Cfield(parent_idx['codec'], parent, parent_idx['keys'])
 75    child_idx = SeriesConnec.to_idx(pd_df[child])
 76    child_field = Cfield(child_idx['codec'], child, child_idx['keys'])
 77    return  Cfield.check_relation(parent_field, child_field, typecoupl, value)
 78
 79    
 80def equals(pdself, pdother):
 81    '''return True if pd.equals is True and names are equal and dtype of categories are equal'''
 82    equ = True
 83    if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series):
 84        type_cat = str(pdself.dtype) == str(pdother.dtype) == 'category'
 85        if type_cat:
 86            equ &= equals(pdself.cat.categories, pdother.cat.categories)
 87        else:
 88            equ &= as_def_type(pdself).equals(as_def_type(pdother))
 89        equ &= pdself.name == pdother.name
 90        if not equ:
 91            return False
 92    elif isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
 93        for cself, cother in zip(pdself, pdother):
 94            equ &= equals(pdself[cself], pdother[cother])
 95    return equ
 96
 97def read_json(jsn, **kwargs):
 98    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
 99
100    *parameters*
101
102    - **jsn** : JSON text or JSON value to convert
103    - **extkeys**: list (default None) - keys to use if not present in ntv_value
104    - **decode_str**: boolean (default False) - if True, string values are converted
105    in object values
106    - **leng**: integer (default None) - leng of the Series (used with single codec value)
107    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
108    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
109    - **series**: boolean (default False) - used only without header. If True
110    JSON data is converted into Series else DataFrame
111    '''
112    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
113              'annotated': False, 'series': False} | kwargs
114    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
115    if 'schema' in jso:
116        return PdUtil.to_obj_table(jso, **option)
117    ntv = Ntv.from_obj(jso)
118    if ntv.type_str == 'field':
119        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
120    if ntv.type_str == 'tab':
121        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
122    if option['series']:
123        return SeriesConnec.to_obj_ntv(ntv, **option)
124    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
125
def to_analysis(pd_df):
    '''return a dict with data used in AnaDataset module.

    The dict contains:

    - 'fields': for each column, a dict with 'id' (column name), 'lencodec'
    and 'mincodec' (both the number of distinct category codes),
    - 'name': None,
    - 'length': number of rows,
    - 'relations': for each column (except the last one), a dict with the
    number of distinct pairs of codes shared with each following column.'''
    columns = pd_df.columns
    codes = [list(pd_df[name].astype('category').cat.codes) for name in columns]
    sizes = [len(set(code)) for code in codes]

    fields = []
    for name, size in zip(columns, sizes):
        fields.append({'lencodec': size, 'id': name, 'mincodec': size})

    relations = {}
    for first in range(len(codes) - 1):
        linked = {}
        for second in range(first + 1, len(codes)):
            linked[columns[second]] = len(set(zip(codes[first], codes[second])))
        relations[columns[first]] = linked

    return {'fields': fields, 'name': None, 'length': len(pd_df),
            'relations': relations}
140
def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    (ignored if table is True)
    - **table** : boolean (default False) - if True return TableSchema format
    '''
    option = {'encoded': False, 'header': True, 'table': False} | kwargs
    if option['table']:
        # TableSchema data is never wrapped in a header
        option['header'] = False
    if isinstance(pd_array, pd.Series):
        connector, head = SeriesConnec, ':field'
    else:
        connector, head = DataFrameConnec, ':tab'
    jsn = connector.to_json_ntv(pd_array, table=option['table'])[0]
    result = {head: jsn} if option['header'] else jsn
    return json.dumps(result) if option['encoded'] else result
165
166
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **ntv_value** : Ntv object or Ntv value - value to convert
        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        The keyword parameters are forwarded to `SeriesConnec.to_series`
        ('leng' is replaced by the max length of the fields).'''
        ntv = Ntv.fast(ntv_value)
        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
                for ntvf in ntv]
        leng = max(idx[6] for idx in lidx)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, lind in enumerate(lidx):
            # has to be evaluated before the keys are initialized
            no_keys.append(not lind[3] and not lind[4] and not lind[5])
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            lind[2] = Ntv.fast(Ntv.obj_ntv(
                lind[2], typ=lind[1], single=len(lind[2]) == 1))
        list_series = [SeriesConnec.to_series(lind[2], lind[0],
                                              None if no_key else lind[4],
                                              **option)
                       for lind, no_key in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format

        *Returns* : tuple (json-value, name, type) where type is `typ` if
        defined else 'tab' '''

        table = kwargs.get('table', False)
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not table:
            # the index is converted as an ordinary field
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, ntv_typ)
        # (ntv_name, ntv_type) of each column, computed once and used both
        # for the conversion of the values and for the update of the schema
        fields = [SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)
                  for col in value.columns]
        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]:
                            PdUtil.convert(ntv_type, value[col])
                            for col, (_, ntv_type) in zip(value.columns, fields)})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        for ntv_name, ntv_type in fields:
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))
245
246
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # mapping tables between NTVtype and dtype, read from 'ntv_pandas.ini'
    # when the class is created
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    # mapping tables between NTVtype and TableSchema, read from 'ntv_table.ini'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns* : Series, or None if ntv_value is None or if the field has
        a parent and extkeys is not defined
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)
        (used only if table is True)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(
            value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # a categorical Series is converted in a list of two values:
            # the categories and the codes (or a single coefficient)
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type)
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
                                     def_type=ntv_type).ntv_value
        if len(ntv_value) == 1:
            # a single value is returned as a named value (not as a list)
            ntv_value[0].set_name(ntv_name)
            return (ntv_value[0].to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        Timestamp categories are converted in UTC datetime.

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values (if empty or None, the
        Series is not categorical)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated to obtain a Series of length leng
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including ntv_type
        '''
        # local import: passing a literal JSON string to pd.read_json is
        # deprecated (pandas 2.1), the text is wrapped in a file-like object
        from io import StringIO

        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)
443
444
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value
    - **decode_ntv_to_val**: return a value from a ntv_field

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series.

        *Parameters*

        - **jsn** : dict - TableSchema data (keys 'schema' and 'data')
        - **kwargs** : not used

        *Returns* : DataFrame, or Series if the data contains a single column'''
        # local import: passing a literal JSON string to pd.read_json is
        # deprecated (pandas 2.1), the text is wrapped in a file-like object
        from io import StringIO

        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
                    for nam, ntvtyp in zip(name, ntv_type)]
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                         for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field (a simple value for a NtvSingle
        else a list of values)'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'.

        The schema is updated in place and returned ('extDtype' key is
        removed, 'format' key is removed if the format is 'default').'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        # 'values' is the field name used for a Series without name
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - if True, values are converted
        in json-compatible values, else json values are converted in objects.

        The Series is returned unchanged if no conversion is defined for
        the ntv_type.'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion

        If name_type is defined, it is returned. Else the NTVtype is deduced
        from the dtype (None if the dtype is unknown and table is False).
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return None
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index (the column 'index', if present, is
        used as unnamed index)'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            # no pandas conversion: the NTVtype is kept in the name
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value.

        A categorical or an empty Series is returned unchanged. If all the
        values are equal, only the first row is returned.'''
        if str(srs.dtype) == 'category':
            return srs
        if srs.empty:
            # no first value to compare with
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def as_def_type(pd_array):
def as_def_type(pd_array):
    '''return a Series, an Index or a DataFrame converted with default dtype.

    The dtype name is looked up in `SeriesConnec.deftype`; a dtype without
    entry is kept unchanged. A DataFrame is rebuilt column by column.'''
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        converted = {}
        for col in pd_array.columns:
            converted[col] = as_def_type(pd_array[col])
        return pd.DataFrame(converted)
    current = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(current, current))

convert a Series or DataFrame with default dtype

def check_relation(pd_df, parent, child, typecoupl, value=True):
def check_relation(pd_df, parent, child, typecoupl, value=True):
    ''' Get the inconsistent records for a relationship between two columns
    (invoked as `pd.DataFrame.npd.check_relation`).

    Each column is converted into a `Cfield` (codec, name, keys) and the
    result of `Cfield.check_relation` is returned.

     *Parameters*

    - **pd_df** : DataFrame - data containing the two Series
    - **parent**: str - name of the parent Series involved in the relation
    - **child** : str - name of the child Series involved in the relation
    - **typecoupl**: str - relationship to check ('derived' or 'coupled')
    - **value**: boolean (default True) - if True return a dict with inconsistent
    values of the Series, else a tuple with index of records

    *Returns* :

    - dict with inconsistent values of the Series
    - or a tuple with row of records'''
    def as_field(col_name):
        '''build the Cfield of a column from its categorical representation'''
        idx = SeriesConnec.to_idx(pd_df[col_name])
        return Cfield(idx['codec'], col_name, idx['keys'])

    parent_field = as_field(parent)
    child_field = as_field(child)
    return Cfield.check_relation(parent_field, child_field, typecoupl, value)

Accessor for method cdataset.Cdataset.check_relation invoked as pd.DataFrame.npd.check_relation. Get the inconsistent records for a relationship.

Parameters

  • child : str - name of the child Series involved in the relation
  • parent: str - name of the parent Series involved in the relation
  • typecoupl: str - relationship to check ('derived' or 'coupled')
  • value: boolean (default True) - if True return a dict with inconsistent values of the Series, else a tuple with the index of records

Returns :

  • dict with inconsistent values of the Series
  • or a tuple with row of records
def equals(pdself, pdother):
def equals(pdself, pdother):
    '''return True if both objects hold the same data once converted with
    default dtype.

    *Parameters*

    - **pdself**, **pdother** : Series, DataFrame or Index to compare

    *Returns* : boolean

    - two Series are equal if names are equal and:
        - for two categorical Series, categories (with default dtype) and
        codes are equal,
        - else `pd.Series.equals` is True after conversion with default dtype,
    - two DataFrame are equal if they have the same number of columns and if
    columns are equal two by two (in the same order),
    - two Index are equal if `pd.Index.equals` is True after conversion with
    default dtype,
    - any other combination (e.g. a Series and a DataFrame) is not equal.'''
    if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series):
        if pdself.name != pdother.name:
            return False
        if str(pdself.dtype) == str(pdother.dtype) == 'category':
            # categories are Index objects: compare them, then the codes
            # (which also carry the index of the Series)
            return (equals(pdself.cat.categories, pdother.cat.categories)
                    and bool(pdself.cat.codes.equals(pdother.cat.codes)))
        return bool(as_def_type(pdself).equals(as_def_type(pdother)))
    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
        # zip would silently ignore the additional columns of the widest one
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))
    if isinstance(pdself, pd.Index) and isinstance(pdother, pd.Index):
        return bool(as_def_type(pdself).equals(as_def_type(pdother)))
    return False

return True if pd.equals is True and names are equal and dtype of categories are equal

def read_json(jsn, **kwargs):
 98def read_json(jsn, **kwargs):
 99    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
100
101    *parameters*
102
103    - **jsn** : JSON text or JSON value to convert
104    - **extkeys**: list (default None) - keys to use if not present in ntv_value
105    - **decode_str**: boolean (default False) - if True, string values are converted
106    in object values
107    - **leng**: integer (default None) - leng of the Series (used with single codec value)
108    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
109    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
110    - **series**: boolean (default False) - used only without header. If True
111    JSON data is converted into Series else DataFrame
112    '''
113    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
114              'annotated': False, 'series': False} | kwargs
115    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
116    if 'schema' in jso:
117        return PdUtil.to_obj_table(jso, **option)
118    ntv = Ntv.from_obj(jso)
119    if ntv.type_str == 'field':
120        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
121    if ntv.type_str == 'tab':
122        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
123    if option['series']:
124        return SeriesConnec.to_obj_ntv(ntv, **option)
125    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)

convert JSON text or JSON Value to pandas Series or Dataframe.

parameters

  • jsn : JSON text or JSON value to convert
  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
  • series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def to_analysis(pd_df):
def to_analysis(pd_df):
    '''return a dict with data used in AnaDataset module.

    Each column is converted into categorical codes. The result contains:

    - 'fields': for each column, a dict with 'id' (column label), 'lencodec'
    and 'mincodec' (both set to the number of distinct codes),
    - 'name': None,
    - 'length': number of rows,
    - 'relations': for each column (except the last one), a dict giving for
    each following column the number of distinct pairs of codes.'''
    columns = pd_df.columns
    codes = [list(pd_df[col].astype('category').cat.codes) for col in columns]
    nb_codes = [len(set(code)) for code in codes]
    fields = [{'lencodec': nb_code, 'id': col, 'mincodec': nb_code}
              for col, nb_code in zip(columns, nb_codes)]
    relations = {}
    for first in range(len(codes) - 1):
        relations[columns[first]] = {
            columns[second]: len(set(zip(codes[first], codes[second])))
            for second in range(first + 1, len(codes))}
    return {'fields': fields, 'name': None, 'length': len(pd_df),
            'relations': relations}

return a dict with data used in AnaDataset module

def to_json(pd_array, **kwargs):
def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    (ignored if table is True)
    - **table** : boolean (default False) - if True return TableSchema format
    '''
    option = {'encoded': False, 'header': True, 'table': False} | kwargs
    if option['table']:
        # TableSchema data is never wrapped in a header
        option['header'] = False
    if isinstance(pd_array, pd.Series):
        connector, head = SeriesConnec, ':field'
    else:
        connector, head = DataFrameConnec, ':tab'
    jsn = connector.to_json_ntv(pd_array, table=option['table'])[0]
    if option['header']:
        jsn = {head: jsn}
    return json.dumps(jsn) if option['encoded'] else jsn

convert pandas Series or Dataframe to JSON text or JSON Value.

parameters

  • pd_array : Series or Dataframe to convert
  • encoded : boolean (default: False) - if True return a JSON text else a JSON value
  • header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
  • table : boolean (default False) - if True return TableSchema format
class DataFrameConnec(json_ntv.ntv_util.NtvConnector):
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        Each field of the Ntv entity is decoded into a list
        (name, type, codec, parent, keys, coef, length), converted into a
        Series and the Series are assembled into a DataFrame (a column named
        'index' becomes the index of the DataFrame).

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        fields = [list(NtvUtil.decode_ntv_tab(ntv_field, PdUtil.decode_ntv_to_val))
                  for ntv_field in Ntv.fast(ntv_value)]
        leng = max(field[6] for field in fields)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, field in enumerate(fields):
            # a field without parent, keys and coef is a full-length field
            no_keys.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(ind, fields, leng)
            field[2] = Ntv.fast(Ntv.obj_ntv(
                field[2], typ=field[1], single=len(field[2]) == 1))
        list_series = []
        for ind, without_keys in enumerate(no_keys):
            keys = None if without_keys else fields[ind][4]
            list_series.append(SeriesConnec.to_series(
                fields[ind][2], fields[ind][0], keys, **option))
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        ('tab' if not defined),
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format'''
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not kwargs.get('table', False):
            # the index is included as a column of the json value
            df2 = value.reset_index()
            fields = [SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                      for col in df2.columns]
            return (Ntv.obj(fields).to_obj(), name, ntv_typ)
        converted = {}
        for col in value.columns:
            col_name = NtvUtil.from_obj_name(col)[0]
            col_type = SeriesConnec.to_json_ntv(
                value[col], table=True, no_val=True)[1]
            converted[col_name] = PdUtil.convert(col_type, value[col])
        table_txt = pd.DataFrame(converted).to_json(
            orient='table', date_format='iso', default_handler=str)
        table_val = json.loads(table_txt)
        for nam in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[nam], table=True, no_val=True)
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        list_idx = [SeriesConnec.to_idx(ser) for _, ser in dtf.items()]
        return (list_idx, len(dtf))

NTV connector for pandas DataFrame.

One static method is included:

  • to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
180    @staticmethod
181    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
182        ''' convert json ntv_value into a DataFrame.
183
184        *Parameters*
185
186        - **index** : list (default None) - list of index values,
187        - **alias** : boolean (default False) - if True, alias dtype else default dtype
188        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
189        series = SeriesConnec.to_series
190
191        ntv = Ntv.fast(ntv_value)
192        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
193                for ntvf in ntv]
194        leng = max([idx[6] for idx in lidx])
195        option = kwargs | {'leng': leng}
196        no_keys = []
197        for ind in range(len(lidx)):
198            lind = lidx[ind]
199            no_keys.append(not lind[3] and not lind[4] and not lind[5])
200            NtvConnector.init_ntv_keys(ind, lidx, leng)
201            lind[2] = Ntv.fast(Ntv.obj_ntv(
202                lind[2], typ=lind[1], single=len(lind[2]) == 1))
203        list_series = [series(lidx[ind][2], lidx[ind][0], None if no_keys[ind]
204                              else lidx[ind][4], **option) for ind in range(len(lidx))]
205        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
206        return PdUtil.pd_index(dfr)

convert json ntv_value into a DataFrame.

Parameters

  • index : list (default None) - list of index values,
  • alias : boolean (default False) - if True, alias dtype else default dtype
  • annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
208    @staticmethod
209    def to_json_ntv(value, name=None, typ=None, **kwargs):
210        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
211
212        *Parameters*
213
214        - **typ** : string (default None) - type of the NTV object,
215        - **name** : string (default None) - name of the NTV object
216        - **value** : DataFrame values
217        - **table** : boolean (default False) - if True return TableSchema format'''
218
219        table = kwargs.get('table', False)
220        if not table:
221            df2 = value.reset_index()
222            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
223                           for col in df2.columns]).to_obj()
224            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
225        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
226            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
227            value[col]) for col in value.columns})
228        table_val = json.loads(df2.to_json(orient='table',
229                                           date_format='iso', default_handler=str))
230        for nam in value.columns:
231            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
232                value[nam], table=True, no_val=True)
233            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
234                                                      ntv_name, ntv_type)
235        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)

convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : DataFrame values
  • table : boolean (default False) - if True return TableSchema format
@staticmethod
def to_listidx(dtf):
237    @staticmethod
238    def to_listidx(dtf):
239        ''' convert a DataFrame in categorical data
240
241        *Return: tuple with:*
242
243        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
244        - **lenght** of the DataFrame'''
245        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))

convert a DataFrame in categorical data

Return: tuple with:

  • list of dict (keys : 'codec', 'name', 'keys') for each column
  • length of the DataFrame
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class SeriesConnec(json_ntv.ntv_util.NtvConnector):
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # type mapping (NTV type / pandas dtype) read from 'ntv_pandas.ini'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    # TableSchema mapping read from 'ntv_table.ini'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        None is returned if ntv_value is None or if the field has a parent
        and no external keys are given.

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        extkeys = option['extkeys']
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        if parent and not extkeys:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif extkeys:
            # external keys are relative to the parent keys if a parent exists
            ntv_keys = (NtvConnector.keysfromderkeys(extkeys, ntv_keys)
                        if parent else extkeys)
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        ('field' if not defined),
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and table is True)
        return (ntv_name, ntv_type)'''
        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        dtype_name = value.dtype.name
        srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
        sr_name = srs.name or ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)

        out_typ = typ if typ else SeriesConnec.clas_typ
        if srs.dtype.name == 'category':
            # categorical data: [codec, keys] where keys is a single
            # coefficient when the codes can be encoded that way
            categories = pd.Series(srs.cat.categories)
            cat_type = PdUtil.ntv_type(name_type, categories.dtype.name)
            cat_value = NtvList(PdUtil.ntv_val(cat_type, categories),
                                ntv_type=cat_type)
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            keys_value = NtvList([coef]) if coef else NtvList(cod_value)
            ntv_value = [cat_value, keys_value]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
                                     def_type=ntv_type).ntv_value
        if len(ntv_value) == 1:
            single = ntv_value[0]
            single.set_name(ntv_name)
            return (single.to_obj(), name, out_typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, out_typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are converted into datetime expressed in UTC.
        '''
        categ = ser.astype('category')
        codec = list(categ.cat.categories)
        if codec and isinstance(codec[0], pd.Timestamp):
            # NOTE(review): astimezone seems to read naive datetime as local
            # time - to be confirmed
            utc = datetime.timezone.utc
            codec = [stamp.to_pydatetime().astimezone(utc) for stamp in codec]
        return {'codec': codec, 'name': ser.name,
                'keys': list(categ.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        With keys, a categorical Series is built (categories: codec,
        codes: keys). Without keys, the Series contains the codec values
        (a single value is repeated `leng` times).

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the categorical Series (None or empty
        if the Series is not categorical)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (used only with keys)
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if not ntv_keys:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)
        else:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')

        # alias dtype if requested, else default dtype
        mapping = astype if option['alias'] else SeriesConnec.deftype
        return srs.astype(mapping.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type (the Series is not renamed if None)'''
        from io import StringIO

        # a literal JSON string given to read_json is deprecated (pandas 2.1):
        # the text is wrapped in a file-like object
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)

NTV connector for pandas Series

Two static methods are included:

  • to_idx: convert a Series in categorical data
  • to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
273    @staticmethod
274    def to_obj_ntv(ntv_value, **kwargs):
275        '''Generate a Series Object from a Ntv field object
276
277        *Parameters*
278
279        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
280
281        *parameters (kwargs)*
282
283        - **extkeys**: list (default None) - keys to use if not present in ntv_value
284        - **decode_str**: boolean (default False) - if True, string values are converted
285        in object values
286        - **index**: list (default None) - if present, add the index in Series
287        - **leng**: integer (default None) - leng of the Series (used with single codec value)
288        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
289        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
290        '''
291        option = {'extkeys': None, 'decode_str': False, 'leng': None,
292                  'annotated': False} | kwargs
293        if ntv_value is None:
294            return None
295        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
296
297        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
298            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
299        if parent and not option['extkeys']:
300            return None
301        if coef:
302            ntv_keys = NtvConnector.keysfromcoef(
303                coef, leng_field//coef, option['leng'])
304        elif option['extkeys'] and parent:
305            ntv_keys = NtvConnector.keysfromderkeys(
306                option['extkeys'], ntv_keys)
307        elif option['extkeys'] and not parent:
308            ntv_keys = option['extkeys']
309        ntv_codec = Ntv.fast(Ntv.obj_ntv(
310            codec, typ=typ, single=len(codec) == 1))
311        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

Generate a Series Object from a Ntv field object

Parameters

  • ntv_value: Ntv object or Ntv value - value to convert in Series

parameters (kwargs)

  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
313    @staticmethod
314    def to_json_ntv(value, name=None, typ=None, **kwargs):
315        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).
316
317        *Parameters*
318
319        - **typ** : string (default None) - type of the NTV object,
320        - **name** : string (default None) - name of the NTV object
321        - **value** : Series values
322        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
323        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)'''
324
325        table = kwargs.get('table', False)
326        no_val = kwargs.get('no_val', False)
327        srs = value.astype(SeriesConnec.astype.get(
328            value.dtype.name, value.dtype.name))
329        sr_name = srs.name if srs.name else ''
330        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]
331
332        if table:
333            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
334            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
335            if no_val:
336                return (ntv_name, ntv_type)
337            return (ntv_value, ntv_name, ntv_type)
338        if srs.dtype.name == 'category':
339            cdc = pd.Series(srs.cat.categories)
340            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
341            cat_value = PdUtil.ntv_val(ntv_type, cdc)
342            cat_value = NtvList(cat_value, ntv_type=ntv_type)
343            cod_value = list(srs.cat.codes)
344            coef = NtvConnector.encode_coef(cod_value)
345            ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)]
346            ntv_type = None
347        else:
348            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
349            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs), 
350                                     def_type=ntv_type).ntv_value
351        if len(ntv_value) == 1:
352            ntv_value[0].set_name(ntv_name) 
353            return (ntv_value[0].to_obj(), name, 
354                    SeriesConnec.clas_typ if not typ else typ)
355        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
356                SeriesConnec.clas_typ if not typ else typ)

convert a Series (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : Series values
  • table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
  • no_val : boolean (default False) - if True return (ntv_name, ntv_type)
@staticmethod
def to_idx(ser):
358    @staticmethod
359    def to_idx(ser):
360        ''' convert a Series in categorical data
361
362        *return (dict)*
363
364        { 'codec': 'list of pandas categories',
365          'name': 'name of the series',
366          'keys': 'list of pandas codes' }
367        '''
368        idx = ser.astype('category')
369        lis = list(idx.cat.categories)
370        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):
371            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
372                   for ts in lis]
373        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

convert a Series in categorical data

return (dict)

{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }

@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
375    @staticmethod
376    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
377        ''' return a pd.Series from Field data (codec, name, keys)
378
379        *Parameters*
380
381        - **ntv_codec**: Ntv object - codec value to convert in Series values
382        - **ntv_type**: string - default type to apply to convert in dtype
383        - **ntv_name**: string - name of the Series
384
385        *parameters (kwargs)*
386
387        - **index**: list (default None) - if present, add the index in Series
388        - **leng**: integer (default None) - leng of the Series (used with single codec value)
389        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
390        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
391        '''
392        option = {'index': None, 'leng': None, 'alias': False,
393                  'annotated': False} | kwargs
394        types = SeriesConnec.types.set_index('ntv_type')
395        astype = SeriesConnec.astype
396        leng = option['leng']
397
398        ntv_type = ntv_codec.type_str
399        len_unique = leng if len(ntv_codec) == 1 and leng else 1
400        pd_convert = ntv_type in types.index
401
402        pd_name, name_type, dtype = PdUtil.pd_name(
403            ntv_name, ntv_type, pd_convert)
404        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
405                                 option['annotated'], pd_convert)
406        if ntv_keys:
407            if pd_convert and name_type != 'array':
408                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
409                cat_type = categ.dtype.name
410                categories = categ.astype(astype.get(cat_type, cat_type))
411            else:
412                categories = pd.Series(ntv_obj, dtype='object')
413            cat = pd.CategoricalDtype(categories=categories)
414            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
415            srs = pd.Series(data, name=pd_name,
416                            index=option['index'], dtype='category')
417        else:
418            data = ntv_obj * len_unique
419            if pd_convert:
420                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
421            else:
422                srs = pd.Series(data, name=pd_name, dtype=dtype)
423
424        if option['alias']:
425            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
426        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

return a pd.Series from Field data (codec, name, keys)

Parameters

  • ntv_codec: Ntv object - codec value to convert in Series values
  • ntv_keys: list - codes of the values; if not empty, a categorical Series is built (the NTVtype used to select the dtype is the type of ntv_codec)
  • ntv_name: string - name of the Series

parameters (kwargs)

  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class PdUtil:
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **decode_ntv_to_val**: return a value from a ntv_field
    - **pd_name**: return a tuple with the name of the Series, the type deduced
    from the name and the dtype
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series.

        *Parameters*

        - **jsn** : dict - TableSchema data with a 'schema' key (including the
        'fields' list) and a 'data' key,
        - **kwargs** : accepted but not used.

        *return* : DataFrame, or Series if the DataFrame has a single column'''
        # local import: read_json is fed with a file-like object
        from io import StringIO

        fields = jsn['schema']['fields']
        ntv_type = PdUtil.ntvtype_table(fields)
        name = PdUtil.name_table(fields)
        # (pd_name, name_type, dtype) is resolved once per field
        resolved = [PdUtil.pd_name(nam, ntvtyp, table=True)
                    for nam, ntvtyp in zip(name, ntv_type)]
        pd_name = [res[0] for res in resolved]
        pd_dtype = [res[2] for res in resolved]
        # passing a literal JSON string to read_json is deprecated in pandas
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                         for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field.

        A NtvSingle gives its simple value, any other entity gives the list
        of the json-values of its items.'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table.

        Fields named 'index' are skipped and the name 'values' is replaced
        by None.'''
        names = [field.get('name') for field in fields
                 if field.get('name') != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table.

        A missing 'format' key is read as 'default'.'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                                 field.get('type'))
                for field in fields if field.get('name') != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'.

        The schema is updated in place and returned ('extDtype' is removed,
        'format' is removed when it is 'default').'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        The Series given by the caller is not modified.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        # 'convert' may return the caller's Series itself: the renaming is
        # done on a shallow copy to leave the caller's object unchanged
        srs = PdUtil.convert(ntv_type, srs).copy(deep=False)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        # the schema field to update is looked up under 'values' if unnamed
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - apply to json function

        The Series itself is returned when no conversion applies.'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion

        A non empty name_type is returned as is. Otherwise the NTVtype is
        searched from the dtype (None if not found and table is False).
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return None
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series.

        *Parameters*

        - **ntv_codec** : Ntv object - codec value,
        - **name_type** : string - type used for the conversion,
        - **annotated** : boolean - passed as 'simpleval' to obj_value,
        - **pd_convert** : boolean - True if a pandas conversion is applied'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index.

        If a column 'index' is present, it becomes the (unnamed) index of
        the returned DataFrame; otherwise the DataFrame is returned as is.'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype.

        *Parameters*

        - **ntv_name** : string - name of the NTV entity (None is read as ''),
        - **ntv_type** : string - NTVtype of the entity,
        - **pd_convert** : boolean (default True) - True if a pandas conversion
        is applied,
        - **table** : boolean (default False) - True if Table Schema conversion'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            # no conversion: the NTVtype is kept in the name, dtype is 'object'
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value.

        A categorical or an empty Series is returned unchanged. If all the
        values are equal to the first one, a Series reduced to its first row
        is returned; otherwise the Series is returned unchanged.'''
        # an empty Series has no first value to compare with
        if str(srs.dtype) == 'category' or len(srs) == 0:
            return srs
        values = srs.values
        return srs[:1] if np.array_equal(values, [values[0]] * len(srs)) else srs

ntv-pandas utilities.

This class includes static methods:

Ntv and pandas

  • ntv_type: return NTVtype from name_type and dtype of a Series
  • convert: convert Series with external NTVtype
  • ntv_val: convert a simple Series into NTV json-value
  • ntv_obj: return a list of values to convert in a Series
  • pd_name: return a tuple with the name of the Series and the type deduced from the name
  • pd_index: return a DataFrame with index
  • unic: return simple value if the Series contains a single value

TableSchema

  • to_obj_table: convert json TableSchema data into a DataFrame or a Series
  • name_table: return a list of non index field's names from a json Table
  • ntvtype_table: return a list of non index field's ntv_type from a json Table
  • table_schema: add 'format' and 'type' keys in a Json TableSchema
  • table_val: convert a Series into TableSchema json-value
  • ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
468    @staticmethod
469    def to_obj_table(jsn, **kwargs):
470        ''' convert json TableSchema data into a DataFrame or a Series'''
471        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
472        name = PdUtil.name_table(jsn['schema']['fields'])
473        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
474                   for nam, ntvtyp in zip(name, ntv_type)]
475        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
476                    for nam, ntvtyp in zip(name, ntv_type)]
477        dfr = pd.read_json(json.dumps(jsn['data']), orient='record')
478        dfr = PdUtil.pd_index(dfr)
479        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
480                            for ind, col in enumerate(dfr.columns)})
481        dfr = dfr.astype({col: pd_dtype[ind]
482                         for ind, col in enumerate(dfr.columns)})
483        dfr.columns = pd_name
484        if len(dfr.columns) == 1:
485            return dfr[dfr.columns[0]]
486        return dfr

convert json TableSchema data into a DataFrame or a Series

@staticmethod
def decode_ntv_to_val(ntv):
488    @staticmethod 
489    def decode_ntv_to_val(ntv):
490        ''' return a value from a ntv_field'''
491        if isinstance(ntv, NtvSingle):
492            return ntv.to_obj(simpleval=True)
493        return [ntv_val.to_obj() for ntv_val in ntv]

return a value from a ntv_field

@staticmethod
def name_table(fields):
495    @staticmethod
496    def name_table(fields):
497        '''return a list of non index field's names from a json Table'''
498        names = [field.get('name', None) for field in fields
499                 if field.get('name', None) != 'index']
500        return [None if name == 'values' else name for name in names]

return a list of non index field's names from a json Table

@staticmethod
def ntvtype_table(fields):
502    @staticmethod
503    def ntvtype_table(fields):
504        '''return a list of non index field's ntv_type from a json Table'''
505        return [PdUtil.ntv_table(field.get('format', 'default'),
506                field.get('type', None)) for field in fields
507                if field.get('name', None) != 'index']

return a list of non index field's ntv_type from a json Table

@staticmethod
def table_schema(schema, name, ntv_type):
509    @staticmethod
510    def table_schema(schema, name, ntv_type):
511        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
512        for the field defined by 'name' '''
513        ind = [field['name'] for field in schema['fields']].index(name)
514        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
515        if tabletype['format'] == 'default':
516            schema['fields'][ind].pop('format', None)
517        else:
518            schema['fields'][ind]['format'] = tabletype['format']
519        schema['fields'][ind]['type'] = tabletype['type']
520        schema['fields'][ind].pop('extDtype', None)
521        return schema

convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema for the field defined by 'name'

@staticmethod
def table_val(ntv_type, ntv_name, srs):
523    @staticmethod
524    def table_val(ntv_type, ntv_name, srs):
525        '''convert a Series into TableSchema json-value.
526
527        *Parameters*
528
529        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
530        - **ntv_name**: string - name of the Series
531        - **srs** : Series to be converted.'''
532        srs = PdUtil.convert(ntv_type, srs)
533        srs.name = ntv_name
534        tab_val = json.loads(srs.to_json(orient='table',
535                                         date_format='iso', default_handler=str))
536        name = 'values' if srs.name is None else srs.name
537        tab_val['schema'] = PdUtil.table_schema(
538            tab_val['schema'], name, ntv_type)
539        return tab_val

convert a Series into TableSchema json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • ntv_name: string - name of the Series
  • srs : Series to be converted.
@staticmethod
def convert(ntv_type, srs, tojson=True):
541    @staticmethod
542    def convert(ntv_type, srs, tojson=True):
543        ''' convert Series with external NTVtype.
544
545        *Parameters*
546
547        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
548        - **srs** : Series to be converted.
549        - **tojson** : boolean (default True) - apply to json function'''
550        if tojson:
551            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
552                return srs.apply(ShapelyConnec.to_coord)
553            if ntv_type == 'geojson':
554                return srs.apply(ShapelyConnec.to_geojson)
555            if ntv_type == 'date':
556                return srs.astype(str)
557            return srs
558        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
559            return srs.apply(ShapelyConnec.to_geometry)
560        if ntv_type == 'geojson':
561            return srs.apply(ShapelyConnec.from_geojson)
562        if ntv_type == 'datetime':
563            return pd.to_datetime(srs)
564        if ntv_type == 'date':
565            return pd.to_datetime(srs).dt.date
566        if ntv_type == 'time':
567            return pd.to_datetime(srs).dt.time
568        return srs

convert Series with external NTVtype.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
  • tojson : boolean (default True) - apply to json function
@staticmethod
def ntv_type(name_type, dtype, table=False):
570    @staticmethod
571    def ntv_type(name_type, dtype, table=False):
572        ''' return NTVtype from name_type and dtype of a Series .
573
574        *Parameters*
575
576        - **name_type** : string - type included in the Series name,
577        - **dtype** : string - dtype of the Series.
578        - **table** : boolean (default False) - True if Table Schema conversion
579        '''
580        if not name_type:
581            types_none = SeriesConnec.types.set_index('name_type').loc[None]
582            if dtype in types_none.dtype.values:
583                return types_none.set_index('dtype').loc[dtype].ntv_type
584            if not table:
585                return None
586            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
587            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
588        return name_type

return NTVtype from name_type and dtype of a Series .

Parameters

  • name_type : string - type included in the Series name,
  • dtype : string - dtype of the Series.
  • table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
590    @staticmethod
591    def ntv_val(ntv_type, srs):
592        ''' convert a simple Series into NTV json-value.
593
594        *Parameters*
595
596        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
597        - **srs** : Series to be *converted.'''
598        srs = PdUtil.convert(ntv_type, srs)
599        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
600            return srs.to_list()
601        if srs.dtype.name == 'object':
602            return srs.to_list()
603        return json.loads(srs.to_json(orient='records',
604                                      date_format='iso', default_handler=str))

convert a simple Series into NTV json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
@staticmethod
def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
606    @staticmethod
607    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
608        '''return a list of values to convert in a Series'''
609        if pd_convert:
610            if name_type == 'array':
611                return ntv_codec.to_obj(format='obj', simpleval=True)
612            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
613                                          def_type=ntv_codec.type_str, fast=True)
614            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
615        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

return a list of values to convert in a Series

@staticmethod
def ntv_table(table_format, table_type):
617    @staticmethod
618    def ntv_table(table_format, table_type):
619        ''' return NTVtype from the TableSchema data.
620
621        *Parameters*
622
623        - **table_format** : string - TableSchema format,
624        - **table_type** : string - TableSchema type'''
625        return SeriesConnec.table.set_index(['type', 'format']).loc[
626            (table_type, table_format)].values[0]

return NTVtype from the TableSchema data.

Parameters

  • table_format : string - TableSchema format,
  • table_type : string - TableSchema type
@staticmethod
def pd_index(dfr):
628    @staticmethod
629    def pd_index(dfr):
630        '''return a DataFrame with index'''
631        if 'index' in dfr.columns:
632            dfr = dfr.set_index('index')
633            dfr.index.rename(None, inplace=True)
634        return dfr

return a DataFrame with index

@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
636    @staticmethod
637    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
638        '''return a tuple with the name of the Series, the type deduced from
639        the name and the dtype'''
640        ntv_name = '' if ntv_name is None else ntv_name
641        typtab = SeriesConnec.typtab.set_index('ntv_type')
642        types = SeriesConnec.types.set_index('ntv_type')
643        if table and ntv_type.lower() in typtab.index:
644            name_type = typtab.loc[ntv_type.lower()]['name_type']
645            dtype = typtab.loc[ntv_type.lower()]['dtype']
646        elif pd_convert or table:
647            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
648            dtype = types.loc[ntv_type]['dtype']
649        else:
650            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
651        dtype = SeriesConnec.deftype.get(dtype, dtype)  # ajout
652        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
653        return (pd_name if pd_name else None, name_type, dtype)

return a tuple with the name of the Series, the type deduced from the name and the dtype

@staticmethod
def unic(srs):
655    @staticmethod
656    def unic(srs):
657        ''' return simple value if the Series contains a single value'''
658        if str(srs.dtype) == 'category':
659            return srs
660        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

return simple value if the Series contains a single value