ntv-pandas.ntv_pandas.pandas_ntv_connector

Created on Feb 27 2023

@author: Philippe@loco-labs.io

The pandas_ntv_connector module is part of the ntv-pandas.ntv_pandas package (specification document).

A NtvConnector is defined by:

  • clas_obj: str - define the class name of the object to convert
  • clas_typ: str - define the NTVtype of the converted object
  • to_obj_ntv: method - converter from JsonNTV to the object
  • to_json_ntv: method - converter from the object to JsonNTV

It contains:

  • functions read_json and to_json to convert JSON data and pandas entities

  • the child classes of the NTV.json_ntv.ntv.NtvConnector abstract class:
      • DataFrameConnec: 'tab' connector
      • SeriesConnec: 'field' connector

  • a utility class with static methods: PdUtil

  1# -*- coding: utf-8 -*-
  2"""
  3Created on Feb 27 2023
  4
  5@author: Philippe@loco-labs.io
  6
  7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
  8([specification document](
  9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
 10
 11A NtvConnector is defined by:
 12- clas_obj: str - define the class name of the object to convert
 13- clas_typ: str - define the NTVtype of the converted object
 14- to_obj_ntv: method - converter from JsonNTV to the object
 15- to_json_ntv: method - converter from the object to JsonNTV
 16
 17It contains :
 18
 19- functions `read_json` and `to_json` to convert JSON data and pandas entities
 20
 21- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
 22    - `DataFrameConnec`: 'tab'   connector
 23    - `SeriesConnec`:    'field' connector
 24
 25- an utility class with static methods : `PdUtil`
 26"""
 27import os
 28import datetime
 29import json
 30import configparser
 31from pathlib import Path
 32import pandas as pd
 33import numpy as np
 34
 35
 36from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
 37from json_ntv.ntv_util import NtvUtil
 38from json_ntv.ntv_connector import ShapelyConnec
 39
 40path_ntv_pandas = Path(os.path.abspath(__file__)).parent
 41
 42
 43def to_json(pd_array, **kwargs):
 44    ''' convert pandas Series or Dataframe to JSON text or JSON Value.
 45
 46    *parameters*
 47
 48    - **pd_array** : Series or Dataframe to convert
 49    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
 50    - **header** : boolean (default: True) - if True the JSON data is included as
 51    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
 52    - **table** : boolean (default False) - if True return TableSchema format
 53    '''
 54    option = {'encoded': False, 'header': True, 'table': False} | kwargs
 55    option['header'] = False if option['table'] else option['header']
 56    if isinstance(pd_array, pd.Series):
 57        jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0]
 58        head = ':field'
 59    else:
 60        jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'])[0]
 61        head = ':tab'
 62    if option['header']:
 63        jsn = {head: jsn}
 64    if option['encoded']:
 65        return json.dumps(jsn)
 66    return jsn
 67
 68
 69def read_json(jsn, **kwargs):
 70    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
 71
 72    *parameters*
 73
 74    - **jsn** : JSON text or JSON value to convert
 75    - **extkeys**: list (default None) - keys to use if not present in ntv_value
 76    - **decode_str**: boolean (default False) - if True, string values are converted
 77    in object values
 78    - **leng**: integer (default None) - leng of the Series (used with single codec value)
 79    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
 80    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
 81    - **series**: boolean (default False) - used only without header. If True
 82    JSON data is converted into Series else DataFrame
 83    '''
 84    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
 85              'annotated': False, 'series': False} | kwargs
 86    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
 87    if 'schema' in jso:
 88        return PdUtil.to_obj_table(jso, **option)
 89    ntv = Ntv.from_obj(jso)
 90    if ntv.type_str == 'field':
 91        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
 92    if ntv.type_str == 'tab':
 93        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
 94    if option['series']:
 95        return SeriesConnec.to_obj_ntv(ntv, **option)
 96    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
 97
 98
 99def as_def_type(pd_array):
100    '''convert a Series or DataFrame with default dtype'''
101    if isinstance(pd_array, (pd.Series, pd.Index)):
102        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
103    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})
104
105
def equals(pdself, pdother):
    '''return True if both pandas objects are equal once converted to default dtypes.

    *Parameters*

    - **pdself**, **pdother** : Series, Index or DataFrame to compare

    Rules:

    - two DataFrame are equal if they have the same number of columns and the
    columns are pairwise equal (column names included),
    - two Series (or two Index) are equal if their names are equal and
    `pd.equals` is True after conversion with `as_def_type`,
    - two categorical Series are equal if their names, their codes (index
    included) and their categories are equal,
    - any other pair (e.g. a Series and a DataFrame) is not equal.'''
    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
        # zip() would silently ignore the extra columns of the wider DataFrame
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))
    if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series):
        if pdself.name != pdother.name:
            return False
        if str(pdself.dtype) == str(pdother.dtype) == 'category':
            # categories are pd.Index objects: they are compared by the Index
            # branch below, codes carry the values and the index of the Series
            return bool(pdself.cat.codes.equals(pdother.cat.codes)) and \
                equals(pdself.cat.categories, pdother.cat.categories)
        return bool(as_def_type(pdself).equals(as_def_type(pdother)))
    if isinstance(pdself, pd.Index) and isinstance(pdother, pd.Index):
        if pdself.name != pdother.name:
            return False
        return bool(as_def_type(pdself).equals(as_def_type(pdother)))
    # objects of different (or unsupported) kinds are never equal
    return False
122
123
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    One static method is included:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        Each field of the NTV value is decoded, converted into a Series with
        `SeriesConnec.to_series` and the Series are assembled in a DataFrame.
        The keyword arguments are forwarded to `SeriesConnec.to_series`
        ('leng' is replaced by the largest decoded field length).

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        build_series = SeriesConnec.to_series

        # decoded field: [name, type, codec, parent, keys, coef, length]
        fields = [list(NtvUtil.decode_ntv_tab(ntv_field, PdUtil.decode_ntv_to_val))
                  for ntv_field in Ntv.fast(ntv_value)]
        leng = max(field[6] for field in fields)
        option = kwargs | {'leng': leng}
        keyless = []
        for rank, field in enumerate(fields):
            # evaluated before init_ntv_keys, which receives the whole list
            keyless.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(rank, fields, leng)
            field[2] = Ntv.fast(Ntv.obj_ntv(
                field[2], typ=field[1], single=len(field[2]) == 1))
        series_list = [build_series(field[2], field[0],
                                    None if no_key else field[4], **option)
                       for field, no_key in zip(fields, keyless)]
        dfr = pd.DataFrame({ser.name: ser for ser in series_list})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        (`DataFrameConnec.clas_typ` if not defined),
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format'''
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not kwargs.get('table', False):
            # NTV format: the index is converted like any other column
            flat = value.reset_index()
            fields = [SeriesConnec.to_json_ntv(PdUtil.unic(flat[col]))[0]
                      for col in flat.columns]
            return (Ntv.obj(fields).to_obj(), name, ntv_typ)

        # TableSchema format: columns are converted first, then pandas
        # builds the table and the schema is completed field by field
        converted = {}
        for col in value.columns:
            key = NtvUtil.from_obj_name(col)[0]
            col_type = SeriesConnec.to_json_ntv(
                value[col], table=True, no_val=True)[1]
            converted[key] = PdUtil.convert(col_type, value[col])
        table_val = json.loads(pd.DataFrame(converted).to_json(
            orient='table', date_format='iso', default_handler=str))
        for col in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[col], table=True, no_val=True)
            table_val['schema'] = PdUtil.table_schema(
                table_val['schema'], ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        columns = [SeriesConnec.to_idx(column) for _, column in dtf.items()]
        return (columns, len(dtf))
202
203
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data

    The conversion tables are class attributes loaded from the 'ntv_pandas.ini'
    and 'ntv_table.ini' files located in the package directory.
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # 'ntv_pandas.ini': NTVtype / dtype mapping and alias dtypes
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype
    deftype = {val: key for key, val in astype.items()}
    # 'ntv_table.ini': NTVtype / TableSchema mapping
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        Return None if ntv_value is None or if the field has a parent and
        no 'extkeys' is given.

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        extkeys = option['extkeys']
        if parent and not extkeys:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif extkeys and parent:
            ntv_keys = NtvConnector.keysfromderkeys(extkeys, ntv_keys)
        elif extkeys:
            ntv_keys = extkeys
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        (`SeriesConnec.clas_typ` if not defined),
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - used only if table is True.
        If True return (ntv_name, ntv_type)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        srs = value.astype(SeriesConnec.astype.get(
            value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            if no_val:
                # only the schema data is requested: the values are not converted
                return (ntv_name, ntv_type)
            return (PdUtil.table_val(ntv_type, ntv_name, srs), ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # categorical data: [categories, codes] (codes replaced by a
            # single coefficient when encode_coef returns one)
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)
        ntv_typ = typ if typ else SeriesConnec.clas_typ
        if len(ntv_value) == 1:
            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
                    ntv_typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, ntv_typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as datetime objects converted to UTC.
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values. If present (not empty), a
        categorical Series is built from the codec (categories) and the codes

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (used only with ntv_keys)
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        # alias dtype or default dtype, according to the 'alias' option
        mapping = astype if option['alias'] else SeriesConnec.deftype
        return srs.astype(mapping.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type. If None, the Series is not renamed'''
        # local import: keeps this method self-contained
        from io import StringIO

        # read_json no longer accepts a literal JSON string without a
        # deprecation warning (pandas 2.1): a file-like object is required
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)
398
399
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **decode_ntv_to_val**: return a value from a ntv_field
    - **pd_name**: return a tuple with the name of the Series and the type deduced from the name
    - **pd_index**: return a DataFrame with index
    - **unic**: return a single-row Series if all the values of the Series are equal

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series.

        *Parameters*

        - **jsn** : dict - TableSchema data ('schema' and 'data' members)

        A Series is returned if the table contains a single non index column.
        The keyword arguments are accepted but not used.'''
        # local import: keeps this method self-contained
        from io import StringIO

        fields = jsn['schema']['fields']
        ntv_type = PdUtil.ntvtype_table(fields)
        name = PdUtil.name_table(fields)
        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
                    for nam, ntvtyp in zip(name, ntv_type)]
        # read_json requires a file-like object (literal JSON is deprecated
        # since pandas 2.1) and 'records' is the documented orient value
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                         for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field (simple value for a NtvSingle,
        list of values otherwise)'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'.

        The schema is updated in place and returned ('extDtype' key is removed,
        'format' key is removed if the format is 'default').'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - if True convert the values to
        json values else convert json values to object values.

        The Series is returned unchanged for the other NTVtypes.'''
        geometry = ('point', 'line', 'polygon', 'geometry')
        if tojson:
            if ntv_type in geometry:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in geometry:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        If name_type is defined it is returned unchanged, else the NTVtype is
        deduced from the dtype ('json' if the dtype is unknown and table is False).

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return 'json'
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index: if present, the 'index' column
        becomes the (unnamed) index of the DataFrame'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            # no pandas conversion: the NTVtype is kept in the name
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added: use the default dtype
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return a single-row Series (first row) if all the values of the
        Series are equal, else the Series itself.

        Categorical and empty Series are returned unchanged.'''
        # an empty Series has no first value to compare with
        if str(srs.dtype) == 'category' or srs.empty:
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
def to_json(pd_array, **kwargs):
def to_json(pd_array, **kwargs):
    ''' serialize a pandas Series or DataFrame as a JSON value or a JSON text.

    *parameters*

    - **pd_array** : Series or DataFrame to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for
    DataFrame (ignored when **table** is True)
    - **table** : boolean (default False) - if True return TableSchema format
    '''
    settings = {'encoded': False, 'header': True, 'table': False} | kwargs
    as_table = settings['table']
    if isinstance(pd_array, pd.Series):
        connector, key = SeriesConnec, ':field'
    else:
        connector, key = DataFrameConnec, ':tab'
    payload = connector.to_json_ntv(pd_array, table=as_table)[0]
    # a TableSchema result is never wrapped in a header object
    if settings['header'] and not as_table:
        payload = {key: payload}
    return json.dumps(payload) if settings['encoded'] else payload

Convert a pandas Series or DataFrame to JSON text or to a JSON value.

parameters

  • pd_array : Series or Dataframe to convert
  • encoded : boolean (default: False) - if True return a JSON text else a JSON value
  • header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
  • table : boolean (default False) - if True return TableSchema format
def read_json(jsn, **kwargs):
def read_json(jsn, **kwargs):
    ''' convert JSON text or JSON Value to pandas Series or DataFrame.

    The input is handled as TableSchema data when it is a JSON object with a
    'schema' member, otherwise as NTV data.

    *parameters*

    - **jsn** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - length of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame
    '''
    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
              'annotated': False, 'series': False} | kwargs
    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
    # Only a JSON object can be TableSchema data. A bare `in` test would also
    # match a list item or a substring of a decoded JSON string.
    if isinstance(jso, dict) and 'schema' in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == 'field':
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == 'tab':
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    # no header: the 'series' option selects the kind of object to build
    if option['series']:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)

Convert JSON text or a JSON value to a pandas Series or DataFrame.

parameters

  • jsn : JSON text or JSON value to convert
  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
  • series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def as_def_type(pd_array):
def as_def_type(pd_array):
    '''return a copy of a Series, Index or DataFrame converted to default dtype.

    *Parameters*

    - **pd_array** : Series, Index or DataFrame to convert. Any object that is
    neither a Series nor an Index is processed column by column through its
    `columns` attribute.

    The default dtype is looked up in `SeriesConnec.deftype` with the name of
    the current dtype; the dtype is unchanged when the name is not a key of
    `SeriesConnec.deftype`.'''
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        # DataFrame: rebuild it from its individually converted columns
        converted = {}
        for col in pd_array.columns:
            converted[col] = as_def_type(pd_array[col])
        return pd.DataFrame(converted)
    default_dtype = SeriesConnec.deftype.get(pd_array.dtype.name,
                                             pd_array.dtype.name)
    return pd_array.astype(default_dtype)

convert a Series or DataFrame with default dtype

def equals(pdself, pdother):
def equals(pdself, pdother):
    '''return True if two pandas objects hold the same data.

    *Parameters*

    - **pdself**, **pdother** : Series, DataFrame or Index to compare

    *Rules*

    - two Series are equal if their names are equal and:
        - non categorical Series: `pd.Series.equals` is True after conversion
        of both Series with `as_def_type`,
        - categorical Series: categories are equal (compared as Index) and
        codes are equal,
    - two DataFrame are equal if they have the same number of columns and if
    the columns are equal, pairwise and in order,
    - two Index are equal if `pd.Index.equals` is True after conversion of
    both Index with `as_def_type`,
    - any other combination (e.g. a Series and a DataFrame) is not equal.
    '''
    if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series):
        # cheap test first: avoids any dtype conversion when names differ
        if pdself.name != pdother.name:
            return False
        if str(pdself.dtype) == str(pdother.dtype) == 'category':
            # categories are pd.Index objects (handled by the Index branch);
            # codes have to be compared too, otherwise two Series sharing
            # the same categories would always be reported as equal
            return (equals(pdself.cat.categories, pdother.cat.categories)
                    and pdself.cat.codes.equals(pdother.cat.codes))
        return as_def_type(pdself).equals(as_def_type(pdother))
    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
        # zip() stops at the shortest operand: check the width explicitly
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))
    if isinstance(pdself, pd.Index) and isinstance(pdother, pd.Index):
        return as_def_type(pdself).equals(as_def_type(pdother))
    return False

return True if pd.equals is True and names are equal and dtype of categories are equal

class DataFrameConnec(json_ntv.ntv_util.NtvConnector):
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    The connector methods `to_obj_ntv` and `to_json_ntv` are completed by one
    static method:

    - to_listidx: convert a DataFrame in categorical data
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **ntv_value** : Ntv object or Ntv value - 'tab' value to convert

        *parameters (kwargs)* - forwarded to `SeriesConnec.to_series`

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        The 'leng' option is always replaced by the longest field length.'''
        # each item of lidx is a list:
        # [name, type, codec, parent, keys, coef, leng] (one item per field)
        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
                for ntvf in Ntv.fast(ntv_value)]
        leng = max(idx[6] for idx in lidx)
        option = kwargs | {'leng': leng}
        no_keys = []
        for ind, lind in enumerate(lidx):
            # has to be evaluated before init_ntv_keys, which updates lidx
            no_keys.append(not (lind[3] or lind[4] or lind[5]))
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            lind[2] = Ntv.fast(Ntv.obj_ntv(
                lind[2], typ=lind[1], single=len(lind[2]) == 1))
        # fields without parent, keys and coef are built without keys
        list_series = [SeriesConnec.to_series(lind[2], lind[0],
                                              None if no_key else lind[4],
                                              **option)
                       for lind, no_key in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        ('tab' if None or empty),
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format

        *Return: tuple (json-value, name, type)*'''
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not kwargs.get('table', False):
            # NTV format: the index is converted like the other columns
            df2 = value.reset_index()
            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                           for col in df2.columns]).to_obj()
            return (jsn, name, ntv_typ)
        # TableSchema format: (ntv_name, ntv_type) is computed only once per
        # column and used for both the data conversion and the schema update
        fields = [SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)
                  for col in value.columns]
        df2 = pd.DataFrame({
            NtvUtil.from_obj_name(col)[0]: PdUtil.convert(ntv_type, value[col])
            for col, (_, ntv_type) in zip(value.columns, fields)})
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        for ntv_name, ntv_type in fields:
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))

NTV connector for pandas DataFrame.

One static method is included:

  • to_listidx: convert a DataFrame in categorical data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
137    @staticmethod
138    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
139        ''' convert json ntv_value into a DataFrame.
140
141        *Parameters*
142
143        - **index** : list (default None) - list of index values,
144        - **alias** : boolean (default False) - if True, alias dtype else default dtype
145        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
146        series = SeriesConnec.to_series
147
148        ntv = Ntv.fast(ntv_value)
149        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
150                for ntvf in ntv]
151        leng = max([idx[6] for idx in lidx])
152        option = kwargs | {'leng': leng}
153        no_keys = []
154        for ind in range(len(lidx)):
155            lind = lidx[ind]
156            no_keys.append(not lind[3] and not lind[4] and not lind[5])
157            NtvConnector.init_ntv_keys(ind, lidx, leng)
158            lind[2] = Ntv.fast(Ntv.obj_ntv(
159                lind[2], typ=lind[1], single=len(lind[2]) == 1))
160        list_series = [series(lidx[ind][2], lidx[ind][0], None if no_keys[ind]
161                              else lidx[ind][4], **option) for ind in range(len(lidx))]
162        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
163        return PdUtil.pd_index(dfr)

convert json ntv_value into a DataFrame.

Parameters

  • index : list (default None) - list of index values,
  • alias : boolean (default False) - if True, alias dtype else default dtype
  • annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
165    @staticmethod
166    def to_json_ntv(value, name=None, typ=None, **kwargs):
167        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
168
169        *Parameters*
170
171        - **typ** : string (default None) - type of the NTV object,
172        - **name** : string (default None) - name of the NTV object
173        - **value** : DataFrame values
174        - **table** : boolean (default False) - if True return TableSchema format'''
175
176        table = kwargs.get('table', False)
177        if not table:
178            df2 = value.reset_index()
179            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
180                           for col in df2.columns]).to_obj()
181            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
182        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
183            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
184            value[col]) for col in value.columns})
185        table_val = json.loads(df2.to_json(orient='table',
186                                           date_format='iso', default_handler=str))
187        for nam in value.columns:
188            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
189                value[nam], table=True, no_val=True)
190            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
191                                                      ntv_name, ntv_type)
192        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)

convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : DataFrame values
  • table : boolean (default False) - if True return TableSchema format
@staticmethod
def to_listidx(dtf):
194    @staticmethod
195    def to_listidx(dtf):
196        ''' convert a DataFrame in categorical data
197
198        *Return: tuple with:*
199
200        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
201        - **lenght** of the DataFrame'''
202        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))

convert a DataFrame in categorical data

Return: tuple with:

  • list of dict (keys : 'codec', 'name', 'keys') for each column
  • length of the DataFrame
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class SeriesConnec(json_ntv.ntv_util.NtvConnector):
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Class attributes are loaded when the class is created:

    - from 'ntv_pandas.ini': `types` (DataFrame), `astype` (dict) and
    `deftype` (dict, reverse of `astype`)
    - from 'ntv_table.ini': `table` (DataFrame) and `typtab` (DataFrame)

    Both files are read in the directory `path_ntv_pandas`.

    Two static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    deftype = {val: key for key, val in astype.items()}
    # `config` is rebound: after class creation it holds 'ntv_table.ini'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Return* : Series, or None if ntv_value is None or if the field has a
        parent and no 'extkeys' is given.
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        extkeys = option['extkeys']
        if parent and not extkeys:
            # a derived field cannot be rebuilt without the external keys
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif extkeys:
            # with a parent, keys are derived from the external keys
            # else the external keys are used as they are
            ntv_keys = (NtvConnector.keysfromderkeys(extkeys, ntv_keys)
                        if parent else extkeys)
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
        ('field' if None or empty),
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)
        (used only if table is True)'''
        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        dtype_name = value.dtype.name
        srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
        # a falsy Series name (None, '', 0...) is handled as an empty name
        ntv_name, name_type = NtvUtil.from_obj_name(srs.name or '')[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)

        if srs.dtype.name == 'category':
            # categorical Series: json-value is [categories, codes]
            # (codes are replaced by [coef] when encode_coef returns one)
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, [coef] if coef else cod_value]
            ntv_type = 'json'
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = PdUtil.ntv_val(ntv_type, srs)

        if len(ntv_value) == 1:
            ntv_field = NtvSingle(ntv_value[0], ntv_name, ntv_type)
        else:
            ntv_field = NtvList(ntv_value, ntv_name, ntv_type)
        return (ntv_field.to_obj(), name, typ or SeriesConnec.clas_typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as datetime with UTC timezone.
        '''
        idx = ser.astype('category')
        codec = list(idx.cat.categories)
        if codec and isinstance(codec[0], pd.Timestamp):
            # NOTE(review): datetime.astimezone assumes local time for naive
            # values - confirm this is the expected behaviour
            codec = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                     for ts in codec]
        return {'codec': codec, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the codec values. If empty or None the
        Series is not categorical

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (used only with ntv_keys)
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times (non categorical case)
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            # categorical Series: codec values are the categories
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        dtype_map = astype if option['alias'] else SeriesConnec.deftype
        return srs.astype(dtype_map.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type. If None, the Series is not renamed'''
        from io import StringIO

        # literal json strings are deprecated in pd.read_json (pandas 2.1):
        # the json text is passed as a file-like object
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype,
                           typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)

NTV connector for pandas Series

Two static methods are included:

  • to_idx: convert a Series in categorical data
  • to_series: return a Series from Field data
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
230    @staticmethod
231    def to_obj_ntv(ntv_value, **kwargs):
232        '''Generate a Series Object from a Ntv field object
233
234        *Parameters*
235
236        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
237
238        *parameters (kwargs)*
239
240        - **extkeys**: list (default None) - keys to use if not present in ntv_value
241        - **decode_str**: boolean (default False) - if True, string values are converted
242        in object values
243        - **index**: list (default None) - if present, add the index in Series
244        - **leng**: integer (default None) - leng of the Series (used with single codec value)
245        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
246        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
247        '''
248        option = {'extkeys': None, 'decode_str': False, 'leng': None,
249                  'annotated': False} | kwargs
250        if ntv_value is None:
251            return None
252        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
253
254        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
255            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
256        if parent and not option['extkeys']:
257            return None
258        if coef:
259            ntv_keys = NtvConnector.keysfromcoef(
260                coef, leng_field//coef, option['leng'])
261        elif option['extkeys'] and parent:
262            ntv_keys = NtvConnector.keysfromderkeys(
263                option['extkeys'], ntv_keys)
264        elif option['extkeys'] and not parent:
265            ntv_keys = option['extkeys']
266        ntv_codec = Ntv.fast(Ntv.obj_ntv(
267            codec, typ=typ, single=len(codec) == 1))
268        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

Generate a Series Object from a Ntv field object

Parameters

  • ntv_value: Ntv object or Ntv value - value to convert in Series

parameters (kwargs)

  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
270    @staticmethod
271    def to_json_ntv(value, name=None, typ=None, **kwargs):
272        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).
273
274        *Parameters*
275
276        - **typ** : string (default None) - type of the NTV object,
277        - **name** : string (default None) - name of the NTV object
278        - **value** : Series values
279        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
280        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)'''
281
282        table = kwargs.get('table', False)
283        no_val = kwargs.get('no_val', False)
284        srs = value.astype(SeriesConnec.astype.get(
285            value.dtype.name, value.dtype.name))
286        sr_name = srs.name if srs.name else ''
287        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]
288
289        if table:
290            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
291            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
292            if no_val:
293                return (ntv_name, ntv_type)
294            return (ntv_value, ntv_name, ntv_type)
295        if srs.dtype.name == 'category':
296            cdc = pd.Series(srs.cat.categories)
297            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
298            cat_value = PdUtil.ntv_val(ntv_type, cdc)
299            cat_value = NtvList(cat_value, ntv_type=ntv_type).to_obj()
300            cod_value = list(srs.cat.codes)
301            coef = NtvConnector.encode_coef(cod_value)
302            ntv_value = [cat_value, [coef] if coef else cod_value]
303            ntv_type = 'json'
304        else:
305            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
306            ntv_value = PdUtil.ntv_val(ntv_type, srs)
307        if len(ntv_value) == 1:
308            return (NtvSingle(ntv_value[0], ntv_name, ntv_type).to_obj(), name,
309                    SeriesConnec.clas_typ if not typ else typ)
310        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
311                SeriesConnec.clas_typ if not typ else typ)

convert a Series (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : Series values
  • table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
  • no_val : boolean (default False) - if True return (ntv_name, ntv_type)
@staticmethod
def to_idx(ser):
313    @staticmethod
314    def to_idx(ser):
315        ''' convert a Series in categorical data
316
317        *return (dict)*
318
319        { 'codec': 'list of pandas categories',
320          'name': 'name of the series',
321          'keys': 'list of pandas codes' }
322        '''
323        idx = ser.astype('category')
324        lis = list(idx.cat.categories)
325        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):
326            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
327                   for ts in lis]
328        return {'codec': lis, 'name': ser .name, 'keys': list(idx.cat.codes)}

convert a Series in categorical data

return (dict)

{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }

@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
330    @staticmethod
331    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
332        ''' return a pd.Series from Field data (codec, name, keys)
333
334        *Parameters*
335
336        - **ntv_codec**: Ntv object - codec value to convert in Series values
337        - **ntv_type**: string - default type to apply to convert in dtype
338        - **ntv_name**: string - name of the Series
339
340        *parameters (kwargs)*
341
342        - **index**: list (default None) - if present, add the index in Series
343        - **leng**: integer (default None) - leng of the Series (used with single codec value)
344        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
345        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
346        '''
347        option = {'index': None, 'leng': None, 'alias': False,
348                  'annotated': False} | kwargs
349        types = SeriesConnec.types.set_index('ntv_type')
350        astype = SeriesConnec.astype
351        leng = option['leng']
352
353        ntv_type = ntv_codec.type_str
354        len_unique = leng if len(ntv_codec) == 1 and leng else 1
355        pd_convert = ntv_type in types.index
356
357        pd_name, name_type, dtype = PdUtil.pd_name(
358            ntv_name, ntv_type, pd_convert)
359        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
360                                 option['annotated'], pd_convert)
361        if ntv_keys:
362            if pd_convert and name_type != 'array':
363                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
364                cat_type = categ.dtype.name
365                categories = categ.astype(astype.get(cat_type, cat_type))
366            else:
367                categories = pd.Series(ntv_obj, dtype='object')
368            cat = pd.CategoricalDtype(categories=categories)
369            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
370            srs = pd.Series(data, name=pd_name,
371                            index=option['index'], dtype='category')
372        else:
373            data = ntv_obj * len_unique
374            if pd_convert:
375                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
376            else:
377                srs = pd.Series(data, name=pd_name, dtype=dtype)
378
379        if option['alias']:
380            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
381        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

return a pd.Series from Field data (codec, name, keys)

Parameters

  • ntv_codec: Ntv object - codec value to convert in Series values
  • ntv_type: string - default type to apply to convert in dtype
  • ntv_name: string - name of the Series

parameters (kwargs)

  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
Inherited Members
json_ntv.ntv_util.NtvConnector
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
init_ntv_keys
class PdUtil:
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **pd_name**: return a tuple with the name of the Series, the type deduced
    from the name and the dtype
    - **pd_index**: return a DataFrame with index
    - **unic**: return a one-value Series if the Series contains a single value
    - **decode_ntv_to_val**: return a value from a ntv_field

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series.

        *Parameters*

        - **jsn** : dict - json TableSchema value with a 'schema' key (holding
        the 'fields' list) and a 'data' key (read with the 'records' orient),
        - **kwargs** : accepted but not used by this method.

        *Returns* : the single column as a Series if the result has only one
        column, else the DataFrame.'''
        from io import StringIO

        fields = jsn['schema']['fields']
        ntv_type = PdUtil.ntvtype_table(fields)
        name = PdUtil.name_table(fields)
        # pd_name returns (name, name_type, dtype): evaluate it once per field
        pd_fields = [PdUtil.pd_name(nam, ntvtyp, table=True)
                     for nam, ntvtyp in zip(name, ntv_type)]
        pd_name = [pd_field[0] for pd_field in pd_fields]
        pd_dtype = [pd_field[2] for pd_field in pd_fields]
        # a file-like buffer is used because passing a literal json string to
        # read_json is deprecated; 'records' is the documented orient value
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                         for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field.

        A NtvSingle is converted with `to_obj(simpleval=True)`; any other ntv
        is iterated and each item is converted with `to_obj()`.'''
        if not isinstance(ntv, NtvSingle):
            return [item.to_obj() for item in ntv]
        return ntv.to_obj(simpleval=True)

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table.

        Fields named 'index' are skipped and the name 'values' is replaced
        by None (a field without 'name' key also gives None).'''
        kept = []
        for field in fields:
            label = field.get('name', None)
            if label == 'index':
                continue
            kept.append(None if label == 'values' else label)
        return kept

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table.

        The 'format' key defaults to 'default' and the 'type' key to None
        before being translated by `PdUtil.ntv_table`.'''
        ntv_types = []
        for field in fields:
            if field.get('name', None) == 'index':
                continue
            ntv_types.append(PdUtil.ntv_table(field.get('format', 'default'),
                                              field.get('type', None)))
        return ntv_types

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'.

        The schema is updated in place (the 'extDtype' key of the field is
        removed) and returned.'''
        field_names = [field['name'] for field in schema['fields']]
        ind = field_names.index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        target = schema['fields'][ind]
        if tabletype['format'] != 'default':
            target['format'] = tabletype['format']
        else:
            # the 'default' format is implicit: the key is not stored
            target.pop('format', None)
        target['type'] = tabletype['type']
        target.pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted (not modified).

        *Returns* : dict - json value ('table' orient) with an updated schema.'''
        # convert may return the input Series itself: rename gives a new
        # Series, so the name of the caller's Series is left untouched
        srs = PdUtil.convert(ntv_type, srs).rename(ntv_name)
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        # the schema field of an unnamed Series is looked up as 'values'
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - if True, convert to json
        compatible values, else convert from json values

        *Returns* : the converted Series (the input Series itself when the
        NTVtype needs no conversion).'''
        if ntv_type in ('point', 'line', 'polygon', 'geometry'):
            if tojson:
                return srs.apply(ShapelyConnec.to_coord)
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            if tojson:
                return srs.apply(ShapelyConnec.to_geojson)
            return srs.apply(ShapelyConnec.from_geojson)
        if tojson:
            # only 'date' values are converted (to str) in the json direction
            return srs.astype(str) if ntv_type == 'date' else srs
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs).dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series.

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion

        A non empty name_type is returned unchanged. Otherwise the NTVtype is
        searched from the dtype ('json' if not found and table is False).
        '''
        if name_type:
            return name_type
        types_none = SeriesConnec.types.set_index('name_type').loc[None]
        if dtype in types_none.dtype.values:
            return types_none.set_index('dtype').loc[dtype].ntv_type
        if table:
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return 'json'

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.

        *Returns* : list of values.'''
        srs = PdUtil.convert(ntv_type, srs)
        geo_types = ['point', 'line', 'polygon', 'geometry', 'geojson']
        if ntv_type in geo_types or srs.dtype.name == 'object':
            return srs.to_list()
        json_text = srs.to_json(orient='records', date_format='iso',
                                default_handler=str)
        return json.loads(json_text)

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series.

        *Parameters*

        - **ntv_codec** : Ntv object - codec value to convert,
        - **name_type** : string - type used as default type (without
        pd_convert) or compared to 'array' (with pd_convert),
        - **annotated** : boolean - passed as 'simpleval' to `obj_value`,
        - **pd_convert** : boolean - if False, the values are directly those
        returned by `to_obj`.'''
        if not pd_convert:
            return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)
        if name_type == 'array':
            return ntv_codec.to_obj(format='obj', simpleval=True)
        values = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                     def_type=ntv_codec.type_str, fast=True)
        if isinstance(values, list):
            return values
        return [values]

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        by_type_format = SeriesConnec.table.set_index(['type', 'format'])
        return by_type_format.loc[(table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame with index.

        If an 'index' column is present, it becomes the (unnamed) index of
        the returned DataFrame, else the DataFrame is returned unchanged.'''
        if 'index' not in dfr.columns:
            return dfr
        indexed = dfr.set_index('index')
        indexed.index.rename(None, inplace=True)
        return indexed

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype.

        *Parameters*

        - **ntv_name** : string - name of the ntv_field (None is read as ''),
        - **ntv_type** : string - NTVtype of the ntv_field,
        - **pd_convert** : boolean (default True) - if False (and table is
        False) the dtype is 'object' and the name includes the ntv_type,
        - **table** : boolean (default False) - True if Table Schema conversion'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if not (pd_convert or table):
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        if table and ntv_type.lower() in typtab.index:
            row = typtab.loc[ntv_type.lower()]
            name_type = row['name_type']
            dtype = row['dtype']
        else:
            row = types.loc[ntv_type]
            name_type = row['name_type'] if ntv_type != '' else ''
            dtype = row['dtype']
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added
        full_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (full_name or None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return a Series reduced to its first value if all the values of
        the Series are equal, else the Series itself.

        Categorical and empty Series are returned unchanged.'''
        # an empty Series has no first value to compare with
        if str(srs.dtype) == 'category' or len(srs) == 0:
            return srs
        values = srs.values
        if np.array_equal(values, [values[0]] * len(srs)):
            return srs[:1]
        return srs

ntv-pandas utilities.

This class includes static methods:

Ntv and pandas

  • ntv_type: return NTVtype from name_type and dtype of a Series
  • convert: convert Series with external NTVtype
  • ntv_val: convert a simple Series into NTV json-value
  • ntv_obj: return a list of values to convert in a Series
  • pd_name: return a tuple with the name of the Series, the type deduced from the name and the dtype
  • pd_index: return a DataFrame with index
  • unic: return simple value if the Series contains a single value

TableSchema

  • to_obj_table: convert json TableSchema data into a DataFrame or a Series
  • name_table: return a list of non index field's names from a json Table
  • ntvtype_table: return a list of non index field's ntv_type from a json Table
  • table_schema: add 'format' and 'type' keys in a Json TableSchema
  • table_val: convert a Series into TableSchema json-value
  • ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
423    @staticmethod
424    def to_obj_table(jsn, **kwargs):
425        ''' convert json TableSchema data into a DataFrame or a Series'''
426        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
427        name = PdUtil.name_table(jsn['schema']['fields'])
428        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
429                   for nam, ntvtyp in zip(name, ntv_type)]
430        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
431                    for nam, ntvtyp in zip(name, ntv_type)]
432        dfr = pd.read_json(json.dumps(jsn['data']), orient='record')
433        dfr = PdUtil.pd_index(dfr)
434        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
435                            for ind, col in enumerate(dfr.columns)})
436        dfr = dfr.astype({col: pd_dtype[ind]
437                         for ind, col in enumerate(dfr.columns)})
438        dfr.columns = pd_name
439        if len(dfr.columns) == 1:
440            return dfr[dfr.columns[0]]
441        return dfr

convert json TableSchema data into a DataFrame or a Series

@staticmethod
def decode_ntv_to_val(ntv):
443    @staticmethod 
444    def decode_ntv_to_val(ntv):
445        ''' return a value from a ntv_field'''
446        if isinstance(ntv, NtvSingle):
447            return ntv.to_obj(simpleval=True)
448        return [ntv_val.to_obj() for ntv_val in ntv]

return a value from a ntv_field

@staticmethod
def name_table(fields):
450    @staticmethod
451    def name_table(fields):
452        '''return a list of non index field's names from a json Table'''
453        names = [field.get('name', None) for field in fields
454                 if field.get('name', None) != 'index']
455        return [None if name == 'values' else name for name in names]

return a list of non index field's names from a json Table

@staticmethod
def ntvtype_table(fields):
457    @staticmethod
458    def ntvtype_table(fields):
459        '''return a list of non index field's ntv_type from a json Table'''
460        return [PdUtil.ntv_table(field.get('format', 'default'),
461                field.get('type', None)) for field in fields
462                if field.get('name', None) != 'index']

return a list of non index field's ntv_type from a json Table

@staticmethod
def table_schema(schema, name, ntv_type):
464    @staticmethod
465    def table_schema(schema, name, ntv_type):
466        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
467        for the field defined by 'name' '''
468        ind = [field['name'] for field in schema['fields']].index(name)
469        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
470        if tabletype['format'] == 'default':
471            schema['fields'][ind].pop('format', None)
472        else:
473            schema['fields'][ind]['format'] = tabletype['format']
474        schema['fields'][ind]['type'] = tabletype['type']
475        schema['fields'][ind].pop('extDtype', None)
476        return schema

convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema for the field defined by 'name'

@staticmethod
def table_val(ntv_type, ntv_name, srs):
478    @staticmethod
479    def table_val(ntv_type, ntv_name, srs):
480        '''convert a Series into TableSchema json-value.
481
482        *Parameters*
483
484        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
485        - **ntv_name**: string - name of the Series
486        - **srs** : Series to be converted.'''
487        srs = PdUtil.convert(ntv_type, srs)
488        srs.name = ntv_name
489        tab_val = json.loads(srs.to_json(orient='table',
490                                         date_format='iso', default_handler=str))
491        name = 'values' if srs.name is None else srs.name
492        tab_val['schema'] = PdUtil.table_schema(
493            tab_val['schema'], name, ntv_type)
494        return tab_val

convert a Series into TableSchema json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • ntv_name: string - name of the Series
  • srs : Series to be converted.
@staticmethod
def convert(ntv_type, srs, tojson=True):
496    @staticmethod
497    def convert(ntv_type, srs, tojson=True):
498        ''' convert Series with external NTVtype.
499
500        *Parameters*
501
502        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
503        - **srs** : Series to be converted.
504        - **tojson** : boolean (default True) - apply to json function'''
505        if tojson:
506            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
507                return srs.apply(ShapelyConnec.to_coord)
508            if ntv_type == 'geojson':
509                return srs.apply(ShapelyConnec.to_geojson)
510            if ntv_type == 'date':
511                return srs.astype(str)
512            return srs
513        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
514            return srs.apply(ShapelyConnec.to_geometry)
515        if ntv_type == 'geojson':
516            return srs.apply(ShapelyConnec.from_geojson)
517        if ntv_type == 'datetime':
518            return pd.to_datetime(srs)
519        if ntv_type == 'date':
520            return pd.to_datetime(srs).dt.date
521        if ntv_type == 'time':
522            return pd.to_datetime(srs).dt.time
523        return srs

convert Series with external NTVtype.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
  • tojson : boolean (default True) - apply to json function
@staticmethod
def ntv_type(name_type, dtype, table=False):
525    @staticmethod
526    def ntv_type(name_type, dtype, table=False):
527        ''' return NTVtype from name_type and dtype of a Series .
528
529        *Parameters*
530
531        - **name_type** : string - type included in the Series name,
532        - **dtype** : string - dtype of the Series.
533        - **table** : boolean (default False) - True if Table Schema conversion
534        '''
535        if not name_type:
536            types_none = SeriesConnec.types.set_index('name_type').loc[None]
537            if dtype in types_none.dtype.values:
538                return types_none.set_index('dtype').loc[dtype].ntv_type
539            if not table:
540                return 'json'
541            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
542            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
543        return name_type

return NTVtype from name_type and dtype of a Series.

Parameters

  • name_type : string - type included in the Series name,
  • dtype : string - dtype of the Series.
  • table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
545    @staticmethod
546    def ntv_val(ntv_type, srs):
547        ''' convert a simple Series into NTV json-value.
548
549        *Parameters*
550
551        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
552        - **srs** : Series to be *converted.'''
553        srs = PdUtil.convert(ntv_type, srs)
554        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
555            return srs.to_list()
556        if srs.dtype.name == 'object':
557            return srs.to_list()
558        return json.loads(srs.to_json(orient='records',
559                                      date_format='iso', default_handler=str))

convert a simple Series into NTV json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
@staticmethod
def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
561    @staticmethod
562    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
563        '''return a list of values to convert in a Series'''
564        if pd_convert:
565            if name_type == 'array':
566                return ntv_codec.to_obj(format='obj', simpleval=True)
567            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
568                                          def_type=ntv_codec.type_str, fast=True)
569            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
570        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

return a list of values to convert in a Series

@staticmethod
def ntv_table(table_format, table_type):
572    @staticmethod
573    def ntv_table(table_format, table_type):
574        ''' return NTVtype from the TableSchema data.
575
576        *Parameters*
577
578        - **table_format** : string - TableSchema format,
579        - **table_type** : string - TableSchema type'''
580        return SeriesConnec.table.set_index(['type', 'format']).loc[
581            (table_type, table_format)].values[0]

return NTVtype from the TableSchema data.

Parameters

  • table_format : string - TableSchema format,
  • table_type : string - TableSchema type
@staticmethod
def pd_index(dfr):
583    @staticmethod
584    def pd_index(dfr):
585        '''return a DataFrame with index'''
586        if 'index' in dfr.columns:
587            dfr = dfr.set_index('index')
588            dfr.index.rename(None, inplace=True)
589        return dfr

return a DataFrame with index

@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
591    @staticmethod
592    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
593        '''return a tuple with the name of the Series, the type deduced from
594        the name and the dtype'''
595        ntv_name = '' if ntv_name is None else ntv_name
596        typtab = SeriesConnec.typtab.set_index('ntv_type')
597        types = SeriesConnec.types.set_index('ntv_type')
598        if table and ntv_type.lower() in typtab.index:
599            name_type = typtab.loc[ntv_type.lower()]['name_type']
600            dtype = typtab.loc[ntv_type.lower()]['dtype']
601        elif pd_convert or table:
602            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
603            dtype = types.loc[ntv_type]['dtype']
604        else:
605            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
606        dtype = SeriesConnec.deftype.get(dtype, dtype)  # ajout
607        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
608        return (pd_name if pd_name else None, name_type, dtype)

return a tuple with the name of the Series, the type deduced from the name and the dtype

@staticmethod
def unic(srs):
610    @staticmethod
611    def unic(srs):
612        ''' return simple value if the Series contains a single value'''
613        if str(srs.dtype) == 'category':
614            return srs
615        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

return simple value if the Series contains a single value