ntv-pandas.ntv_pandas.pandas_ntv_connector

Created on Feb 27 2023

@author: Philippe@loco-labs.io

The pandas_ntv_connector module is part of the ntv-pandas.ntv_pandas package (specification document).

A NtvConnector is defined by:

  • clas_obj: str - define the class name of the object to convert
  • clas_typ: str - define the NTVtype of the converted object
  • to_obj_ntv: method - converter from JsonNTV to the object
  • to_json_ntv: method - converter from the object to JsonNTV

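It contains:

  • functions read_json and to_json to convert JSON data and pandas entities
  • function to_analysis to create data used by the tab_analysis module
  • function check_relation to identify rows with inconsistent relationships
  • functions as_def_type and equals
  • the child classes of the NTV.json_ntv.ntv.NtvConnector abstract class: DataFrameConnec ('tab' connector) and SeriesConnec ('field' connector)
  • a utility class with static methods: PdUtil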

The functions to_json, to_analysis, check_relation, as_def_type and equals are used with the npd accessor.

  1# -*- coding: utf-8 -*-
  2"""
  3Created on Feb 27 2023
  4
  5@author: Philippe@loco-labs.io
  6
  7The `pandas_ntv_connector` module is part of the `ntv-pandas.ntv_pandas` package
  8([specification document](
  9https://loco-philippe.github.io/ES/JSON%20semantic%20format%20(JSON-NTV).htm)).
 10
 11A NtvConnector is defined by:
 12- clas_obj: str - define the class name of the object to convert
 13- clas_typ: str - define the NTVtype of the converted object
 14- to_obj_ntv: method - converter from JsonNTV to the object
 15- to_json_ntv: method - converter from the object to JsonNTV
 16
 17It contains :
 18
 19- functions `read_json` and `to_json` to convert JSON data and pandas entities
 20- function `to_analysis` to create data used by the `tab_analysis` module
 21- function `check_relation` to identify rows with inconsistent relationships
 22- functions `as_def_type` and `equals`
 23
 24- the child classes of `NTV.json_ntv.ntv.NtvConnector` abstract class:
 25    - `DataFrameConnec`: 'tab'   connector
 26    - `SeriesConnec`:    'field' connector
 27
 28- an utility class with static methods : `PdUtil`
 29
 30The functions `to_json`, `to_analysis`, `check_relation`, `as_def_type` and
 31`equals` are used with the `npd` accessor.
 32
 33"""
 34import os
 35import datetime
 36import json
 37import configparser
 38from pathlib import Path
 39from collections import Counter
 40from io import StringIO
 41import pandas as pd
 42import numpy as np
 43
 44
 45from json_ntv.ntv import Ntv, NtvConnector, NtvList, NtvSingle
 46from json_ntv.ntv_util import NtvUtil
 47from json_ntv.ntv_connector import ShapelyConnec
 48from tab_dataset.cfield import Cfield
 49from ntv_numpy import Xdataset
 50
# Directory that contains this module. The 'ntv_pandas.ini' and 'ntv_table.ini'
# configuration files read in the SeriesConnec class body are resolved from it.
path_ntv_pandas = Path(os.path.abspath(__file__)).parent
 52
 53
 54def as_def_type(pd_array):
 55    '''convert a Series or DataFrame with default dtype'''
 56    if isinstance(pd_array, (pd.Series, pd.Index)):
 57        return pd_array.astype(SeriesConnec.deftype.get(pd_array.dtype.name, pd_array.dtype.name))
 58    return pd.DataFrame({col: as_def_type(pd_array[col]) for col in pd_array.columns})
 59
 60
 61def check_relation(pd_df, parent, child, typecoupl, value=True):
 62    ''' Accessor for method `cdataset.Cdataset.check_relation` invoket as
 63    `pd.DataFrame.npd.check_relation`.
 64    Get the inconsistent records for a relationship.
 65
 66     *Parameters*
 67
 68    - **child** : str - name of the child Series involved in the relation
 69    - **parent**: str - name of the parent Series involved in the relation
 70    - **typecoupl**: str - relationship to check ('derived' or 'coupled')
 71    - **value**: boolean (default True) - if True return a dict with inconsistent
 72    values of the Series, else a tuple with index of records)
 73
 74    *Returns* :
 75
 76    - dict with inconsistent values of the Series
 77    - or a tuple with row of records'''
 78    parent_idx = SeriesConnec.to_idx(pd_df[parent])
 79    parent_field = Cfield(parent_idx['codec'], parent, parent_idx['keys'])
 80    child_idx = SeriesConnec.to_idx(pd_df[child])
 81    child_field = Cfield(child_idx['codec'], child, child_idx['keys'])
 82    return Cfield.check_relation(parent_field, child_field, typecoupl, value)
 83
 84
 85def equals(pdself, pdother):
 86    '''return True if pd.equals is True and names are equal and dtype of categories are equal'''
 87    if isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series):
 88        return SeriesConnec.equals(pdself, pdother)
 89    if isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame):
 90        return DataFrameConnec.equals(pdself, pdother)
 91    return False
 92
 93
 94def read_json(jsn, **kwargs):
 95    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
 96
 97    *parameters*
 98
 99    - **jsn** : JSON text or JSON value to convert
100    - **extkeys**: list (default None) - keys to use if not present in ntv_value
101    - **decode_str**: boolean (default False) - if True, string values are converted
102    in object values
103    - **leng**: integer (default None) - leng of the Series (used with single codec value)
104    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
105    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
106    - **series**: boolean (default False) - used only without header. If True
107    JSON data is converted into Series else DataFrame
108    '''
109    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
110              'annotated': False, 'series': False} | kwargs
111    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
112    if 'schema' in jso:
113        return PdUtil.to_obj_table(jso, **option)
114    ntv = Ntv.from_obj(jso)
115    if ntv.type_str == 'field':
116        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
117    if ntv.type_str == 'tab':
118        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
119    if option['series']:
120        return SeriesConnec.to_obj_ntv(ntv, **option)
121    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
122
123
124def _dist(key1, key2, distr=False):
125    '''return default coupling codec between two keys list and optionaly if
126    the relationship is distributed'''
127    if not key1 or not key2:
128        return 0
129    k1k2 = [tuple((v1, v2)) for v1, v2 in zip(key1, key2)]
130    dist = len(list(dict.fromkeys(k1k2)))
131    if not distr:
132        return dist
133    distrib = False
134    if dist == (max(key1) + 1) * (max(key2) + 1):
135        distrib = max(Counter(k1k2).values()) == len(key1) // dist
136        # distrib = min(sum(map(lambda x: (x + i) % (max(a) + 1), a)) == sum(a)
137        # for i in range(1, max(a)+1))
138    return [dist, distrib]
139
140
def to_analysis(pd_df, distr=False):
    '''return a dict with data used in AnaDataset module.

    The dict has four members:

    - 'fields': one dict per column with 'lencodec', 'id' (column label) and
    'mincodec' (both lengths are the number of distinct category codes),
    - 'name': always None,
    - 'length': number of rows,
    - 'relations': for each column but the last, the coupling value with
    every following column (result of `_dist` when `distr` is True, else
    the number of distinct pairs of codes).'''
    columns = pd_df.columns
    keys = [list(pd_df[col].astype('category').cat.codes) for col in columns]
    nb_col = len(keys)

    def coupling(first, second):
        if distr:
            return _dist(keys[first], keys[second], distr)
        return len(set(zip(keys[first], keys[second])))

    relations = {}
    for i in range(nb_col - 1):
        relations[columns[i]] = {columns[j]: coupling(i, j)
                                 for j in range(i + 1, nb_col)}

    fields = []
    for ind in range(len(columns)):
        size = len(set(keys[ind]))
        fields.append({'lencodec': size, 'id': columns[ind], 'mincodec': size})

    return {'fields': fields, 'name': None, 'length': len(pd_df),
            'relations': relations}
159
160
def to_json(pd_array, **kwargs):
    ''' convert pandas Series or Dataframe to JSON text or JSON Value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    (forced to False when `table` is True)
    - **table** : boolean (default False) - if True return TableSchema format
    - **index** : boolean (default True) - if True the index Series is included
    (used for DataFrame only)
    '''
    option = {'encoded': False, 'header': True,
              'table': False, 'index': True} | kwargs
    if option['table']:
        # the TableSchema format is never wrapped in a header
        option['header'] = False
    if isinstance(pd_array, pd.Series):
        head = ':field'
        jsn = SeriesConnec.to_json_ntv(pd_array, table=option['table'])[0]
    else:
        head = ':tab'
        jsn = DataFrameConnec.to_json_ntv(pd_array, table=option['table'],
                                          index=option['index'])[0]
    if option['header']:
        jsn = {head: jsn}
    return json.dumps(jsn) if option['encoded'] else jsn
188
189
def from_xarray(xdt, **kwargs):
    ''' convert xarray.Dataset to pandas DataFrame.

    The data is first loaded with `Xdataset.from_xarray`; the keyword
    arguments are forwarded unchanged to `Xdataset.to_dataframe`.

    *Parameters*

    - **json_name**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    '''
    xdataset = Xdataset.from_xarray(xdt)
    return xdataset.to_dataframe(**kwargs)
200
201
def from_scipp(sci, **kwargs):
    ''' convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame.

    The data is first loaded with `Xdataset.from_scipp`; the keyword
    arguments are forwarded unchanged to `Xdataset.to_dataframe`.

    *Parameters*

    - **json_name**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add the info of the
    intermediate Xdataset in DataFrame.attrs (presumably - same option as
    `from_xarray`, to be confirmed in `Xdataset.to_dataframe`)
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    '''
    xdataset = Xdataset.from_scipp(sci)
    return xdataset.to_dataframe(**kwargs)
212
213
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame ('tab' NTV type).

    Besides the two conversion methods `to_obj_ntv` and `to_json_ntv`,
    two static methods are included:

    - to_listidx: convert a DataFrame in categorical data
    - equals: column by column comparison of two DataFrame
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        ''' convert json ntv_value into a DataFrame.

        Each field of the tab is decoded, its keys are initialised and it is
        converted with `SeriesConnec.to_series`. The keyword arguments are
        forwarded to that method together with the common length `leng`.

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        ntv = Ntv.fast(ntv_value)
        lidx = []
        for ntvf in ntv:
            lidx.append(list(NtvUtil.decode_ntv_tab(
                ntvf, PdUtil.decode_ntv_to_val)))
        # item 6 of a decoded field is its length
        leng = max(idx[6] for idx in lidx)
        option = kwargs | {'leng': leng}

        no_keys = []
        for ind, lind in enumerate(lidx):
            # must be evaluated before the keys are initialised below
            has_keys = lind[3] or lind[4] or lind[5]
            no_keys.append(not has_keys)
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            lind[2] = Ntv.fast(Ntv.obj_ntv(
                lind[2], typ=lind[1], single=len(lind[2]) == 1))

        list_series = []
        for ind, lind in enumerate(lidx):
            keys = None if no_keys[ind] else lind[4]
            list_series.append(
                SeriesConnec.to_series(lind[2], lind[0], keys, **option))
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format
        - **index** : boolean (default True) - if True the index Series is included
        (not used with the TableSchema format)
        '''
        ntv_typ = typ if typ else DataFrameConnec.clas_typ
        if not kwargs.get('table', False):
            df2 = value.reset_index() if kwargs.get('index', True) else value
            fields = [SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                      for col in df2.columns]
            return (Ntv.obj(fields).to_obj(), name, ntv_typ)

        # TableSchema format: convert the columns, let pandas build the table
        # then replace the 'type' / 'format' of each field in the schema
        converted = {}
        for col in value.columns:
            col_type = SeriesConnec.to_json_ntv(
                value[col], table=True, no_val=True)[1]
            converted[NtvUtil.from_obj_name(col)[0]] = PdUtil.convert(
                col_type, value[col])
        df2 = pd.DataFrame(converted)
        table_val = json.loads(df2.to_json(orient='table',
                                           date_format='iso', default_handler=str))
        for nam in value.columns:
            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                value[nam], table=True, no_val=True)
            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
                                                      ntv_name, ntv_type)
        return (table_val, name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        list_idx = [SeriesConnec.to_idx(ser) for _, ser in dtf.items()]
        return (list_idx, len(dtf))

    @staticmethod
    def equals(pdself, pdother):
        '''return True if both objects are DataFrame with the same number of
        columns and if the columns, taken in order, are equal according to
        `SeriesConnec.equals`'''
        if not isinstance(pdself, pd.DataFrame) or not isinstance(pdother, pd.DataFrame):
            return False
        if len(pdself.columns) != len(pdother.columns):
            return False
        return all(SeriesConnec.equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))
305
306
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series ('field' NTV type).

    Class attributes built when the class is defined, from the
    `ntv_pandas.ini` and `ntv_table.ini` files located in `path_ntv_pandas`:

    - **types**: DataFrame built from the 'type' and 'column' entries of
    `ntv_pandas.ini`
    - **astype**: dict loaded from the 'astype' entry of `ntv_pandas.ini`
    (used to convert a dtype into its alias dtype)
    - **deftype**: inverse dict of `astype` (used to come back to the default dtype)
    - **table**: DataFrame built from the 'mapping' and 'column' entries of
    `ntv_table.ini`
    - **typtab**: DataFrame built from the 'type' and 'col_type' entries of
    `ntv_table.ini`

    Besides the two conversion methods `to_obj_ntv` and `to_json_ntv`,
    three static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    - equals: compare two Series
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    deftype = {val: key for key, val in astype.items()}
    # 'config' is rebound here: from now on it holds 'ntv_table.ini'
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        Return None if `ntv_value` is None or if the field refers to a parent
        and no `extkeys` is given.

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        if parent and not option['extkeys']:
            return None
        # keys are rebuilt from the coefficient or from the external keys
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and `table` is True)
        return (ntv_name, ntv_type)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        # work on the alias dtype (the dtype is kept when it has no alias)
        srs = value.astype(SeriesConnec.astype.get(
            value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # categorical Series: [list of categories, list of codes (or coef)]
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type)
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, NtvList(
                [coef]) if coef else NtvList(cod_value)]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
                                     def_type=ntv_type).ntv_value
        if len(ntv_value) == 1:
            ntv_value[0].set_name(ntv_name)
            return (ntv_value[0].to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        Timestamp categories are converted into UTC python datetime.

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        # only the first category is tested to detect Timestamp values
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        With keys, a categorical Series is built (categories from the codec,
        codes from the keys). Without keys, the Series holds the codec values
        (a single codec value is repeated `leng` times).

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the categorical Series (None or empty
        if the Series is not categorical)

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (applied to categorical Series only)
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        # True if the NTV type has a dedicated pandas conversion
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        The data is read with `pd.read_json` then converted with
        `PdUtil.convert` according to the `ntv_type`.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type (the Series is not renamed if None)'''
        srs = pd.read_json(StringIO(json.dumps(data)),
                           dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)

    @staticmethod
    def equals(pdself, pdother):
        '''return True if both objects are Series with equal names and equal
        content.

        - non categorical Series are compared with `pd.Series.equals` after
        conversion to the default dtype,
        - categorical Series are equal if their categories (converted to the
        default dtype) have the same dtype and the same values and if their
        codes are equal.'''
        if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
            return False
        if pdself.name != pdother.name:
            return False
        if str(pdself.dtype) == str(pdother.dtype) == 'category':
            # categories are pd.Index objects: they are compared directly
            # (recursing into this method would fail the Series check above)
            cat_self = as_def_type(pdself.cat.categories)
            cat_other = as_def_type(pdother.cat.categories)
            if cat_self.dtype != cat_other.dtype or not cat_self.equals(cat_other):
                return False
            return pdself.cat.codes.equals(pdother.cat.codes)
        return as_def_type(pdself).equals(as_def_type(pdother))
515
516
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **decode_ntv_to_val**: return a value from a ntv_field
    - **pd_name**: return a tuple with the name of the Series, the type
    deduced from the name and the dtype
    - **pd_index**: return a DataFrame with index
    - **unic**: return simple value if the Series contains a single value

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series.

        A Series is returned if the table has a single non index column.

        *Parameters*

        - **jsn** : dict - TableSchema data ('schema' and 'data' members)
        - **kwargs** : accepted but not used'''
        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
        name = PdUtil.name_table(jsn['schema']['fields'])
        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
                   for nam, ntvtyp in zip(name, ntv_type)]
        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
                    for nam, ntvtyp in zip(name, ntv_type)]
        # 'data' is a list of row objects: documented orient is 'records'
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                         for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field (simple value for a NtvSingle,
        else the list of the values of the children)'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name', None) for field in fields
                 if field.get('name', None) != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name'.

        The schema is updated in place and returned. A 'default' format is not
        written in the schema and the 'extDtype' key is removed.'''
        ind = [field['name'] for field in schema['fields']].index(name)
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            schema['fields'][ind].pop('format', None)
        else:
            schema['fields'][ind]['format'] = tabletype['format']
        schema['fields'][ind]['type'] = tabletype['type']
        schema['fields'][ind].pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        # NOTE: 'convert' may return the Series it received, in that case the
        # name of the Series given by the caller is modified
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        Only geometry types ('point', 'line', 'polygon', 'geometry',
        'geojson') and temporal types ('date' and, from json, 'datetime' and
        'time') are converted. With another type the Series is returned
        unchanged.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - if True convert to json values
        else convert from json values'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs, format='mixed').dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        A non empty `name_type` is returned as is. Otherwise the NTVtype is
        searched from the dtype in `SeriesConnec.types` then (only if `table`
        is True) in `SeriesConnec.typtab`. None is returned if the dtype is
        unknown and `table` is False.

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return None
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        # other dtypes: the json conversion is done by pandas
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series

        *Parameters*

        - **ntv_codec** : Ntv object - codec to convert
        - **name_type** : string - type used for the conversion
        - **annotated** : boolean - forwarded as `simpleval` option
        - **pd_convert** : boolean - True if the NTVtype has a pandas conversion'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame where the column 'index' (if present) is used
        as unnamed index. Without such a column the DataFrame is returned
        unchanged.'''
        if 'index' in dfr.columns:
            # the name is removed without mutating the index object in place
            dfr = dfr.set_index('index').rename_axis(None)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype.

        The name of the Series is 'ntv_name::name_type' (or 'ntv_name' if
        there is no name_type, or None if the result is empty).

        *Parameters*

        - **ntv_name** : string - name of the NTV field (None is used as '')
        - **ntv_type** : string - NTVtype of the field
        - **pd_convert** : boolean (default True) - if False (and `table` is
        False) the tuple ('ntv_name::ntv_type', ntv_type, 'object') is returned
        - **table** : boolean (default False) - if True the TableSchema types
        (`SeriesConnec.typtab`) are searched first'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added: default dtype
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return simple value if the Series contains a single value.

        A Series where all the values are equal is reduced to its first
        element. Categorical and empty Series are returned unchanged.'''
        if str(srs.dtype) == 'category':
            return srs
        if len(srs) == 0:
            # no first value to compare with
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs
path_ntv_pandas = WindowsPath('D:/philippe/python ESstandard/ntv-pandas/ntv_pandas')
def as_def_type(pd_array):
def as_def_type(pd_array):
    '''return a copy of a Series, Index or DataFrame converted to default dtype.

    The default dtype is read in `SeriesConnec.deftype` (the dtype is kept
    when it has no entry). A DataFrame is rebuilt column by column.'''
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        return pd.DataFrame({col: as_def_type(pd_array[col])
                             for col in pd_array.columns})
    dtype_name = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(dtype_name, dtype_name))

convert a Series or DataFrame with default dtype

def check_relation(pd_df, parent, child, typecoupl, value=True):
def check_relation(pd_df, parent, child, typecoupl, value=True):
    ''' Accessor for method `cdataset.Cdataset.check_relation` invoked as
    `pd.DataFrame.npd.check_relation`.
    Get the inconsistent records for a relationship.

     *Parameters*

    - **pd_df** : DataFrame - data holding the two Series
    - **parent**: str - name of the parent Series involved in the relation
    - **child** : str - name of the child Series involved in the relation
    - **typecoupl**: str - relationship to check ('derived' or 'coupled')
    - **value**: boolean (default True) - if True return a dict with inconsistent
    values of the Series, else a tuple with index of records

    *Returns* :

    - dict with inconsistent values of the Series
    - or a tuple with row of records'''
    def as_field(name):
        '''build the Cfield (codec, name, keys) of the column `name`'''
        idx = SeriesConnec.to_idx(pd_df[name])
        return Cfield(idx['codec'], name, idx['keys'])

    return Cfield.check_relation(as_field(parent), as_field(child),
                                 typecoupl, value)

Accessor for method cdataset.Cdataset.check_relation invoked as pd.DataFrame.npd.check_relation. Get the inconsistent records for a relationship.

Parameters

  • child : str - name of the child Series involved in the relation
  • parent: str - name of the parent Series involved in the relation
  • typecoupl: str - relationship to check ('derived' or 'coupled')
  • value: boolean (default True) - if True return a dict with inconsistent values of the Series, else a tuple with index of records

Returns :

  • dict with inconsistent values of the Series
  • or a tuple with row of records
def equals(pdself, pdother):
def equals(pdself, pdother):
    '''return True if both objects are Series (or both DataFrame) and are
    equal according to the `equals` method of the matching connector.

    Any other combination of types returns False.'''
    connectors = ((pd.Series, SeriesConnec), (pd.DataFrame, DataFrameConnec))
    for pd_class, connector in connectors:
        if isinstance(pdself, pd_class) and isinstance(pdother, pd_class):
            return connector.equals(pdself, pdother)
    return False

return True if pd.equals is True and names are equal and dtype of categories are equal

def read_json(jsn, **kwargs):
 95def read_json(jsn, **kwargs):
 96    ''' convert JSON text or JSON Value to pandas Series or Dataframe.
 97
 98    *parameters*
 99
100    - **jsn** : JSON text or JSON value to convert
101    - **extkeys**: list (default None) - keys to use if not present in ntv_value
102    - **decode_str**: boolean (default False) - if True, string values are converted
103    in object values
104    - **leng**: integer (default None) - leng of the Series (used with single codec value)
105    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
106    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
107    - **series**: boolean (default False) - used only without header. If True
108    JSON data is converted into Series else DataFrame
109    '''
110    option = {'extkeys': None, 'decode_str': False, 'leng': None, 'alias': False,
111              'annotated': False, 'series': False} | kwargs
112    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
113    if 'schema' in jso:
114        return PdUtil.to_obj_table(jso, **option)
115    ntv = Ntv.from_obj(jso)
116    if ntv.type_str == 'field':
117        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
118    if ntv.type_str == 'tab':
119        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
120    if option['series']:
121        return SeriesConnec.to_obj_ntv(ntv, **option)
122    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)

convert JSON text or JSON Value to pandas Series or Dataframe.

parameters

  • jsn : JSON text or JSON value to convert
  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
  • series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def to_analysis(pd_df, distr=False):
def to_analysis(pd_df, distr=False):
    '''return a dict with data used in AnaDataset module.

    *Parameters*

    - **pd_df** : DataFrame - data to analyse
    - **distr** : (default False) - if falsy, a relation is the number of
    distinct couples of codes of the two columns, else it is the result of
    `_dist(keys_i, keys_j, distr)`

    *Returns*

    - dict with keys 'fields' (one dict 'lencodec', 'id', 'mincodec' per
    column), 'name' (None), 'length' (number of rows) and 'relations'
    (for each column except the last one, a dict of the following columns).'''
    columns = pd_df.columns
    keys = [list(pd_df[col].astype('category').cat.codes) for col in columns]
    lencodec = [len(set(key)) for key in keys]

    def link(key_i, key_j):
        '''measure of the relation between two lists of codes'''
        if distr:
            return _dist(key_i, key_j, distr)
        return len(set(zip(key_i, key_j)))

    nb_keys = len(keys)
    relations = {}
    for i in range(nb_keys - 1):
        relations[columns[i]] = {columns[j]: link(keys[i], keys[j])
                                 for j in range(i + 1, nb_keys)}
    fields = [{'lencodec': nb_codec, 'id': col, 'mincodec': nb_codec}
              for col, nb_codec in zip(columns, lencodec)]
    return {'fields': fields, 'name': None, 'length': len(pd_df),
            'relations': relations}

return a dict with data used in AnaDataset module

def to_json(pd_array, **kwargs):
def to_json(pd_array, **kwargs):
    ''' convert pandas Series or DataFrame to JSON text or JSON value.

    *parameters*

    - **pd_array** : Series or DataFrame to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for
    DataFrame (ignored when `table` is True)
    - **table** : boolean (default False) - if True return TableSchema format
    - **index** : boolean (default True) - if True the index Series is included
    '''
    option = {'encoded': False, 'header': True,
              'table': False, 'index': True} | kwargs
    table = option['table']
    # TableSchema data is never wrapped in a header
    with_header = option['header'] if not table else False

    if isinstance(pd_array, pd.Series):
        head = ':field'
        jsn = SeriesConnec.to_json_ntv(pd_array, table=table)[0]
    else:
        head = ':tab'
        jsn = DataFrameConnec.to_json_ntv(pd_array, table=table,
                                          index=option['index'])[0]
    if with_header:
        jsn = {head: jsn}
    return json.dumps(jsn) if option['encoded'] else jsn

convert pandas Series or Dataframe to JSON text or JSON Value.

parameters

  • pd_array : Series or Dataframe to convert
  • encoded : boolean (default: False) - if True return a JSON text else a JSON value
  • header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
  • table : boolean (default False) - if True return TableSchema format
  • index : boolean (default True) - if True the index Series is included
def from_xarray(xdt, **kwargs):
def from_xarray(xdt, **kwargs):
    ''' convert xarray.Dataset to pandas DataFrame.

    The data is first loaded with `Xdataset.from_xarray`; the keyword
    arguments are forwarded to `to_dataframe`.

    *Parameters*

    - **json_name**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    '''
    xdataset = Xdataset.from_xarray(xdt)
    return xdataset.to_dataframe(**kwargs)

convert xarray.Dataset to pandas DataFrame.

Parameters

  • json_name: Boolean (default True) - if False use full_name else json_name
  • info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
  • dims: list of string (default None) - order of dimensions full_name to apply
def from_scipp(sci, **kwargs):
def from_scipp(sci, **kwargs):
    ''' convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame.

    The data is first loaded with `Xdataset.from_scipp`; the keyword
    arguments are forwarded to `to_dataframe`.

    *Parameters*

    - **json_name**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    '''
    xdataset = Xdataset.from_scipp(sci)
    return xdataset.to_dataframe(**kwargs)

convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame.

Parameters

  • json_name: Boolean (default True) - if False use full_name else json_name
  • info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
  • dims: list of string (default None) - order of dimensions full_name to apply
class DataFrameConnec(json_ntv.ntv_util.NtvConnector):
class DataFrameConnec(NtvConnector):

    '''NTV connector for pandas DataFrame.

    Besides the two converters `to_obj_ntv` and `to_json_ntv`, two static
    methods are included:

    - to_listidx: convert a DataFrame in categorical data
    - equals: compare two DataFrame column by column
    '''

    clas_obj = 'DataFrame'
    clas_typ = 'tab'

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
        ''' convert json ntv_value into a DataFrame.

        *Parameters*

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
        ntv = Ntv.fast(ntv_value)
        # one list per field; positions as unpacked in SeriesConnec.to_obj_ntv:
        # 0 name, 1 type, 2 codec, 3 parent, 4 keys, 5 coef, 6 length
        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
                for ntvf in ntv]
        leng = max(field[6] for field in lidx)
        option = kwargs | {'leng': leng}

        no_keys = []
        for ind, field in enumerate(lidx):
            # the test is done before init_ntv_keys updates the field
            no_keys.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(ind, lidx, leng)
            codec = Ntv.obj_ntv(field[2], typ=field[1],
                                single=len(field[2]) == 1)
            field[2] = Ntv.fast(codec)

        to_series = SeriesConnec.to_series
        list_series = [to_series(field[2], field[0],
                                 None if keyless else field[4], **option)
                       for field, keyless in zip(lidx, no_keys)]
        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format
        - **index** : boolean (default True) - if True the index Series is included
        '''
        table = kwargs.get('table', False)
        index = kwargs.get('index', True)
        ntv_typ = typ if typ else DataFrameConnec.clas_typ

        if table:
            # TableSchema: values are converted column by column, then the
            # schema produced by pandas is completed with the NTV types
            converted = {}
            for col in value.columns:
                col_name = NtvUtil.from_obj_name(col)[0]
                col_type = SeriesConnec.to_json_ntv(
                    value[col], table=True, no_val=True)[1]
                converted[col_name] = PdUtil.convert(col_type, value[col])
            text = pd.DataFrame(converted).to_json(
                orient='table', date_format='iso', default_handler=str)
            table_val = json.loads(text)
            for nam in value.columns:
                ntv_name, ntv_type = SeriesConnec.to_json_ntv(
                    value[nam], table=True, no_val=True)
                table_val['schema'] = PdUtil.table_schema(
                    table_val['schema'], ntv_name, ntv_type)
            return (table_val, name, ntv_typ)

        df2 = value.reset_index() if index else value
        fields = [SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
                  for col in df2.columns]
        return (Ntv.obj(fields).to_obj(), name, ntv_typ)

    @staticmethod
    def to_listidx(dtf):
        ''' convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame'''
        list_idx = [SeriesConnec.to_idx(column) for _, column in dtf.items()]
        return (list_idx, len(dtf))

    @staticmethod
    def equals(pdself, pdother):
        '''return True if both objects are DataFrame with the same number of
        columns and if columns are equals two by two (`SeriesConnec.equals`)'''
        both_frames = (isinstance(pdself, pd.DataFrame)
                       and isinstance(pdother, pd.DataFrame))
        if not both_frames or len(pdself.columns) != len(pdother.columns):
            return False
        return all(SeriesConnec.equals(pdself[cself], pdother[cother])
                   for cself, cother in zip(pdself, pdother))

NTV connector for pandas DataFrame.

One static method is included:

  • to_listidx: convert a DataFrame in categorical data
clas_obj = 'DataFrame'
clas_typ = 'tab'
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
227    @staticmethod
228    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
229        ''' convert json ntv_value into a DataFrame.
230
231        *Parameters*
232
233        - **index** : list (default None) - list of index values,
234        - **alias** : boolean (default False) - if True, alias dtype else default dtype
235        - **annotated** : boolean (default False) - if True, NTV names are not included.'''
236        series = SeriesConnec.to_series
237
238        ntv = Ntv.fast(ntv_value)
239        lidx = [list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val))
240                for ntvf in ntv]
241        leng = max([idx[6] for idx in lidx])
242        option = kwargs | {'leng': leng}
243        no_keys = []
244        for ind, lind in enumerate(lidx):
245            no_keys.append(not lind[3] and not lind[4] and not lind[5])
246            NtvConnector.init_ntv_keys(ind, lidx, leng)
247            lind[2] = Ntv.fast(Ntv.obj_ntv(
248                lind[2], typ=lind[1], single=len(lind[2]) == 1))
249        list_series = [series(lidx[ind][2], lidx[ind][0], None if no_keys[ind]
250                              else lidx[ind][4], **option) for ind in range(len(lidx))]
251        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
252        return PdUtil.pd_index(dfr)

convert json ntv_value into a DataFrame.

Parameters

  • index : list (default None) - list of index values,
  • alias : boolean (default False) - if True, alias dtype else default dtype
  • annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
254    @staticmethod
255    def to_json_ntv(value, name=None, typ=None, **kwargs):
256        ''' convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
257
258        *Parameters*
259
260        - **typ** : string (default None) - type of the NTV object,
261        - **name** : string (default None) - name of the NTV object
262        - **value** : DataFrame values
263        - **table** : boolean (default False) - if True return TableSchema format
264        - **index** : boolean (default True) - if True the index Series is included
265        '''
266        table = kwargs.get('table', False)
267        index = kwargs.get('index', True)
268        if not table:
269            df2 = value.reset_index() if index else value
270            jsn = Ntv.obj([SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
271                           for col in df2.columns]).to_obj()
272            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
273        df2 = pd.DataFrame({NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
274            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
275            value[col]) for col in value.columns})
276        table_val = json.loads(df2.to_json(orient='table',
277                                           date_format='iso', default_handler=str))
278        for nam in value.columns:
279            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
280                value[nam], table=True, no_val=True)
281            table_val['schema'] = PdUtil.table_schema(table_val['schema'],
282                                                      ntv_name, ntv_type)
283        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)

convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : DataFrame values
  • table : boolean (default False) - if True return TableSchema format
  • index : boolean (default True) - if True the index Series is included
@staticmethod
def to_listidx(dtf):
285    @staticmethod
286    def to_listidx(dtf):
287        ''' convert a DataFrame in categorical data
288
289        *Return: tuple with:*
290
291        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
292        - **lenght** of the DataFrame'''
293        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))

convert a DataFrame in categorical data

Return: tuple with:

  • list of dict (keys : 'codec', 'name', 'keys') for each column
  • length of the DataFrame
@staticmethod
def equals(pdself, pdother):
295    @staticmethod
296    def equals(pdself, pdother):
297        '''return True if columns are equals'''
298        if not (isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame)):
299            return False
300        if len(pdself.columns) != len(pdother.columns):
301            return False
302        for cself, cother in zip(pdself, pdother):
303            if not SeriesConnec.equals(pdself[cself], pdother[cother]):
304                return False
305        return True

return True if columns are equals

Inherited Members
json_ntv.ntv_util.NtvConnector
DIC_NTV_CL
DIC_GEO_CL
DIC_DAT_CL
DIC_FCT
DIC_GEO
DIC_CBOR
DIC_OBJ
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
format_field
init_ntv_keys
class SeriesConnec(json_ntv.ntv_util.NtvConnector):
class SeriesConnec(NtvConnector):
    '''NTV connector for pandas Series

    Besides the two converters `to_obj_ntv` and `to_json_ntv`, the static
    methods included are:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    - equals: compare two Series (values, name, categories)

    The class attributes are loaded once, when the class is created, from the
    files 'ntv_pandas.ini' and 'ntv_table.ini' located in `path_ntv_pandas`.
    '''
    clas_obj = 'Series'
    clas_typ = 'field'
    # 'ntv_pandas.ini': NTV type / name_type / dtype table and dtype aliases
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_pandas.ini'))
    types = pd.DataFrame(json.loads(config['data']['type']),
                         columns=json.loads(config['data']['column']))
    astype = json.loads(config['data']['astype'])
    # reverse mapping of astype (alias dtype -> default dtype)
    deftype = {val: key for key, val in astype.items()}
    # 'ntv_table.ini': TableSchema mapping and TableSchema specific types
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath('ntv_table.ini'))
    table = pd.DataFrame(json.loads(config['data']['mapping']),
                         columns=json.loads(config['data']['column']))
    typtab = pd.DataFrame(json.loads(config['data']['type']),
                          columns=json.loads(config['data']['col_type']))

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        '''Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Returns*

        - a Series, or None if `ntv_value` is None or if the field has a
        parent and no `extkeys` is given.
        '''
        option = {'extkeys': None, 'decode_str': False, 'leng': None,
                  'annotated': False} | kwargs
        if ntv_value is None:
            return None
        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        if parent and not option['extkeys']:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field//coef, option['leng'])
        elif option['extkeys'] and parent:
            ntv_keys = NtvConnector.keysfromderkeys(
                option['extkeys'], ntv_keys)
        elif option['extkeys'] and not parent:
            ntv_keys = option['extkeys']
        ntv_codec = Ntv.fast(Ntv.obj_ntv(
            codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object,
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and `table` is True)
        return (ntv_name, ntv_type)'''

        table = kwargs.get('table', False)
        no_val = kwargs.get('no_val', False)
        # the Series is converted in alias dtype before the conversion
        srs = value.astype(SeriesConnec.astype.get(
            value.dtype.name, value.dtype.name))
        sr_name = srs.name if srs.name else ''
        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]

        if table:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if no_val:
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)
        if srs.dtype.name == 'category':
            # categorical Series: [typed list of categories, codes (or coef)]
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = PdUtil.ntv_val(ntv_type, cdc)
            cat_value = NtvList(cat_value, ntv_type=ntv_type)
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, NtvList(
                [coef]) if coef else NtvList(cod_value)]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
                                     def_type=ntv_type).ntv_value
        if len(ntv_value) == 1:
            # a single value is returned as a named entity, not as a list
            ntv_value[0].set_name(ntv_name)
            return (ntv_value[0].to_obj(), name,
                    SeriesConnec.clas_typ if not typ else typ)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
                SeriesConnec.clas_typ if not typ else typ)

    @staticmethod
    def to_idx(ser):
        ''' convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as datetime converted to UTC.
        '''
        idx = ser.astype('category')
        lis = list(idx.cat.categories)
        if lis and isinstance(lis[0], pd.Timestamp):
            # NOTE(review): astimezone reads a naive datetime as local time -
            # confirm it is the expected behaviour for naive Timestamp
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
                   for ts in lis]
        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        ''' return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        (its `type_str` is the NTV type used to find the dtype)
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values; if empty or None the
        Series is not categorical

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - leng of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        '''
        option = {'index': None, 'leng': None, 'alias': False,
                  'annotated': False} | kwargs
        types = SeriesConnec.types.set_index('ntv_type')
        astype = SeriesConnec.astype
        leng = option['leng']

        ntv_type = ntv_codec.type_str
        # a single codec value is repeated 'leng' times
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(
            ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
                                 option['annotated'], pd_convert)
        if ntv_keys:
            if pd_convert and name_type != 'array':
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype='object')
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name,
                            index=option['index'], dtype='category')
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        if option['alias']:
            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        '''return a Series from a Json data.

        The data is read with `pd.read_json` then converted with
        `PdUtil.convert` according to the ntv_type.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including
        ntv_type (the Series is not renamed if None)'''
        srs = pd.read_json(StringIO(json.dumps(data)),
                           dtype=dtype, typ='series')
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)

    @staticmethod
    def equals(pdself, pdother):
        '''return True if both objects are Series, names are equal and values
        (with default dtype) are equal.

        For two categorical Series, the categories (with default dtype) and
        the codes have to be equal.'''
        if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
            return False
        if pdself.name != pdother.name:
            return False
        type_cat = str(pdself.dtype) == str(pdother.dtype) == 'category'
        if type_cat:
            # categories are pd.Index objects: they are compared directly (a
            # recursive call would reject them because they are not Series)
            categ_self = as_def_type(pdself.cat.categories)
            categ_other = as_def_type(pdother.cat.categories)
            return (categ_self.equals(categ_other)
                    and pdself.cat.codes.equals(pdother.cat.codes))
        return as_def_type(pdself).equals(as_def_type(pdother))

NTV connector for pandas Series

Two static methods are included:

  • to_idx: convert a Series in categorical data
  • to_series: return a Series from Field data
clas_obj = 'Series'
clas_typ = 'field'
config = <configparser.ConfigParser object>
types = ntv_type name_type dtype 0 None None 1 duration None timedelta64[ns] 2 datetime None datetime64[ns] 3 string None string 4 json None None 5 float16 None Float16 6 float32 None Float32 7 uint8 None UInt8 8 uint16 None UInt16 9 uint32 None UInt32 10 uint64 None UInt64 11 int8 None Int8 12 int16 None Int16 13 int32 None Int32 14 int int Int64 15 float float Float64 16 number number Float64 17 int64 int64 Int64 18 float64 float64 Float64 19 boolean boolean boolean 20 array array object 21 object object object 22 null null object 23 period period string 24 uri uri string 25 email email string 26 file file string 27 date date object 28 time time object 29 point point object 30 line line object 31 polygon polygon object 32 geometry geometry object 33 geojson geojson object 34 month month None 35 year year None 36 day day None 37 wday wday None 38 yday yday None 39 week week None 40 hour hour None 41 minute minute None 42 second second None
astype = {'uint8': 'UInt8', 'uint16': 'UInt16', 'uint32': 'UInt32', 'uint64': 'UInt64', 'int8': 'Int8', 'int16': 'Int16', 'int32': 'Int32', 'int64': 'Int64', 'float16': 'Float16', 'float32': 'Float32', 'float64': 'Float64', 'bool': 'boolean'}
deftype = {'UInt8': 'uint8', 'UInt16': 'uint16', 'UInt32': 'uint32', 'UInt64': 'uint64', 'Int8': 'int8', 'Int16': 'int16', 'Int32': 'int32', 'Int64': 'int64', 'Float16': 'float16', 'Float32': 'float32', 'Float64': 'float64', 'boolean': 'bool'}
table = ntv_type format type 0 int default integer 1 number default number 2 boolean default boolean 3 string default string 4 uri uri string 5 email email string 6 json default object 7 array default array 8 duration default duration 9 datetime default datetime 10 date default date 11 time default time 12 month default yearmonth 13 year default year 14 point array geopoint 15 geojson default geojson 16 float64 float64 number 17 float float number 18 float16 float16 number 19 float32 float32 number 20 uint8 uint8 integer 21 uint16 uint16 integer 22 uint32 uint32 integer 23 uint64 uint64 integer 24 int8 int8 integer 25 int16 int16 integer 26 int32 int32 integer 27 int64 int64 integer 28 file file string 29 null null object 30 object object object 31 day day date 32 wday wday date 33 yday yday date 34 week week date 35 hour hour time 36 minute minute time 37 second second time 38 geometry geometry geojson 39 polygon polygon geojson 40 line line geojson
typtab = ntv_type name_type dtype 0 int None int64 1 json None object 2 boolean None boolean 3 number None float64
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
331    @staticmethod
332    def to_obj_ntv(ntv_value, **kwargs):
333        '''Generate a Series Object from a Ntv field object
334
335        *Parameters*
336
337        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
338
339        *parameters (kwargs)*
340
341        - **extkeys**: list (default None) - keys to use if not present in ntv_value
342        - **decode_str**: boolean (default False) - if True, string values are converted
343        in object values
344        - **index**: list (default None) - if present, add the index in Series
345        - **leng**: integer (default None) - leng of the Series (used with single codec value)
346        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
347        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
348        '''
349        option = {'extkeys': None, 'decode_str': False, 'leng': None,
350                  'annotated': False} | kwargs
351        if ntv_value is None:
352            return None
353        ntv = Ntv.obj(ntv_value, decode_str=option['decode_str'])
354
355        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = \
356            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
357        if parent and not option['extkeys']:
358            return None
359        if coef:
360            ntv_keys = NtvConnector.keysfromcoef(
361                coef, leng_field//coef, option['leng'])
362        elif option['extkeys'] and parent:
363            ntv_keys = NtvConnector.keysfromderkeys(
364                option['extkeys'], ntv_keys)
365        elif option['extkeys'] and not parent:
366            ntv_keys = option['extkeys']
367        ntv_codec = Ntv.fast(Ntv.obj_ntv(
368            codec, typ=typ, single=len(codec) == 1))
369        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

Generate a Series Object from a Ntv field object

Parameters

  • ntv_value: Ntv object or Ntv value - value to convert in Series

parameters (kwargs)

  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted into object values
  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
371    @staticmethod
372    def to_json_ntv(value, name=None, typ=None, **kwargs):
373        ''' convert a Series (value, name, type) into NTV json (json-value, name, type).
374
375        *Parameters*
376
377        - **typ** : string (default None) - type of the NTV object,
378        - **name** : string (default None) - name of the NTV object
379        - **value** : Series values
380        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
381        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)'''
382
383        table = kwargs.get('table', False)
384        no_val = kwargs.get('no_val', False)
385        srs = value.astype(SeriesConnec.astype.get(
386            value.dtype.name, value.dtype.name))
387        sr_name = srs.name if srs.name else ''
388        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]
389
390        if table:
391            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
392            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
393            if no_val:
394                return (ntv_name, ntv_type)
395            return (ntv_value, ntv_name, ntv_type)
396        if srs.dtype.name == 'category':
397            cdc = pd.Series(srs.cat.categories)
398            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
399            cat_value = PdUtil.ntv_val(ntv_type, cdc)
400            cat_value = NtvList(cat_value, ntv_type=ntv_type)
401            cod_value = list(srs.cat.codes)
402            coef = NtvConnector.encode_coef(cod_value)
403            ntv_value = [cat_value, NtvList(
404                [coef]) if coef else NtvList(cod_value)]
405            ntv_type = None
406        else:
407            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
408            ntv_value = Ntv.from_obj(PdUtil.ntv_val(ntv_type, srs),
409                                     def_type=ntv_type).ntv_value
410        if len(ntv_value) == 1:
411            ntv_value[0].set_name(ntv_name)
412            return (ntv_value[0].to_obj(), name,
413                    SeriesConnec.clas_typ if not typ else typ)
414        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name,
415                SeriesConnec.clas_typ if not typ else typ)

convert a Series (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : Series values
  • table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
  • no_val : boolean (default False) - if True return (ntv_name, ntv_type)
@staticmethod
def to_idx(ser):
417    @staticmethod
418    def to_idx(ser):
419        ''' convert a Series in categorical data
420
421        *return (dict)*
422
423        { 'codec': 'list of pandas categories',
424          'name': 'name of the series',
425          'keys': 'list of pandas codes' }
426        '''
427        idx = ser.astype('category')
428        lis = list(idx.cat.categories)
429        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):
430            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc)
431                   for ts in lis]
432        return {'codec': lis, 'name': ser.name, 'keys': list(idx.cat.codes)}

convert a Series in categorical data

return (dict)

{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }

@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
434    @staticmethod
435    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
436        ''' return a pd.Series from Field data (codec, name, keys)
437
438        *Parameters*
439
440        - **ntv_codec**: Ntv object - codec value to convert in Series values
441        - **ntv_type**: string - default type to apply to convert in dtype
442        - **ntv_name**: string - name of the Series
443
444        *parameters (kwargs)*
445
446        - **index**: list (default None) - if present, add the index in Series
447        - **leng**: integer (default None) - leng of the Series (used with single codec value)
448        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
449        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
450        '''
451        option = {'index': None, 'leng': None, 'alias': False,
452                  'annotated': False} | kwargs
453        types = SeriesConnec.types.set_index('ntv_type')
454        astype = SeriesConnec.astype
455        leng = option['leng']
456
457        ntv_type = ntv_codec.type_str
458        len_unique = leng if len(ntv_codec) == 1 and leng else 1
459        pd_convert = ntv_type in types.index
460
461        pd_name, name_type, dtype = PdUtil.pd_name(
462            ntv_name, ntv_type, pd_convert)
463        ntv_obj = PdUtil.ntv_obj(ntv_codec, name_type if pd_convert else ntv_type,
464                                 option['annotated'], pd_convert)
465        if ntv_keys:
466            if pd_convert and name_type != 'array':
467                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
468                cat_type = categ.dtype.name
469                categories = categ.astype(astype.get(cat_type, cat_type))
470            else:
471                categories = pd.Series(ntv_obj, dtype='object')
472            cat = pd.CategoricalDtype(categories=categories)
473            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
474            srs = pd.Series(data, name=pd_name,
475                            index=option['index'], dtype='category')
476        else:
477            data = ntv_obj * len_unique
478            if pd_convert:
479                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
480            else:
481                srs = pd.Series(data, name=pd_name, dtype=dtype)
482
483        if option['alias']:
484            return srs.astype(astype.get(srs.dtype.name, srs.dtype.name))
485        return srs.astype(SeriesConnec.deftype.get(srs.dtype.name, srs.dtype.name))

return a pd.Series from Field data (codec, name, keys)

Parameters

  • ntv_codec: Ntv object - codec value to convert in Series values (its type is used to choose the dtype)
  • ntv_name: string - name of the Series
  • ntv_keys: list - codes of the values; if not empty, a categorical Series is built

parameters (kwargs)

  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def equals(pdself, pdother):
505    @staticmethod
506    def equals(pdself, pdother):
507        '''return True if pd.equals is True and names are equal and dtype of categories are equal'''
508        if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
509            return False
510        if pdself.name != pdother.name:
511            return False
512        type_cat = str(pdself.dtype) == str(pdother.dtype) == 'category'
513        if type_cat:
514            return SeriesConnec.equals(pdself.cat.categories, pdother.cat.categories)
515        return as_def_type(pdself).equals(as_def_type(pdother))

return True if pd.equals is True and names are equal and dtype of categories are equal

Inherited Members
json_ntv.ntv_util.NtvConnector
DIC_NTV_CL
DIC_GEO_CL
DIC_DAT_CL
DIC_FCT
DIC_GEO
DIC_CBOR
DIC_OBJ
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
format_field
init_ntv_keys
class PdUtil:
class PdUtil:
    '''ntv-pandas utilities.

    This class includes static methods:

    Ntv and pandas
    - **ntv_type**: return NTVtype from name_type and dtype of a Series
    - **convert**: convert Series with external NTVtype
    - **ntv_val**: convert a simple Series into NTV json-value
    - **ntv_obj**: return a list of values to convert in a Series
    - **decode_ntv_to_val**: return a value from a ntv_field
    - **pd_name**: return a tuple with the name of the Series, the type
    deduced from the name and the dtype
    - **pd_index**: return a DataFrame with index
    - **unic**: return a one-element Series if all the values are identical

    TableSchema
    - **to_obj_table**: convert json TableSchema data into a DataFrame or a Series
    - **name_table**: return a list of non index field's names from a json Table
    - **ntvtype_table**: return a list of non index field's ntv_type from a json Table
    - **table_schema**: add 'format' and 'type' keys in a Json TableSchema
    - **table_val**: convert a Series into TableSchema json-value
    - **ntv_table**: return NTVtype from the TableSchema data
    '''
    @staticmethod
    def to_obj_table(jsn, **kwargs):
        ''' convert json TableSchema data into a DataFrame or a Series

        *Parameters*

        - **jsn** : dict - TableSchema json-value ('schema' with its 'fields'
        and 'data' keys are read)
        - **kwargs** : not used

        *Returns* : a Series if there is a single non index column, else a DataFrame
        '''
        fields = jsn['schema']['fields']
        ntv_type = PdUtil.ntvtype_table(fields)
        name = PdUtil.name_table(fields)
        # one `pd_name` call per field gives both the column name and the dtype
        pd_infos = [PdUtil.pd_name(nam, ntvtyp, table=True)
                    for nam, ntvtyp in zip(name, ntv_type)]
        pd_name = [info[0] for info in pd_infos]
        pd_dtype = [info[2] for info in pd_infos]
        # 'records' is the documented orient value for a list of row dicts
        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='records')
        dfr = PdUtil.pd_index(dfr)
        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
                            for ind, col in enumerate(dfr.columns)})
        dfr = dfr.astype({col: pd_dtype[ind]
                         for ind, col in enumerate(dfr.columns)})
        dfr.columns = pd_name
        if len(dfr.columns) == 1:
            return dfr[dfr.columns[0]]
        return dfr

    @staticmethod
    def decode_ntv_to_val(ntv):
        ''' return a value from a ntv_field (simple value for a NtvSingle,
        list of json values otherwise)'''
        if isinstance(ntv, NtvSingle):
            return ntv.to_obj(simpleval=True)
        return [ntv_val.to_obj() for ntv_val in ntv]

    @staticmethod
    def name_table(fields):
        '''return a list of non index field's names from a json Table
        (the name 'values' is replaced by None)'''
        names = [field.get('name') for field in fields
                 if field.get('name') != 'index']
        return [None if name == 'values' else name for name in names]

    @staticmethod
    def ntvtype_table(fields):
        '''return a list of non index field's ntv_type from a json Table'''
        return [PdUtil.ntv_table(field.get('format', 'default'),
                field.get('type', None)) for field in fields
                if field.get('name', None) != 'index']

    @staticmethod
    def table_schema(schema, name, ntv_type):
        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
        for the field defined by 'name' (the schema is updated in place and
        returned, the 'extDtype' key of the field is removed)'''
        ind = [field['name'] for field in schema['fields']].index(name)
        field = schema['fields'][ind]
        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
        if tabletype['format'] == 'default':
            # 'default' format is implicit in a TableSchema
            field.pop('format', None)
        else:
            field['format'] = tabletype['format']
        field['type'] = tabletype['type']
        field.pop('extDtype', None)
        return schema

    @staticmethod
    def table_val(ntv_type, ntv_name, srs):
        '''convert a Series into TableSchema json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **ntv_name**: string - name of the Series
        - **srs** : Series to be converted (it is not modified).'''
        # `convert` may return the caller's own Series: rename a shallow copy
        # so that the caller's Series keeps its name
        srs = PdUtil.convert(ntv_type, srs).copy(deep=False)
        srs.name = ntv_name
        tab_val = json.loads(srs.to_json(orient='table',
                                         date_format='iso', default_handler=str))
        # pandas names 'values' the field of an unnamed Series
        name = 'values' if srs.name is None else srs.name
        tab_val['schema'] = PdUtil.table_schema(
            tab_val['schema'], name, ntv_type)
        return tab_val

    @staticmethod
    def convert(ntv_type, srs, tojson=True):
        ''' convert Series with external NTVtype.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.
        - **tojson** : boolean (default True) - apply to json function'''
        if tojson:
            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
                return srs.apply(ShapelyConnec.to_coord)
            if ntv_type == 'geojson':
                return srs.apply(ShapelyConnec.to_geojson)
            if ntv_type == 'date':
                return srs.astype(str)
            return srs
        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
            return srs.apply(ShapelyConnec.to_geometry)
        if ntv_type == 'geojson':
            return srs.apply(ShapelyConnec.from_geojson)
        if ntv_type == 'datetime':
            return pd.to_datetime(srs)
        if ntv_type == 'date':
            return pd.to_datetime(srs).dt.date
        if ntv_type == 'time':
            return pd.to_datetime(srs, format='mixed').dt.time
        return srs

    @staticmethod
    def ntv_type(name_type, dtype, table=False):
        ''' return NTVtype from name_type and dtype of a Series .

        *Parameters*

        - **name_type** : string - type included in the Series name,
        - **dtype** : string - dtype of the Series.
        - **table** : boolean (default False) - True if Table Schema conversion
        '''
        if not name_type:
            types_none = SeriesConnec.types.set_index('name_type').loc[None]
            if dtype in types_none.dtype.values:
                return types_none.set_index('dtype').loc[dtype].ntv_type
            if not table:
                return None
            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
        return name_type

    @staticmethod
    def ntv_val(ntv_type, srs):
        ''' convert a simple Series into NTV json-value.

        *Parameters*

        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
        - **srs** : Series to be converted.'''
        srs = PdUtil.convert(ntv_type, srs)
        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
            return srs.to_list()
        if srs.dtype.name == 'object':
            return srs.to_list()
        return json.loads(srs.to_json(orient='records',
                                      date_format='iso', default_handler=str))

    @staticmethod
    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
        '''return a list of values to convert in a Series'''
        if pd_convert:
            if name_type == 'array':
                return ntv_codec.to_obj(format='obj', simpleval=True)
            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
                                          def_type=ntv_codec.type_str, fast=True)
            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

    @staticmethod
    def ntv_table(table_format, table_type):
        ''' return NTVtype from the TableSchema data.

        *Parameters*

        - **table_format** : string - TableSchema format,
        - **table_type** : string - TableSchema type'''
        return SeriesConnec.table.set_index(['type', 'format']).loc[
            (table_type, table_format)].values[0]

    @staticmethod
    def pd_index(dfr):
        '''return a DataFrame where the 'index' column (if present) is used
        as unnamed index'''
        if 'index' in dfr.columns:
            dfr = dfr.set_index('index')
            dfr.index.rename(None, inplace=True)
        return dfr

    @staticmethod
    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
        '''return a tuple with the name of the Series, the type deduced from
        the name and the dtype'''
        ntv_name = '' if ntv_name is None else ntv_name
        typtab = SeriesConnec.typtab.set_index('ntv_type')
        types = SeriesConnec.types.set_index('ntv_type')
        if table and ntv_type.lower() in typtab.index:
            name_type = typtab.loc[ntv_type.lower()]['name_type']
            dtype = typtab.loc[ntv_type.lower()]['dtype']
        elif pd_convert or table:
            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
            dtype = types.loc[ntv_type]['dtype']
        else:
            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
        dtype = SeriesConnec.deftype.get(dtype, dtype)  # added
        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
        return (pd_name if pd_name else None, name_type, dtype)

    @staticmethod
    def unic(srs):
        ''' return a one-element Series if all the values of the Series are
        identical, else the Series itself (categorical and empty Series are
        returned unchanged)'''
        if str(srs.dtype) == 'category':
            return srs
        if len(srs) == 0:
            # no first value to compare with
            return srs
        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

ntv-pandas utilities.

This class includes static methods:

Ntv and pandas

  • ntv_type: return NTVtype from name_type and dtype of a Series
  • convert: convert Series with external NTVtype
  • ntv_val: convert a simple Series into NTV json-value
  • ntv_obj: return a list of values to convert in a Series
  • pd_name: return a tuple with the name of the Series, the type deduced from the name and the dtype
  • pd_index: return a DataFrame with index
  • unic: return simple value if the Series contains a single value

TableSchema

  • to_obj_table: convert json TableSchema data into a DataFrame or a Series
  • name_table: return a list of non index field's names from a json Table
  • ntvtype_table: return a list of non index field's ntv_type from a json Table
  • table_schema: add 'format' and 'type' keys in a Json TableSchema
  • table_val: convert a Series into TableSchema json-value
  • ntv_table: return NTVtype from the TableSchema data
@staticmethod
def to_obj_table(jsn, **kwargs):
540    @staticmethod
541    def to_obj_table(jsn, **kwargs):
542        ''' convert json TableSchema data into a DataFrame or a Series'''
543        ntv_type = PdUtil.ntvtype_table(jsn['schema']['fields'])
544        name = PdUtil.name_table(jsn['schema']['fields'])
545        pd_name = [PdUtil.pd_name(nam, ntvtyp, table=True)[0]
546                   for nam, ntvtyp in zip(name, ntv_type)]
547        pd_dtype = [PdUtil.pd_name(nam, ntvtyp, table=True)[2]
548                    for nam, ntvtyp in zip(name, ntv_type)]
549        dfr = pd.read_json(StringIO(json.dumps(jsn['data'])), orient='record')
550        dfr = PdUtil.pd_index(dfr)
551        dfr = pd.DataFrame({col: PdUtil.convert(ntv_type[ind], dfr[col], tojson=False)
552                            for ind, col in enumerate(dfr.columns)})
553        dfr = dfr.astype({col: pd_dtype[ind]
554                         for ind, col in enumerate(dfr.columns)})
555        dfr.columns = pd_name
556        if len(dfr.columns) == 1:
557            return dfr[dfr.columns[0]]
558        return dfr

convert json TableSchema data into a DataFrame or a Series

@staticmethod
def decode_ntv_to_val(ntv):
560    @staticmethod
561    def decode_ntv_to_val(ntv):
562        ''' return a value from a ntv_field'''
563        if isinstance(ntv, NtvSingle):
564            return ntv.to_obj(simpleval=True)
565        return [ntv_val.to_obj() for ntv_val in ntv]

return a value from a ntv_field

@staticmethod
def name_table(fields):
567    @staticmethod
568    def name_table(fields):
569        '''return a list of non index field's names from a json Table'''
570        names = [field.get('name', None) for field in fields
571                 if field.get('name', None) != 'index']
572        return [None if name == 'values' else name for name in names]

return a list of non index field's names from a json Table

@staticmethod
def ntvtype_table(fields):
574    @staticmethod
575    def ntvtype_table(fields):
576        '''return a list of non index field's ntv_type from a json Table'''
577        return [PdUtil.ntv_table(field.get('format', 'default'),
578                field.get('type', None)) for field in fields
579                if field.get('name', None) != 'index']

return a list of non index field's ntv_type from a json Table

@staticmethod
def table_schema(schema, name, ntv_type):
581    @staticmethod
582    def table_schema(schema, name, ntv_type):
583        '''convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema
584        for the field defined by 'name' '''
585        ind = [field['name'] for field in schema['fields']].index(name)
586        tabletype = SeriesConnec.table.set_index('ntv_type').loc[ntv_type]
587        if tabletype['format'] == 'default':
588            schema['fields'][ind].pop('format', None)
589        else:
590            schema['fields'][ind]['format'] = tabletype['format']
591        schema['fields'][ind]['type'] = tabletype['type']
592        schema['fields'][ind].pop('extDtype', None)
593        return schema

convert 'ntv_type' in 'format' and 'type' keys in a Json TableSchema for the field defined by 'name'

@staticmethod
def table_val(ntv_type, ntv_name, srs):
595    @staticmethod
596    def table_val(ntv_type, ntv_name, srs):
597        '''convert a Series into TableSchema json-value.
598
599        *Parameters*
600
601        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
602        - **ntv_name**: string - name of the Series
603        - **srs** : Series to be converted.'''
604        srs = PdUtil.convert(ntv_type, srs)
605        srs.name = ntv_name
606        tab_val = json.loads(srs.to_json(orient='table',
607                                         date_format='iso', default_handler=str))
608        name = 'values' if srs.name is None else srs.name
609        tab_val['schema'] = PdUtil.table_schema(
610            tab_val['schema'], name, ntv_type)
611        return tab_val

convert a Series into TableSchema json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • ntv_name: string - name of the Series
  • srs : Series to be converted.
@staticmethod
def convert(ntv_type, srs, tojson=True):
613    @staticmethod
614    def convert(ntv_type, srs, tojson=True):
615        ''' convert Series with external NTVtype.
616
617        *Parameters*
618
619        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
620        - **srs** : Series to be converted.
621        - **tojson** : boolean (default True) - apply to json function'''
622        if tojson:
623            if ntv_type in ['point', 'line', 'polygon', 'geometry']:
624                return srs.apply(ShapelyConnec.to_coord)
625            if ntv_type == 'geojson':
626                return srs.apply(ShapelyConnec.to_geojson)
627            if ntv_type == 'date':
628                return srs.astype(str)
629            return srs
630        if ntv_type in ['point', 'line', 'polygon', 'geometry']:
631            return srs.apply(ShapelyConnec.to_geometry)
632        if ntv_type == 'geojson':
633            return srs.apply(ShapelyConnec.from_geojson)
634        if ntv_type == 'datetime':
635            return pd.to_datetime(srs)
636        if ntv_type == 'date':
637            return pd.to_datetime(srs).dt.date
638        if ntv_type == 'time':
639            return pd.to_datetime(srs, format='mixed').dt.time
640        return srs

convert Series with external NTVtype.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
  • tojson : boolean (default True) - apply to json function
@staticmethod
def ntv_type(name_type, dtype, table=False):
642    @staticmethod
643    def ntv_type(name_type, dtype, table=False):
644        ''' return NTVtype from name_type and dtype of a Series .
645
646        *Parameters*
647
648        - **name_type** : string - type included in the Series name,
649        - **dtype** : string - dtype of the Series.
650        - **table** : boolean (default False) - True if Table Schema conversion
651        '''
652        if not name_type:
653            types_none = SeriesConnec.types.set_index('name_type').loc[None]
654            if dtype in types_none.dtype.values:
655                return types_none.set_index('dtype').loc[dtype].ntv_type
656            if not table:
657                return None
658            typtab = SeriesConnec.typtab.set_index('name_type').loc[None]
659            return typtab.set_index('dtype').loc[dtype.lower()].ntv_type
660        return name_type

return NTVtype from name_type and dtype of a Series .

Parameters

  • name_type : string - type included in the Series name,
  • dtype : string - dtype of the Series.
  • table : boolean (default False) - True if Table Schema conversion
@staticmethod
def ntv_val(ntv_type, srs):
662    @staticmethod
663    def ntv_val(ntv_type, srs):
664        ''' convert a simple Series into NTV json-value.
665
666        *Parameters*
667
668        - **ntv_type** : string - NTVtype deduced from the Series name_type and dtype,
669        - **srs** : Series to be converted.'''
670        srs = PdUtil.convert(ntv_type, srs)
671        if ntv_type in ['point', 'line', 'polygon', 'geometry', 'geojson']:
672            return srs.to_list()
673        if srs.dtype.name == 'object':
674            return srs.to_list()
675        return json.loads(srs.to_json(orient='records',
676                                      date_format='iso', default_handler=str))

convert a simple Series into NTV json-value.

Parameters

  • ntv_type : string - NTVtype deduced from the Series name_type and dtype,
  • srs : Series to be converted.
@staticmethod
def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
678    @staticmethod
679    def ntv_obj(ntv_codec, name_type, annotated, pd_convert):
680        '''return a list of values to convert in a Series'''
681        if pd_convert:
682            if name_type == 'array':
683                return ntv_codec.to_obj(format='obj', simpleval=True)
684            ntv_obj = ntv_codec.obj_value(simpleval=annotated, json_array=False,
685                                          def_type=ntv_codec.type_str, fast=True)
686            return ntv_obj if isinstance(ntv_obj, list) else [ntv_obj]
687        return ntv_codec.to_obj(format='obj', simpleval=True, def_type=name_type)

return a list of values to convert in a Series

@staticmethod
def ntv_table(table_format, table_type):
689    @staticmethod
690    def ntv_table(table_format, table_type):
691        ''' return NTVtype from the TableSchema data.
692
693        *Parameters*
694
695        - **table_format** : string - TableSchema format,
696        - **table_type** : string - TableSchema type'''
697        return SeriesConnec.table.set_index(['type', 'format']).loc[
698            (table_type, table_format)].values[0]

return NTVtype from the TableSchema data.

Parameters

  • table_format : string - TableSchema format,
  • table_type : string - TableSchema type
@staticmethod
def pd_index(dfr):
700    @staticmethod
701    def pd_index(dfr):
702        '''return a DataFrame with index'''
703        if 'index' in dfr.columns:
704            dfr = dfr.set_index('index')
705            dfr.index.rename(None, inplace=True)
706        return dfr

return a DataFrame where the 'index' column (if present) is used as the unnamed index

@staticmethod
def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
708    @staticmethod
709    def pd_name(ntv_name, ntv_type, pd_convert=True, table=False):
710        '''return a tuple with the name of the Series, the type deduced from
711        the name and the dtype'''
712        ntv_name = '' if ntv_name is None else ntv_name
713        typtab = SeriesConnec.typtab.set_index('ntv_type')
714        types = SeriesConnec.types.set_index('ntv_type')
715        if table and ntv_type.lower() in typtab.index:
716            name_type = typtab.loc[ntv_type.lower()]['name_type']
717            dtype = typtab.loc[ntv_type.lower()]['dtype']
718        elif pd_convert or table:
719            name_type = types.loc[ntv_type]['name_type'] if ntv_type != '' else ''
720            dtype = types.loc[ntv_type]['dtype']
721        else:
722            return (ntv_name + '::' + ntv_type, ntv_type, 'object')
723        dtype = SeriesConnec.deftype.get(dtype, dtype)  # ajout
724        pd_name = ntv_name + '::' + name_type if name_type else ntv_name
725        return (pd_name if pd_name else None, name_type, dtype)

return a tuple with the name of the Series, the type deduced from the name and the dtype

@staticmethod
def unic(srs):
727    @staticmethod
728    def unic(srs):
729        ''' return simple value if the Series contains a single value'''
730        if str(srs.dtype) == 'category':
731            return srs
732        return srs[:1] if np.array_equal(srs.values, [srs.values[0]] * len(srs)) else srs

return a one-element Series if all the values of the Series are identical, otherwise the Series itself