ntv-pandas.ntv_pandas

NTV-pandas Package

Created on Sept 2023

@author: philippe@loco-labs.io

This package contains the following classes and functions:

  • ntv-pandas.ntv_pandas.pandas_ntv_connector :

    • ntv-pandas.ntv_pandas.pandas_ntv_connector.DataFrameConnec
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.SeriesConnec
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.PdUtil
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.to_json
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.read_json
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.analysis
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.as_def_type
    • ntv-pandas.ntv_pandas.pandas_ntv_connector.equals
  • ntv-pandas.ntv_pandas.accessors :

    • ntv-pandas.ntv_pandas.accessors.NpdSeriesAccessor
    • ntv-pandas.ntv_pandas.accessors.NpdDataFrameAccessor
 1# -*- coding: utf-8 -*-
 2"""
 3***NTV-pandas Package***
 4
 5Created on Sept 2023
 6
 7@author: philippe@loco-labs.io
 8
 9This package contains the following classes and functions:
10
11- `ntv-pandas.ntv_pandas.pandas_ntv_connector` :
12
13    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.DataFrameConnec`
14    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.SeriesConnec`
15    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.PdUtil`
16    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.to_json`
17    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.read_json`
18    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.analysis`
19    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.as_def_type`
20    - `ntv-pandas.ntv_pandas.pandas_ntv_connector.equals`
21
22- `ntv-pandas.ntv_pandas.accessors` :
23
24    - `ntv-pandas.ntv_pandas.accessors.NpdSeriesAccessor`
25    - `ntv-pandas.ntv_pandas.accessors.NpdDataFrameAccessor`
26"""
27
28from ntv_pandas.pandas_ntv_connector import DataFrameConnec, SeriesConnec, read_json
29from ntv_pandas.pandas_ntv_connector import to_json, as_def_type, equals, to_analysis
30from ntv_pandas.pandas_ntv_connector import from_xarray, from_scipp
31import ntv_pandas.pandas_accessors as pandas_accessors
32
33__all__ = [
34    "DataFrameConnec",
35    "SeriesConnec",
36    "read_json",
37    "to_json",
38    "as_def_type",
39    "equals",
40    "to_analysis",
41    "from_xarray",
42    "from_scipp",
43    "pandas_accessors",
44]
class DataFrameConnec(json_ntv.ntv_util.NtvConnector):
class DataFrameConnec(NtvConnector):
    """NTV connector for pandas DataFrame.

    Besides the two connector methods (`to_obj_ntv` and `to_json_ntv`),
    two static methods are included:

    - to_listidx: convert a DataFrame in categorical data
    - equals: column by column comparison of two DataFrames
    """

    clas_obj = "DataFrame"
    clas_typ = "tab"

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        """convert json ntv_value into a DataFrame.

        Every item of `ntv_value` is decoded as one field and converted into
        one Series with `SeriesConnec.to_series`.

        *Parameters*

        - **ntv_value** : value converted with `Ntv.fast` - one item per column

        *parameters (kwargs)* - forwarded to `SeriesConnec.to_series`

        - **index** : list (default None) - list of index values,
        - **alias** : boolean (default False) - if True, alias dtype else default dtype
        - **annotated** : boolean (default False) - if True, NTV names are not included.

        *Return* : result of `PdUtil.pd_index` applied to the DataFrame built."""
        to_series = SeriesConnec.to_series

        # each decoded field is a list:
        # [name, type, codec, parent, keys, coef, length]
        fields = [
            list(NtvUtil.decode_ntv_tab(field, PdUtil.decode_ntv_to_val))
            for field in Ntv.fast(ntv_value)
        ]
        leng = max(field[6] for field in fields)
        options = kwargs | {"leng": leng}
        keyless = []
        for rank, field in enumerate(fields):
            # recorded before `init_ntv_keys` is applied to this field
            keyless.append(not (field[3] or field[4] or field[5]))
            NtvConnector.init_ntv_keys(rank, fields, leng)
            codec = Ntv.obj_ntv(field[2], typ=field[1], single=len(field[2]) == 1)
            field[2] = Ntv.fast(codec)
        columns = [
            to_series(field[2], field[0], None if no_keys else field[4], **options)
            for field, no_keys in zip(fields, keyless)
        ]
        dfr = pd.DataFrame({ser.name: ser for ser in columns})
        return PdUtil.pd_index(dfr)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        """convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
          (`DataFrameConnec.clas_typ` if not defined),
        - **name** : string (default None) - name of the NTV object
        - **value** : DataFrame values
        - **table** : boolean (default False) - if True return TableSchema format
        - **index** : boolean (default True) - if True the index Series is included
          (used when `table` is False)

        *Return* : tuple (json-value, name, type)
        """
        table = kwargs.get("table", False)
        index = kwargs.get("index", True)
        ntv_type = typ if typ else DataFrameConnec.clas_typ
        if not table:
            dfr = value.reset_index() if index else value
            fields = [
                SeriesConnec.to_json_ntv(PdUtil.unic(dfr[col]))[0]
                for col in dfr.columns
            ]
            return (Ntv.obj(fields).to_obj(), name, ntv_type)

        # (ntv_name, ntv_type) of every column, computed once and used both
        # for the data conversion and for the schema update
        col_types = [
            SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)
            for col in value.columns
        ]
        dfr = pd.DataFrame(
            {
                NtvUtil.from_obj_name(col)[0]: PdUtil.convert(col_type, value[col])
                for col, (_, col_type) in zip(value.columns, col_types)
            }
        )
        table_val = json.loads(
            dfr.to_json(orient="table", date_format="iso", default_handler=str)
        )
        for col_name, col_type in col_types:
            table_val["schema"] = PdUtil.table_schema(
                table_val["schema"], col_name, col_type
            )
        return (table_val, name, ntv_type)

    @staticmethod
    def to_listidx(dtf):
        """convert a DataFrame in categorical data

        *Return: tuple with:*

        - **list** of dict (keys : 'codec', 'name', 'keys') for each column
        - **length** of the DataFrame"""
        return ([SeriesConnec.to_idx(ser) for _, ser in dtf.items()], len(dtf))

    @staticmethod
    def equals(pdself, pdother):
        """return True if both objects are DataFrames with the same number of
        columns and if the columns, taken in order, are equal
        (see `SeriesConnec.equals`)"""
        if not (isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame)):
            return False
        if len(pdself.columns) != len(pdother.columns):
            return False
        # iterating a DataFrame yields its column labels
        return all(
            SeriesConnec.equals(pdself[cself], pdother[cother])
            for cself, cother in zip(pdself, pdother)
        )

NTV connector for pandas DataFrame.

One static method is included:

  • to_listidx: convert a DataFrame in categorical data
clas_obj = 'DataFrame'
clas_typ = 'tab'
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
250    @staticmethod
251    def to_obj_ntv(ntv_value, **kwargs):  # reindex=True, decode_str=False):
252        """convert json ntv_value into a DataFrame.
253
254        *Parameters*
255
256        - **index** : list (default None) - list of index values,
257        - **alias** : boolean (default False) - if True, alias dtype else default dtype
258        - **annotated** : boolean (default False) - if True, NTV names are not included."""
259        series = SeriesConnec.to_series
260
261        ntv = Ntv.fast(ntv_value)
262        lidx = [
263            list(NtvUtil.decode_ntv_tab(ntvf, PdUtil.decode_ntv_to_val)) for ntvf in ntv
264        ]
265        leng = max([idx[6] for idx in lidx])
266        option = kwargs | {"leng": leng}
267        no_keys = []
268        for ind, lind in enumerate(lidx):
269            no_keys.append(not lind[3] and not lind[4] and not lind[5])
270            NtvConnector.init_ntv_keys(ind, lidx, leng)
271            lind[2] = Ntv.fast(
272                Ntv.obj_ntv(lind[2], typ=lind[1], single=len(lind[2]) == 1)
273            )
274        list_series = [
275            series(
276                lidx[ind][2],
277                lidx[ind][0],
278                None if no_keys[ind] else lidx[ind][4],
279                **option,
280            )
281            for ind in range(len(lidx))
282        ]
283        dfr = pd.DataFrame({ser.name: ser for ser in list_series})
284        return PdUtil.pd_index(dfr)

convert json ntv_value into a DataFrame.

Parameters

  • index : list (default None) - list of index values,
  • alias : boolean (default False) - if True, alias dtype else default dtype
  • annotated : boolean (default False) - if True, NTV names are not included.
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
286    @staticmethod
287    def to_json_ntv(value, name=None, typ=None, **kwargs):
288        """convert a DataFrame (value, name, type) into NTV json (json-value, name, type).
289
290        *Parameters*
291
292        - **typ** : string (default None) - type of the NTV object,
293        - **name** : string (default None) - name of the NTV object
294        - **value** : DataFrame values
295        - **table** : boolean (default False) - if True return TableSchema format
296        - **index** : boolean (default True) - if True the index Series is included
297        """
298        table = kwargs.get("table", False)
299        index = kwargs.get("index", True)
300        if not table:
301            df2 = value.reset_index() if index else value
302            jsn = Ntv.obj(
303                [
304                    SeriesConnec.to_json_ntv(PdUtil.unic(df2[col]))[0]
305                    for col in df2.columns
306                ]
307            ).to_obj()
308            return (jsn, name, DataFrameConnec.clas_typ if not typ else typ)
309        df2 = pd.DataFrame(
310            {
311                NtvUtil.from_obj_name(col)[0]: PdUtil.convert(
312                    SeriesConnec.to_json_ntv(value[col], table=True, no_val=True)[1],
313                    value[col],
314                )
315                for col in value.columns
316            }
317        )
318        table_val = json.loads(
319            df2.to_json(orient="table", date_format="iso", default_handler=str)
320        )
321        for nam in value.columns:
322            ntv_name, ntv_type = SeriesConnec.to_json_ntv(
323                value[nam], table=True, no_val=True
324            )
325            table_val["schema"] = PdUtil.table_schema(
326                table_val["schema"], ntv_name, ntv_type
327            )
328        return (table_val, name, DataFrameConnec.clas_typ if not typ else typ)

convert a DataFrame (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : DataFrame values
  • table : boolean (default False) - if True return TableSchema format
  • index : boolean (default True) - if True the index Series is included
@staticmethod
def to_listidx(dtf):
330    @staticmethod
331    def to_listidx(dtf):
332        """convert a DataFrame in categorical data
333
334        *Return: tuple with:*
335
336        - **list** of dict (keys : 'codec', 'name, 'keys') for each column
337        - **lenght** of the DataFrame"""
338        return ([SeriesConnec.to_idx(ser) for name, ser in dtf.items()], len(dtf))

convert a DataFrame in categorical data

Return: tuple with:

  • list of dict (keys : 'codec', 'name', 'keys') for each column
  • length of the DataFrame
@staticmethod
def equals(pdself, pdother):
340    @staticmethod
341    def equals(pdself, pdother):
342        """return True if columns are equals"""
343        if not (isinstance(pdself, pd.DataFrame) and isinstance(pdother, pd.DataFrame)):
344            return False
345        if len(pdself.columns) != len(pdother.columns):
346            return False
347        for cself, cother in zip(pdself, pdother):
348            if not SeriesConnec.equals(pdself[cself], pdother[cother]):
349                return False
350        return True

return True if columns are equal

Inherited Members
json_ntv.ntv_util.NtvConnector
DIC_NTV_CL
DIC_GEO_CL
DIC_DAT_CL
DIC_FCT
DIC_GEO
DIC_CBOR
DIC_OBJ
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
format_field
init_ntv_keys
class SeriesConnec(json_ntv.ntv_util.NtvConnector):
class SeriesConnec(NtvConnector):
    """NTV connector for pandas Series

    Besides the two connector methods (`to_obj_ntv` and `to_json_ntv`),
    the following static methods are included:

    - to_idx: convert a Series in categorical data
    - to_series: return a Series from Field data
    - equals: comparison of two Series
    """

    clas_obj = "Series"
    clas_typ = "field"
    # conversion data between NTV types and pandas dtypes ("ntv_pandas.ini")
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath("ntv_pandas.ini"))
    types = pd.DataFrame(
        json.loads(config["data"]["type"]), columns=json.loads(config["data"]["column"])
    )
    # astype: dtype name -> alias dtype name ; deftype: the reverse mapping
    astype = json.loads(config["data"]["astype"])
    deftype = {val: key for key, val in astype.items()}
    # conversion data used for the TableSchema format ("ntv_table.ini")
    config = configparser.ConfigParser()
    config.read(path_ntv_pandas.joinpath("ntv_table.ini"))
    table = pd.DataFrame(
        json.loads(config["data"]["mapping"]),
        columns=json.loads(config["data"]["column"]),
    )
    typtab = pd.DataFrame(
        json.loads(config["data"]["type"]),
        columns=json.loads(config["data"]["col_type"]),
    )

    @staticmethod
    def to_obj_ntv(ntv_value, **kwargs):
        """Generate a Series Object from a Ntv field object

        *Parameters*

        - **ntv_value**: Ntv object or Ntv value - value to convert in Series

        *parameters (kwargs)*

        - **extkeys**: list (default None) - keys to use if not present in ntv_value
        - **decode_str**: boolean (default False) - if True, string values are converted
        in object values
        - **index**: list (default None) - if present, add the index in Series
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored

        *Return* : Series, or None if `ntv_value` is None or if the field has a
        parent and no `extkeys` is given.
        """
        if ntv_value is None:
            return None
        option = {
            "extkeys": None,
            "decode_str": False,
            "leng": None,
            "annotated": False,
        } | kwargs
        extkeys = option["extkeys"]
        ntv = Ntv.obj(ntv_value, decode_str=option["decode_str"])

        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = (
            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
        )
        if parent and not extkeys:
            return None
        if coef:
            ntv_keys = NtvConnector.keysfromcoef(
                coef, leng_field // coef, option["leng"]
            )
        elif extkeys:
            # keys of the field are relative to the parent keys when a parent exists
            ntv_keys = (
                NtvConnector.keysfromderkeys(extkeys, ntv_keys) if parent else extkeys
            )
        ntv_codec = Ntv.fast(Ntv.obj_ntv(codec, typ=typ, single=len(codec) == 1))
        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

    @staticmethod
    def to_json_ntv(value, name=None, typ=None, **kwargs):
        """convert a Series (value, name, type) into NTV json (json-value, name, type).

        *Parameters*

        - **typ** : string (default None) - type of the NTV object
          (`SeriesConnec.clas_typ` if not defined),
        - **name** : string (default None) - name of the NTV object
        - **value** : Series values
        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
        - **no_val** : boolean (default False) - if True (and `table` is True)
          return (ntv_name, ntv_type)"""
        dtype_name = value.dtype.name
        srs = value.astype(SeriesConnec.astype.get(dtype_name, dtype_name))
        ntv_name, name_type = NtvUtil.from_obj_name(srs.name or "")[:2]

        if kwargs.get("table", False):
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
            if kwargs.get("no_val", False):
                return (ntv_name, ntv_type)
            return (ntv_value, ntv_name, ntv_type)

        if srs.dtype.name == "category":
            # categorical data: [list of categories, list of codes (or coef)]
            cdc = pd.Series(srs.cat.categories)
            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
            cat_value = NtvList(PdUtil.ntv_val(ntv_type, cdc), ntv_type=ntv_type)
            cod_value = list(srs.cat.codes)
            coef = NtvConnector.encode_coef(cod_value)
            ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)]
            ntv_type = None
        else:
            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
            ntv_value = Ntv.from_obj(
                PdUtil.ntv_val(ntv_type, srs), def_type=ntv_type
            ).ntv_value

        field_type = typ if typ else SeriesConnec.clas_typ
        if len(ntv_value) == 1:
            # a single value is returned as a named NTV object, not as a list
            single = ntv_value[0]
            single.set_name(ntv_name)
            return (single.to_obj(), name, field_type)
        return (NtvList(ntv_value, ntv_name, ntv_type).to_obj(), name, field_type)

    @staticmethod
    def to_idx(ser):
        """convert a Series in categorical data

        *return (dict)*

        { 'codec': 'list of pandas categories',
          'name': 'name of the series',
          'keys': 'list of pandas codes' }

        Timestamp categories are returned as python datetime converted to UTC.
        """
        idx = ser.astype("category")
        lis = list(idx.cat.categories)
        # public alias of the Timestamp class (no private pandas module needed)
        if lis and isinstance(lis[0], pd.Timestamp):
            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) for ts in lis]
        return {"codec": lis, "name": ser.name, "keys": list(idx.cat.codes)}

    @staticmethod
    def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
        """return a pd.Series from Field data (codec, name, keys)

        *Parameters*

        - **ntv_codec**: Ntv object - codec value to convert in Series values
        - **ntv_name**: string - name of the Series
        - **ntv_keys**: list - codes of the values in the codec ; if present the
        Series returned is categorical

        *parameters (kwargs)*

        - **index**: list (default None) - if present, add the index in Series
        (used only when `ntv_keys` is present)
        - **leng**: integer (default None) - length of the Series (used with single codec value)
        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
        """
        option = {
            "index": None,
            "leng": None,
            "alias": False,
            "annotated": False,
        } | kwargs
        types = SeriesConnec.types.set_index("ntv_type")
        astype = SeriesConnec.astype
        leng = option["leng"]

        ntv_type = ntv_codec.type_str
        # a codec with a single value is repeated `leng` times
        len_unique = leng if len(ntv_codec) == 1 and leng else 1
        pd_convert = ntv_type in types.index

        pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
        ntv_obj = PdUtil.ntv_obj(
            ntv_codec,
            name_type if pd_convert else ntv_type,
            option["annotated"],
            pd_convert,
        )
        if ntv_keys:
            if pd_convert and name_type != "array":
                categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
                cat_type = categ.dtype.name
                categories = categ.astype(astype.get(cat_type, cat_type))
            else:
                categories = pd.Series(ntv_obj, dtype="object")
            cat = pd.CategoricalDtype(categories=categories)
            data = pd.Categorical.from_codes(codes=ntv_keys, dtype=cat)
            srs = pd.Series(data, name=pd_name, index=option["index"], dtype="category")
        else:
            data = ntv_obj * len_unique
            if pd_convert:
                srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
            else:
                srs = pd.Series(data, name=pd_name, dtype=dtype)

        srs_type = srs.dtype.name
        if option["alias"]:
            return srs.astype(astype.get(srs_type, srs_type))
        return srs.astype(SeriesConnec.deftype.get(srs_type, srs_type))

    @staticmethod
    def _from_json(data, dtype, ntv_type, pd_name=None):
        """return a Series from a Json data.

        The data is read with `pd.read_json` and then converted with
        `PdUtil.convert` according to the ntv_type.

        *Parameters*

        - **data**: Json-value - data to convert in a Series
        - **dtype**: string - dtype of the Series
        - **ntv_type**: string - default type to apply to convert in dtype
        - **pd_name**: string (default None) - name of the Series including ntv_type
        """
        srs = pd.read_json(StringIO(json.dumps(data)), dtype=dtype, typ="series")
        if pd_name is not None:
            srs = srs.rename(pd_name)
        return PdUtil.convert(ntv_type, srs, tojson=False)

    @staticmethod
    def equals(pdself, pdother):
        """return True if both objects are Series with equal names and equal values.

        Non categorical Series are compared with `pd.equals` after conversion
        with `as_def_type`. Categorical Series are equal if their categories
        are equal (same rule) and if their codes are equal."""
        if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
            return False
        if pdself.name != pdother.name:
            return False
        if str(pdself.dtype) == str(pdother.dtype) == "category":
            # `.cat.categories` is a pd.Index, which the isinstance guard above
            # rejects: it is wrapped in a Series before the recursive call
            same_categories = SeriesConnec.equals(
                pd.Series(pdself.cat.categories), pd.Series(pdother.cat.categories)
            )
            return same_categories and pdself.cat.codes.equals(pdother.cat.codes)
        return as_def_type(pdself).equals(as_def_type(pdother))

NTV connector for pandas Series

Two static methods are included:

  • to_idx: convert a Series in categorical data
  • to_series: return a Series from Field data
clas_obj = 'Series'
clas_typ = 'field'
config = <configparser.ConfigParser object>
types = ntv_type name_type dtype 0 None None 1 duration None timedelta64[ns] 2 datetime None datetime64[ns] 3 string None string 4 json None None 5 float16 None Float16 6 float32 None Float32 7 uint8 None UInt8 8 uint16 None UInt16 9 uint32 None UInt32 10 uint64 None UInt64 11 int8 None Int8 12 int16 None Int16 13 int32 None Int32 14 int int Int64 15 float float Float64 16 number number Float64 17 int64 int64 Int64 18 float64 float64 Float64 19 boolean boolean boolean 20 array array object 21 object object object 22 null null object 23 period period string 24 uri uri string 25 email email string 26 file file string 27 date date object 28 time time object 29 point point object 30 line line object 31 polygon polygon object 32 geometry geometry object 33 geojson geojson object 34 month month None 35 year year None 36 day day None 37 wday wday None 38 yday yday None 39 week week None 40 hour hour None 41 minute minute None 42 second second None
astype = {'uint8': 'UInt8', 'uint16': 'UInt16', 'uint32': 'UInt32', 'uint64': 'UInt64', 'int8': 'Int8', 'int16': 'Int16', 'int32': 'Int32', 'int64': 'Int64', 'float16': 'Float16', 'float32': 'Float32', 'float64': 'Float64', 'bool': 'boolean'}
deftype = {'UInt8': 'uint8', 'UInt16': 'uint16', 'UInt32': 'uint32', 'UInt64': 'uint64', 'Int8': 'int8', 'Int16': 'int16', 'Int32': 'int32', 'Int64': 'int64', 'Float16': 'float16', 'Float32': 'float32', 'Float64': 'float64', 'boolean': 'bool'}
table = ntv_type format type 0 int default integer 1 number default number 2 boolean default boolean 3 string default string 4 uri uri string 5 email email string 6 json default object 7 array default array 8 duration default duration 9 datetime default datetime 10 date default date 11 time default time 12 month default yearmonth 13 year default year 14 point array geopoint 15 geojson default geojson 16 float64 float64 number 17 float float number 18 float16 float16 number 19 float32 float32 number 20 uint8 uint8 integer 21 uint16 uint16 integer 22 uint32 uint32 integer 23 uint64 uint64 integer 24 int8 int8 integer 25 int16 int16 integer 26 int32 int32 integer 27 int64 int64 integer 28 file file string 29 null null object 30 object object object 31 day day date 32 wday wday date 33 yday yday date 34 week week date 35 hour hour time 36 minute minute time 37 second second time 38 geometry geometry geojson 39 polygon polygon geojson 40 line line geojson
typtab = ntv_type name_type dtype 0 int None int64 1 json None object 2 boolean None boolean 3 number None float64
@staticmethod
def to_obj_ntv(ntv_value, **kwargs):
382    @staticmethod
383    def to_obj_ntv(ntv_value, **kwargs):
384        """Generate a Series Object from a Ntv field object
385
386        *Parameters*
387
388        - **ntv_value**: Ntv object or Ntv value - value to convert in Series
389
390        *parameters (kwargs)*
391
392        - **extkeys**: list (default None) - keys to use if not present in ntv_value
393        - **decode_str**: boolean (default False) - if True, string values are converted
394        in object values
395        - **index**: list (default None) - if present, add the index in Series
396        - **leng**: integer (default None) - leng of the Series (used with single codec value)
397        - **alias**: boolean (default False) - if True, convert dtype in alias dtype
398        - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
399        """
400        option = {
401            "extkeys": None,
402            "decode_str": False,
403            "leng": None,
404            "annotated": False,
405        } | kwargs
406        if ntv_value is None:
407            return None
408        ntv = Ntv.obj(ntv_value, decode_str=option["decode_str"])
409
410        ntv_name, typ, codec, parent, ntv_keys, coef, leng_field = (
411            NtvUtil.decode_ntv_tab(ntv, PdUtil.decode_ntv_to_val)
412        )
413        if parent and not option["extkeys"]:
414            return None
415        if coef:
416            ntv_keys = NtvConnector.keysfromcoef(
417                coef, leng_field // coef, option["leng"]
418            )
419        elif option["extkeys"] and parent:
420            ntv_keys = NtvConnector.keysfromderkeys(option["extkeys"], ntv_keys)
421        elif option["extkeys"] and not parent:
422            ntv_keys = option["extkeys"]
423        ntv_codec = Ntv.fast(Ntv.obj_ntv(codec, typ=typ, single=len(codec) == 1))
424        return SeriesConnec.to_series(ntv_codec, ntv_name, ntv_keys, **option)

Generate a Series Object from a Ntv field object

Parameters

  • ntv_value: Ntv object or Ntv value - value to convert in Series

parameters (kwargs)

  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - length of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def to_json_ntv(value, name=None, typ=None, **kwargs):
426    @staticmethod
427    def to_json_ntv(value, name=None, typ=None, **kwargs):
428        """convert a Series (value, name, type) into NTV json (json-value, name, type).
429
430        *Parameters*
431
432        - **typ** : string (default None) - type of the NTV object,
433        - **name** : string (default None) - name of the NTV object
434        - **value** : Series values
435        - **table** : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
436        - **no_val** : boolean (default False) - if True return (ntv_name, ntv_type)"""
437
438        table = kwargs.get("table", False)
439        no_val = kwargs.get("no_val", False)
440        srs = value.astype(SeriesConnec.astype.get(value.dtype.name, value.dtype.name))
441        sr_name = srs.name if srs.name else ""
442        ntv_name, name_type = NtvUtil.from_obj_name(sr_name)[:2]
443
444        if table:
445            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name, table=True)
446            ntv_value = PdUtil.table_val(ntv_type, ntv_name, srs)
447            if no_val:
448                return (ntv_name, ntv_type)
449            return (ntv_value, ntv_name, ntv_type)
450        if srs.dtype.name == "category":
451            cdc = pd.Series(srs.cat.categories)
452            ntv_type = PdUtil.ntv_type(name_type, cdc.dtype.name)
453            cat_value = PdUtil.ntv_val(ntv_type, cdc)
454            cat_value = NtvList(cat_value, ntv_type=ntv_type)
455            cod_value = list(srs.cat.codes)
456            coef = NtvConnector.encode_coef(cod_value)
457            ntv_value = [cat_value, NtvList([coef]) if coef else NtvList(cod_value)]
458            ntv_type = None
459        else:
460            ntv_type = PdUtil.ntv_type(name_type, srs.dtype.name)
461            ntv_value = Ntv.from_obj(
462                PdUtil.ntv_val(ntv_type, srs), def_type=ntv_type
463            ).ntv_value
464        if len(ntv_value) == 1:
465            ntv_value[0].set_name(ntv_name)
466            return (
467                ntv_value[0].to_obj(),
468                name,
469                SeriesConnec.clas_typ if not typ else typ,
470            )
471        return (
472            NtvList(ntv_value, ntv_name, ntv_type).to_obj(),
473            name,
474            SeriesConnec.clas_typ if not typ else typ,
475        )

convert a Series (value, name, type) into NTV json (json-value, name, type).

Parameters

  • typ : string (default None) - type of the NTV object,
  • name : string (default None) - name of the NTV object
  • value : Series values
  • table : boolean (default False) - if True return (ntv_value, ntv_name, ntv_type)
  • no_val : boolean (default False) - if True return (ntv_name, ntv_type)
@staticmethod
def to_idx(ser):
477    @staticmethod
478    def to_idx(ser):
479        """convert a Series in categorical data
480
481        *return (dict)*
482
483        { 'codec': 'list of pandas categories',
484          'name': 'name of the series',
485          'keys': 'list of pandas codes' }
486        """
487        idx = ser.astype("category")
488        lis = list(idx.cat.categories)
489        if lis and isinstance(lis[0], pd._libs.tslibs.timestamps.Timestamp):
490            lis = [ts.to_pydatetime().astimezone(datetime.timezone.utc) for ts in lis]
491        return {"codec": lis, "name": ser.name, "keys": list(idx.cat.codes)}

convert a Series in categorical data

return (dict)

{ 'codec': 'list of pandas categories', 'name': 'name of the series', 'keys': 'list of pandas codes' }

@staticmethod
def to_series(ntv_codec, ntv_name, ntv_keys, **kwargs):
    """Build a pd.Series from Field data (codec, name, keys).

    *Parameters*

    - **ntv_codec**: Ntv object - codec value to convert in Series values
    - **ntv_name**: string - name of the Series
    - **ntv_keys**: codes of the categories - if truthy, a categorical Series
    is built from these codes, else the codec values are the Series values

    *parameters (kwargs)*

    - **index**: list (default None) - if present, add the index in Series
    (only used when `ntv_keys` is given)
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    """
    option = {"index": None, "leng": None, "alias": False, "annotated": False}
    option |= kwargs
    known_types = SeriesConnec.types.set_index("ntv_type")
    alias_map = SeriesConnec.astype
    leng = option["leng"]

    ntv_type = ntv_codec.type_str
    # A single codec value is repeated 'leng' times when 'leng' is given.
    repeat = leng if len(ntv_codec) == 1 and leng else 1
    pd_convert = ntv_type in known_types.index

    pd_name, name_type, dtype = PdUtil.pd_name(ntv_name, ntv_type, pd_convert)
    obj_type = name_type if pd_convert else ntv_type
    ntv_obj = PdUtil.ntv_obj(ntv_codec, obj_type, option["annotated"], pd_convert)

    if not ntv_keys:
        # No codes: the codec values are the Series values.
        data = ntv_obj * repeat
        if pd_convert:
            srs = SeriesConnec._from_json(data, dtype, ntv_type, pd_name)
        else:
            srs = pd.Series(data, name=pd_name, dtype=dtype)
    else:
        # Codes present: the codec values become the categories.
        if pd_convert and name_type != "array":
            categ = SeriesConnec._from_json(ntv_obj, dtype, ntv_type)
            cat_type = categ.dtype.name
            categories = categ.astype(alias_map.get(cat_type, cat_type))
        else:
            categories = pd.Series(ntv_obj, dtype="object")
        data = pd.Categorical.from_codes(
            codes=ntv_keys, dtype=pd.CategoricalDtype(categories=categories)
        )
        srs = pd.Series(data, name=pd_name, index=option["index"], dtype="category")

    # Final cast: alias dtypes on request, default dtypes otherwise.
    dtype_map = alias_map if option["alias"] else SeriesConnec.deftype
    current = srs.dtype.name
    return srs.astype(dtype_map.get(current, current))

return a pd.Series from Field data (codec, name, keys)

Parameters

  • ntv_codec: Ntv object - codec value to convert in Series values
  • ntv_keys: list of category codes - if present, the codec values are used as categories of a categorical Series
  • ntv_name: string - name of the Series

parameters (kwargs)

  • index: list (default None) - if present, add the index in Series
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
@staticmethod
def equals(pdself, pdother):
    """Return True if two Series have the same name and the same content.

    *Parameters*

    - **pdself**, **pdother**: objects to compare

    *return (bool)*

    - False if one of the objects is not a pd.Series or if names differ,
    - for two 'category' Series: True if the categories (converted with
    `as_def_type`) are equal and the codes are equal,
    - otherwise: result of `pd.Series.equals` applied to both Series
    converted with `as_def_type`.
    """
    if not (isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)):
        return False
    if pdself.name != pdother.name:
        return False
    if str(pdself.dtype) == str(pdother.dtype) == "category":
        # Categories are pd.Index objects: they must be compared directly,
        # a recursive call would reject them in the pd.Series guard above.
        same_categories = as_def_type(pdself.cat.categories).equals(
            as_def_type(pdother.cat.categories)
        )
        # Identical categories are not enough: the codes carry the values.
        return same_categories and pdself.cat.codes.equals(pdother.cat.codes)
    return as_def_type(pdself).equals(as_def_type(pdother))

Return True if pd.equals is True, the names are equal and the dtypes of the categories are equal.

Inherited Members
json_ntv.ntv_util.NtvConnector
DIC_NTV_CL
DIC_GEO_CL
DIC_DAT_CL
DIC_FCT
DIC_GEO
DIC_CBOR
DIC_OBJ
castable
dic_obj
dic_type
connector
dic_connec
cast
uncast
is_json_class
is_json
keysfromderkeys
encode_coef
keysfromcoef
format_field
init_ntv_keys
def read_json(jsn, **kwargs):
    """convert JSON text or JSON Value to pandas Series or Dataframe.

    *parameters*

    - **jsn** : JSON text or JSON value to convert
    - **extkeys**: list (default None) - keys to use if not present in ntv_value
    - **decode_str**: boolean (default False) - if True, string values are converted
    in object values
    - **leng**: integer (default None) - leng of the Series (used with single codec value)
    - **alias**: boolean (default False) - if True, convert dtype in alias dtype
    - **annotated**: boolean (default False) - if True, ntv_codec names are ignored
    - **series**: boolean (default False) - used only without header. If True
    JSON data is converted into Series else DataFrame

    A JSON object with a 'schema' member is handed to `PdUtil.to_obj_table`;
    any other value is parsed as NTV data and dispatched on its NTV type
    ('field' -> Series, 'tab' -> DataFrame, else according to `series`).
    """
    option = {
        "extkeys": None,
        "decode_str": False,
        "leng": None,
        "alias": False,
        "annotated": False,
        "series": False,
    } | kwargs
    jso = json.loads(jsn) if isinstance(jsn, str) else jsn
    # Only a JSON object can hold a 'schema' member. Without the dict check,
    # 'in' would do a substring test on strings, an element test on lists
    # and raise TypeError on numbers.
    if isinstance(jso, dict) and "schema" in jso:
        return PdUtil.to_obj_table(jso, **option)
    ntv = Ntv.from_obj(jso)
    if ntv.type_str == "field":
        return SeriesConnec.to_obj_ntv(ntv.ntv_value, **option)
    if ntv.type_str == "tab":
        return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)
    # No header: the 'series' option selects the kind of object to build.
    if option["series"]:
        return SeriesConnec.to_obj_ntv(ntv, **option)
    return DataFrameConnec.to_obj_ntv(ntv.ntv_value, **option)

convert JSON text or JSON Value to pandas Series or Dataframe.

parameters

  • jsn : JSON text or JSON value to convert
  • extkeys: list (default None) - keys to use if not present in ntv_value
  • decode_str: boolean (default False) - if True, string values are converted in object values
  • leng: integer (default None) - leng of the Series (used with single codec value)
  • alias: boolean (default False) - if True, convert dtype in alias dtype
  • annotated: boolean (default False) - if True, ntv_codec names are ignored
  • series: boolean (default False) - used only without header. If True JSON data is converted into Series else DataFrame
def to_json(pd_array, **kwargs):
    """Serialize a pandas Series or DataFrame as JSON text or JSON value.

    *parameters*

    - **pd_array** : Series or Dataframe to convert
    - **encoded** : boolean (default: False) - if True return a JSON text else a JSON value
    - **header** : boolean (default: True) - if True the JSON data is included as
    value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
    (ignored when `table` is True)
    - **table** : boolean (default False) - if True return TableSchema format
    - **index** : boolean (default True) - if True the index Series is included
    (DataFrame only)
    """
    option = {"encoded": False, "header": True, "table": False, "index": True}
    option |= kwargs
    as_table = option["table"]
    # The header is never added to the TableSchema format.
    with_header = option["header"] and not as_table

    if isinstance(pd_array, pd.Series):
        head = ":field"
        jsn = SeriesConnec.to_json_ntv(pd_array, table=as_table)[0]
    else:
        head = ":tab"
        jsn = DataFrameConnec.to_json_ntv(
            pd_array, table=as_table, index=option["index"]
        )[0]

    payload = {head: jsn} if with_header else jsn
    return json.dumps(payload) if option["encoded"] else payload

convert pandas Series or Dataframe to JSON text or JSON Value.

parameters

  • pd_array : Series or Dataframe to convert
  • encoded : boolean (default: False) - if True return a JSON text else a JSON value
  • header : boolean (default: True) - if True the JSON data is included as value in a {key:value} object where key is ':field' for Series or ':tab' for DataFrame
  • table : boolean (default False) - if True return TableSchema format
  • index : boolean (default True) - if True the index Series is included
def as_def_type(pd_array):
    """Convert a Series, an Index or a DataFrame to the default dtypes.

    The default dtype is looked up in `SeriesConnec.deftype` from the name of
    the current dtype; a dtype without entry is left unchanged.
    A DataFrame is rebuilt from its columns, each converted separately.
    """
    if not isinstance(pd_array, (pd.Series, pd.Index)):
        converted = {}
        for col in pd_array.columns:
            converted[col] = as_def_type(pd_array[col])
        return pd.DataFrame(converted)
    current = pd_array.dtype.name
    return pd_array.astype(SeriesConnec.deftype.get(current, current))

convert a Series or DataFrame with default dtype

def equals(pdself, pdother):
    """Compare two pandas objects of the same kind.

    Two Series are compared with `SeriesConnec.equals`, two DataFrames with
    `DataFrameConnec.equals`; any other combination returns False.
    """
    both_series = isinstance(pdself, pd.Series) and isinstance(pdother, pd.Series)
    if both_series:
        return SeriesConnec.equals(pdself, pdother)
    both_frames = isinstance(pdself, pd.DataFrame) and isinstance(
        pdother, pd.DataFrame
    )
    if not both_frames:
        return False
    return DataFrameConnec.equals(pdself, pdother)

return True if pd.equals is True and names are equal and dtype of categories are equal

def to_analysis(pd_df, distr=False):
    """Return a dict with data used in AnaDataset module.

    *Parameters*

    - **pd_df**: DataFrame to analyse (each column is converted to category codes)
    - **distr**: (default False) - if truthy, the value of a relation is
    computed with `_dist` (which also receives `distr`), else it is the number
    of distinct pairs of codes of the two columns

    *return (dict)*

    - 'fields': one dict per column with 'lencodec' and 'mincodec' (number of
    distinct codes) and 'id' (column label),
    - 'name': None,
    - 'length': number of rows,
    - 'relations': for each column except the last one, a dict giving the
    relation value with every following column.
    """
    cols = pd_df.columns
    nb_cols = len(cols)
    codes = [list(pd_df[label].astype("category").cat.codes) for label in cols]
    sizes = [len(set(code)) for code in codes]

    def coupling(first, second):
        if distr:
            return _dist(first, second, distr)
        return len(set(zip(first, second)))

    # Upper triangle only: row 'i' holds the relations with columns i+1, i+2...
    pairs = [
        [coupling(codes[row], codes[col]) for col in range(row + 1, nb_cols)]
        for row in range(nb_cols - 1)
    ]
    fields = [
        {"lencodec": size, "id": cols[ind], "mincodec": size}
        for ind, size in enumerate(sizes)
    ]
    relations = {
        cols[row]: {cols[row + 1 + shift]: value for shift, value in enumerate(line)}
        for row, line in enumerate(pairs)
    }
    return {
        "fields": fields,
        "name": None,
        "length": len(pd_df),
        "relations": relations,
    }

return a dict with data used in AnaDataset module

def from_xarray(xdt, **kwargs):
    """Convert a xarray.Dataset into a pandas DataFrame.

    The Dataset is loaded with `Xdataset.from_xarray` and the keyword
    arguments are forwarded to `Xdataset.to_dataframe`.

    *Parameters*

    - **xdt**: xarray.Dataset to convert
    - **ntv_type**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    """
    xdataset = Xdataset.from_xarray(xdt)
    return xdataset.to_dataframe(**kwargs)

convert xarray.Dataset to pandas DataFrame.

Parameters

  • ntv_type: Boolean (default True) - if False use full_name else json_name
  • info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
  • dims: list of string (default None) - order of dimensions full_name to apply
def from_scipp(sci, **kwargs):
    """Convert a scipp.Dataset / scipp.DataArray / scipp.DataGroup into a pandas DataFrame.

    The scipp object is loaded with `Xdataset.from_scipp` and the keyword
    arguments are forwarded to `Xdataset.to_dataframe`.

    *Parameters*

    - **sci**: scipp object to convert
    - **ntv_type**: Boolean (default True) - if False use full_name else json_name
    - **info**: Boolean (default True) - if True add xdt.info in DataFrame.attrs
    - **dims**: list of string (default None) - order of dimensions full_name to apply
    """
    xdataset = Xdataset.from_scipp(sci)
    return xdataset.to_dataframe(**kwargs)

convert scipp.Dataset / scipp.DataArray / scipp.DataGroup to pandas DataFrame.

Parameters

  • ntv_type: Boolean (default True) - if False use full_name else json_name
  • info: Boolean (default True) - if True add xdt.info in DataFrame.attrs
  • dims: list of string (default None) - order of dimensions full_name to apply