From f484cb3120f1cd0141ac2d220ca2c8846dfc4c5e Mon Sep 17 00:00:00 2001
From: Christoph Alt <christoph.alt@fau.de>
Date: Mon, 1 Aug 2022 16:49:02 +0200
Subject: [PATCH] added a function to build data points

---
 cbutil/__init__.py                |  2 +-
 cbutil/data_points.py             | 44 +++++++++++++++++++++++++++++++
 cbutil/postprocessing/__init__.py |  2 +-
 tests/test_datapoint.py           | 40 ++++++++++++++++++++++++++++
 4 files changed, 86 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_datapoint.py

diff --git a/cbutil/__init__.py b/cbutil/__init__.py
index 81659c2..4e20a5f 100644
--- a/cbutil/__init__.py
+++ b/cbutil/__init__.py
@@ -2,4 +2,4 @@ from .upload import DBConfig, Uploader
 from .processing_functions import mesa_pd_text
 from .postprocessing import *
 from .util import read_file_line_wise
-from .data_points import DataPoint
+from .data_points import DataPoint, data_point_factory
diff --git a/cbutil/data_points.py b/cbutil/data_points.py
index 0885411..3027f4e 100644
--- a/cbutil/data_points.py
+++ b/cbutil/data_points.py
@@ -11,3 +11,47 @@ class DataPoint:
 
     def asdict(self):
         return asdict(self)
+
+
+def data_point_factory(run, *,
+                       time_key,
+                       measurement_name,
+                       field_keys: set,
+                       tag_keys: set = None,
+                       no_tag_keys: set = None,
+                       ) -> DataPoint:
+    """
+    Build a DataPoint from the entries of a dict.
+
+    Parameters:
+        run(dict): dict from which the data is extracted
+        time_key(str): key used to look up the timestamp; the time is None
+            if the key is missing from run
+        measurement_name(str): passed to DataPoint as the measurement
+        field_keys(set): keys of run whose values become the fields
+        tag_keys(set): explicitly define the tags; takes precedence over
+            no_tag_keys if both are given
+        no_tag_keys(set): keys that are not tags; every other key of run
+            that is neither a field key nor the time key becomes a tag
+
+    Returns:
+        DataPoint: built from measurement_name, the timestamp, the fields
+            and the tags
+
+    Raises:
+        ValueError: if neither tag_keys nor no_tag_keys is given
+        KeyError: if a field key or an explicit tag key is missing from run
+    """
+    if tag_keys is None and no_tag_keys is None:
+        raise ValueError("You need either specify tag_keys or no_tag_keys")
+
+    fields = {key: run[key] for key in field_keys}
+
+    if tag_keys is None:
+        # set.difference accepts arbitrary iterables, so the key collections
+        # may also be passed as lists or tuples, not only as sets.
+        tag_keys = set(run.keys()).difference(no_tag_keys, field_keys, (time_key,))
+
+    tags = {key: run[key] for key in tag_keys}
+    time = run.get(time_key)
+    return DataPoint(measurement_name, time, fields=fields, tags=tags)
diff --git a/cbutil/postprocessing/__init__.py b/cbutil/postprocessing/__init__.py
index 5aa1264..035ee90 100644
--- a/cbutil/postprocessing/__init__.py
+++ b/cbutil/postprocessing/__init__.py
@@ -1,3 +1,3 @@
-from .plain_text import process_linewise
+from .plain_text import process_linewise, iterate_csv
 from .sqlite import sqlite_context, query_complete_table, build_iterate_query, iterate_all_tables
 from .sqlite_helper import query_builder
diff --git a/tests/test_datapoint.py b/tests/test_datapoint.py
new file mode 100644
index 0000000..1d35456
--- /dev/null
+++ b/tests/test_datapoint.py
@@ -0,0 +1,40 @@
+from cbutil.data_points import data_point_factory
+import time
+test_dict = {
+    "timestamp": int(time.time()),
+    "tag_1key": "tag_1value",
+    "tag_2key": "tag_2value",
+    "field_1key": "field_1value",
+    "field_2key": "field_2value",
+    "neither_1key": "neither_1value",
+}
+
+
+def test_data_point_factory():
+    no_tag_keys = {"neither_1key"}
+    field_keys = {"field_1key", "field_2key"}
+    dp = data_point_factory(test_dict,
+                            time_key="timestamp",
+                            measurement_name="name",
+                            field_keys=field_keys,
+                            no_tag_keys=no_tag_keys)
+
+    assert dp.time == test_dict["timestamp"]
+    assert dp.measurement == "name"
+    assert dp.tags == {k: v for k, v in test_dict.items() if k.startswith("tag_")}
+    assert dp.fields == {k: v for k, v in test_dict.items() if k.startswith("field_")}
+
+
+def test_data_point_factory_explicit():
+    tag_keys = {"tag_1key", "tag_2key"}
+    field_keys = {"field_1key", "field_2key"}
+    dp = data_point_factory(test_dict,
+                            time_key="timestamp",
+                            measurement_name="name",
+                            field_keys=field_keys,
+                            tag_keys=tag_keys)
+
+    assert dp.time == test_dict["timestamp"]
+    assert dp.measurement == "name"
+    assert dp.tags == {k: v for k, v in test_dict.items() if k.startswith("tag_")}
+    assert dp.fields == {k: v for k, v in test_dict.items() if k.startswith("field_")}
-- 
GitLab