def data_point_factory(run, *,
                       time_key,
                       measurement_name,
                       field_keys: set,
                       tag_keys: set = None,
                       no_tag_keys: set = None,
                       ) -> "DataPoint":
    """
    Build a DataPoint from a flat dict describing one run.

    Parameters:
        run(dict): dict from which the data are extracted
        time_key(str): key of ``run`` holding the timestamp; if the key is
            absent the time of the data point is ``None``
        measurement_name(str): passed through as the measurement of the
            data point
        field_keys(set): keys of ``run`` whose entries become the fields
        tag_keys(set): explicitly define the tags; takes precedence over
            ``no_tag_keys`` when both are given
        no_tag_keys(set): define what are not tags; the tags are then
            inferred as every remaining key of ``run`` that is neither a
            field key nor ``time_key``

    The three key collections may be any iterable of keys, not only sets.

    Returns:
        DataPoint: DataPoint(measurement_name, time, fields=..., tags=...)

    Raises:
        ValueError: if neither ``tag_keys`` nor ``no_tag_keys`` is given
        KeyError: if a field key or an explicit tag key is missing in ``run``
    """
    if tag_keys is None and no_tag_keys is None:
        raise ValueError("You need either specify tag_keys or no_tag_keys")

    fields = {key: run[key] for key in field_keys}

    if tag_keys is None:
        # Collect everything that must not become a tag in one real set, so
        # lists/tuples work too (``set - list`` would raise TypeError).
        # ``fields`` is used instead of ``field_keys`` so a one-shot iterable
        # that was already consumed above is still honoured.
        excluded = {time_key, *no_tag_keys, *fields}
        tag_keys = run.keys() - excluded

    tags = {key: run[key] for key in tag_keys}
    # Deliberately lenient: a missing time key yields None instead of raising.
    time = run.get(time_key)
    return DataPoint(measurement_name, time, fields=fields, tags=tags)
import time

from cbutil.data_points import data_point_factory

# Shared input for both tests: a timestamp, two tag entries, two field
# entries and one entry that is neither a tag nor a field.
test_dict = {
    "timestamp": int(time.time()),
    "tag_1key": "tag_1value",
    "tag_2key": "tag_2value",
    "field_1key": "field_1value",
    "field_2key": "field_2value",
    "neither_1key": "neither_1value",
}


def _entries_with_prefix(prefix):
    """Return the entries of test_dict whose key starts with prefix."""
    return {
        key: value
        for key, value in test_dict.items()
        if key.startswith(prefix)
    }


def _assert_expected_point(point):
    """Check time, measurement, tags and fields of a point built from test_dict."""
    assert point.time == test_dict["timestamp"]
    assert point.measurement == "name"
    assert point.tags == _entries_with_prefix("tag_")
    assert point.fields == _entries_with_prefix("field_")


def test_data_point_factory():
    """Tags are inferred from the keys that are not excluded via no_tag_keys."""
    point = data_point_factory(
        test_dict,
        time_key="timestamp",
        measurement_name="name",
        field_keys={"field_1key", "field_2key"},
        no_tag_keys={"neither_1key"},
    )
    _assert_expected_point(point)


def test_data_point_factory_explicit():
    """Tags are taken from an explicitly given tag_keys set."""
    point = data_point_factory(
        test_dict,
        time_key="timestamp",
        measurement_name="name",
        field_keys={"field_1key", "field_2key"},
        tag_keys={"tag_1key", "tag_2key"},
    )
    _assert_expected_point(point)