From f484cb3120f1cd0141ac2d220ca2c8846dfc4c5e Mon Sep 17 00:00:00 2001
From: Christoph Alt <christoph.alt@fau.de>
Date: Mon, 1 Aug 2022 16:49:02 +0200
Subject: [PATCH] added a function to build data points

---
 cbutil/__init__.py                |  2 +-
 cbutil/data_points.py             | 28 ++++++++++++++++++++++
 cbutil/postprocessing/__init__.py |  2 +-
 tests/test_datapoint.py           | 40 +++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_datapoint.py

diff --git a/cbutil/__init__.py b/cbutil/__init__.py
index 81659c2..4e20a5f 100644
--- a/cbutil/__init__.py
+++ b/cbutil/__init__.py
@@ -2,4 +2,4 @@ from .upload import DBConfig, Uploader
 from .processing_functions import mesa_pd_text
 from .postprocessing import *
 from .util import read_file_line_wise
-from .data_points import DataPoint
+from .data_points import DataPoint, data_point_factory
diff --git a/cbutil/data_points.py b/cbutil/data_points.py
index 0885411..3027f4e 100644
--- a/cbutil/data_points.py
+++ b/cbutil/data_points.py
@@ -11,3 +11,31 @@ class DataPoint:
 
     def asdict(self):
         return asdict(self)
+
+
+def data_point_factory(run, *, time_key, measurement_name, field_keys: set,
+                       tag_keys: "set | None" = None,
+                       no_tag_keys: "set | None" = None) -> DataPoint:
+    """
+    Returns a data point built from the entries of a dict.
+        Parameters:
+            run(dict): dict the data are extracted from
+            time_key(str): key of the timestamp; time is None if it is missing
+            measurement_name(str): measurement name passed to the data point
+            field_keys(set): keys of run that are copied into the fields
+            tag_keys(set): explicitly define tags; no_tag_keys is then unused
+            no_tag_keys(set): define what are not tags; all keys of run that
+                are also neither a field key nor time_key become the tags
+        Raises:
+            ValueError: if both tag_keys and no_tag_keys are None
+            KeyError: if a field key or an explicit tag key is not in run
+    """
+    if tag_keys is None and no_tag_keys is None:
+        raise ValueError("You need either specify tag_keys or no_tag_keys")
+
+    fields = {key: run[key] for key in field_keys}
+    if tag_keys is None:
+        # Infer the tags: everything that is not excluded, a field or the time.
+        tag_keys = run.keys() - no_tag_keys - field_keys - {time_key}
+    tags = {key: run[key] for key in tag_keys}
+    return DataPoint(measurement_name, run.get(time_key), fields=fields, tags=tags)
diff --git a/cbutil/postprocessing/__init__.py b/cbutil/postprocessing/__init__.py
index 5aa1264..035ee90 100644
--- a/cbutil/postprocessing/__init__.py
+++ b/cbutil/postprocessing/__init__.py
@@ -1,3 +1,3 @@
-from .plain_text import process_linewise
+from .plain_text import process_linewise, iterate_csv
 from .sqlite import sqlite_context, query_complete_table, build_iterate_query, iterate_all_tables
 from .sqlite_helper import query_builder
diff --git a/tests/test_datapoint.py b/tests/test_datapoint.py
new file mode 100644
index 0000000..1d35456
--- /dev/null
+++ b/tests/test_datapoint.py
@@ -0,0 +1,40 @@
+from cbutil.data_points import data_point_factory
+import time
+# Shared input: one timestamp, two tags, two fields and one extra entry.
+test_dict = {
+    "timestamp": int(time.time()),
+    "tag_1key": "tag_1value", "tag_2key": "tag_2value",
+    "field_1key": "field_1value", "field_2key": "field_2value",
+    # Neither a tag nor a field; the tests exclude it through no_tag_keys.
+    "neither_1key": "neither_1value",
+}
+
+
+def test_data_point_factory():
+    """Tags are inferred from the keys that are left after the exclusions."""
+    point = data_point_factory(
+        test_dict,
+        time_key="timestamp",
+        measurement_name="name",
+        field_keys={"field_1key", "field_2key"},
+        no_tag_keys={"neither_1key"},
+    )
+    assert point.time == test_dict["timestamp"]
+    assert point.measurement == "name"
+    assert point.tags == {key: test_dict[key] for key in ("tag_1key", "tag_2key")}
+    assert point.fields == {key: test_dict[key] for key in ("field_1key", "field_2key")}
+
+
+def test_data_point_factory_explicit():
+    """Explicitly passed tag_keys select the tags of the data point."""
+    point = data_point_factory(
+        test_dict,
+        time_key="timestamp",
+        measurement_name="name",
+        field_keys={"field_1key", "field_2key"},
+        tag_keys={"tag_1key", "tag_2key"},
+    )
+    assert point.time == test_dict["timestamp"]
+    assert point.measurement == "name"
+    assert point.tags == {key: test_dict[key] for key in ("tag_1key", "tag_2key")}
+    assert point.fields == {key: test_dict[key] for key in ("field_1key", "field_2key")}
-- 
GitLab