Provenance

# Author: Tamás Gál <tgal@km3net.de>
# License: MIT
# Date: 2020-08-23

Introduction

KM3Pipe uses the provenance functionality from thepipe, which automatically tracks each activity. This document shows how the provenance tracking works.

import km3pipe as kp
import km3modules as km
import numpy as np

Some dummy modules

class RandomNumberGenerator(kp.Module):
    """Dummy module which adds a table of random numbers to every blob.

    Parameters (read in ``configure``)
    ----------------------------------
    h5loc
        Obtained via ``self.require("h5loc")`` and passed on as the
        ``h5loc`` of the generated ``kp.Table``.
    n
        Obtained via ``self.get("n", default=10)``; the number of values
        requested from ``np.random.randn`` per blob.
    """

    def configure(self):
        """Read the module parameters and keep them on the instance."""
        self.h5loc = self.require("h5loc")
        self.n = self.get("n", default=10)

    def process(self, blob):
        """Store a fresh one-column table under ``"RandomNumbers"``.

        The table has a single column ``"x"`` filled with ``self.n``
        values from ``np.random.randn``. The same blob is returned.
        """
        blob["RandomNumbers"] = kp.Table(
            {"x": np.random.randn(self.n)},
            h5loc=self.h5loc,
        )
        return blob

Creating a simple pipeline

We create a very basic pipeline:

# Build the pipeline; modules are attached in the order listed here.
pipe = kp.Pipeline()
# Prints a "Blob N" banner; with every=1 a banner appears for each cycle.
pipe.attach(km.StatusBar, every=1)
# Fixed seed (23) for the random state.
# NOTE(review): presumably seeds numpy's global RNG so the example is
# reproducible -- confirm against km3modules.mc.GlobalRandomState.
pipe.attach(km.mc.GlobalRandomState, seed=23)
# The dummy module defined above: n=5 random numbers per blob, h5loc "/rnd".
pipe.attach(RandomNumberGenerator, h5loc="/rnd", n=5)
# Writes the output to the HDF5 file "rnd.h5".
pipe.attach(kp.io.HDF5Sink, filename="rnd.h5")
# Run the pipeline for 11 cycles.
pipe.drain(11)
Pipeline and module initialisation took 0.002s (CPU 0.002s).
--------------------------[ Blob       1 ]---------------------------
--------------------------[ Blob       2 ]---------------------------
--------------------------[ Blob       3 ]---------------------------
--------------------------[ Blob       4 ]---------------------------
--------------------------[ Blob       5 ]---------------------------
--------------------------[ Blob       6 ]---------------------------
--------------------------[ Blob       7 ]---------------------------
--------------------------[ Blob       8 ]---------------------------
--------------------------[ Blob       9 ]---------------------------
--------------------------[ Blob      10 ]---------------------------
--------------------------[ Blob      11 ]---------------------------
================================[ . ]================================
2024-03-28 03:19:15 km3pipe.io.hdf5.HDF5Sink.HDF5Sink: HDF5 file written to: rnd.h5
============================================================
11 cycles drained in 0.036489s (CPU 0.036489s). Memory peak: 493.36 MB
  wall  mean: 0.002269s  medi: 0.001875s  min: 0.001599s  max: 0.006034s  std: 0.001215s
  CPU   mean: 0.002271s  medi: 0.001876s  min: 0.001600s  max: 0.006036s  std: 0.001215s

Blob([('StatusBar', None), ('GlobalRandomState', None), ('RandomNumberGenerator', None), ('HDF5Sink', None)])

Provenance

The provenance information is managed by the singleton class Provenance. To access all the provenance information, use the as_json() method:

print(kp.Provenance().as_json(indent=2))
[
  {
    "uuid": "65b63b41-cb20-4acf-a784-6eb0851b115b",
    "name": "pipeline",
    "parent_activity": "a89825fc-84f2-4980-bc2f-5a933295f45d",
    "child_activities": [],
    "start": {
      "time_utc": "2024-03-28T03:19:14.999601+00:00",
      "peak_memory": 493.359375
    },
    "stop": {
      "time_utc": "2024-03-28T03:19:15.065218+00:00",
      "peak_memory": 493.359375
    },
    "system": {
      "thepipe_version": "1.3.8",
      "executable": "/builds/km3py/km3pipe/venv/bin/python3",
      "arguments": [
        "/builds/km3py/km3pipe/examples/plot_provenance.py"
      ],
      "environment": {
        "PATH": "/builds/km3py/km3pipe/venv/bin:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
        "LD_LIBRARY_PATH": null,
        "DYLD_LIBRARY_PATH": null,
        "USER": null,
        "HOME": "/root",
        "SHELL": null,
        "VIRTUAL_ENV": "/builds/km3py/km3pipe/venv",
        "CONDA_DEFAULT_ENV": null,
        "CONDA_PREFIX": null,
        "CONDA_EXE": null,
        "CONDA_PROMOMPT_MODIFIER": null,
        "CONDA_SHLVL": null,
        "APPVEYOR": null,
        "CI": "true",
        "CIRCLECI": null,
        "CONTINUOUS_INTEGRATION": null,
        "GITHUB_ACTIONS": null,
        "GITLAB_CI": "true",
        "TF_BUILD": null,
        "TRAVIS": null
      },
      "platform": {
        "architecture_bits": "64bit",
        "architecture_linkage": "ELF",
        "machine": "x86_64",
        "processor": "",
        "node": "runner-qczggpte-project-27-concurrent-0",
        "version": "#137-Ubuntu SMP Wed Jun 15 13:33:07 UTC 2022",
        "system": "Linux",
        "release": "5.4.0-121-generic",
        "libcver": [
          "glibc",
          "2.28"
        ],
        "num_cpus": 128,
        "boot_time": "2022-07-01T13:07:17+00:00"
      },
      "python": {
        "version_string": "3.9.2 (default, Feb 19 2021, 17:11:58) \n[GCC 8.3.0]",
        "version": [
          "3",
          "9",
          "2"
        ],
        "compiler": "GCC 8.3.0",
        "implementation": "CPython",
        "packages": [
          {
            "name": "alabaster",
            "version": "0.7.16"
          },
          {
            "name": "anyascii",
            "version": "0.3.2"
          },
          {
            "name": "astroid",
            "version": "3.1.0"
          },
          {
            "name": "astropy-iers-data",
            "version": "0.2024.3.25.0.29.50"
          },
          {
            "name": "astropy",
            "version": "6.0.1"
          },
          {
            "name": "asttokens",
            "version": "2.4.1"
          },
          {
            "name": "attrs",
            "version": "23.2.0"
          },
          {
            "name": "awkward0",
            "version": "0.15.5"
          },
          {
            "name": "awkward",
            "version": "1.10.5"
          },
          {
            "name": "babel",
            "version": "2.14.0"
          },
          {
            "name": "black",
            "version": "22.3.0"
          },
          {
            "name": "blosc2",
            "version": "2.5.1"
          },
          {
            "name": "cachetools",
            "version": "5.3.3"
          },
          {
            "name": "certifi",
            "version": "2024.2.2"
          },
          {
            "name": "cffi",
            "version": "1.16.0"
          },
          {
            "name": "charset-normalizer",
            "version": "3.3.2"
          },
          {
            "name": "click",
            "version": "8.1.7"
          },
          {
            "name": "colorama",
            "version": "0.4.6"
          },
          {
            "name": "comm",
            "version": "0.2.2"
          },
          {
            "name": "contourpy",
            "version": "1.2.0"
          },
          {
            "name": "coverage",
            "version": "7.4.4"
          },
          {
            "name": "cryptography",
            "version": "42.0.5"
          },
          {
            "name": "cycler",
            "version": "0.12.1"
          },
          {
            "name": "debugpy",
            "version": "1.8.1"
          },
          {
            "name": "decorator",
            "version": "5.1.1"
          },
          {
            "name": "dill",
            "version": "0.3.8"
          },
          {
            "name": "docopt",
            "version": "0.6.2"
          },
          {
            "name": "docutils",
            "version": "0.20.1"
          },
          {
            "name": "exceptiongroup",
            "version": "1.2.0"
          },
          {
            "name": "executing",
            "version": "2.0.1"
          },
          {
            "name": "flake8",
            "version": "7.0.0"
          },
          {
            "name": "fonttools",
            "version": "4.50.0"
          },
          {
            "name": "h5py",
            "version": "3.10.0"
          },
          {
            "name": "healpy",
            "version": "1.16.6"
          },
          {
            "name": "hepunits",
            "version": "2.3.3"
          },
          {
            "name": "idna",
            "version": "3.6"
          },
          {
            "name": "imagesize",
            "version": "1.4.1"
          },
          {
            "name": "importlib-metadata",
            "version": "7.1.0"
          },
          {
            "name": "importlib-resources",
            "version": "6.4.0"
          },
          {
            "name": "iniconfig",
            "version": "2.0.0"
          },
          {
            "name": "ipykernel",
            "version": "6.29.4"
          },
          {
            "name": "ipython",
            "version": "8.18.1"
          },
          {
            "name": "isort",
            "version": "5.13.2"
          },
          {
            "name": "jaraco.classes",
            "version": "3.3.1"
          },
          {
            "name": "jaraco.context",
            "version": "4.3.0"
          },
          {
            "name": "jaraco.functools",
            "version": "4.0.0"
          },
          {
            "name": "jedi",
            "version": "0.19.1"
          },
          {
            "name": "jeepney",
            "version": "0.8.0"
          },
          {
            "name": "jinja2",
            "version": "3.1.3"
          },
          {
            "name": "jupyter-client",
            "version": "8.6.1"
          },
          {
            "name": "jupyter-core",
            "version": "5.7.2"
          },
          {
            "name": "keyring",
            "version": "25.0.0"
          },
          {
            "name": "kiwisolver",
            "version": "1.4.5"
          },
          {
            "name": "km3astro",
            "version": "0.16.0"
          },
          {
            "name": "km3db",
            "version": "0.13.3"
          },
          {
            "name": "km3flux",
            "version": "1.0.3"
          },
          {
            "name": "km3io",
            "version": "1.1.0"
          },
          {
            "name": "km3net-testdata",
            "version": "0.4.12"
          },
          {
            "name": "km3pipe",
            "version": "9.13.12.dev3+g68460ea0"
          },
          {
            "name": "llvmlite",
            "version": "0.42.0"
          },
          {
            "name": "markdown-it-py",
            "version": "3.0.0"
          },
          {
            "name": "markupsafe",
            "version": "2.1.5"
          },
          {
            "name": "matplotlib-inline",
            "version": "0.1.6"
          },
          {
            "name": "matplotlib",
            "version": "3.8.3"
          },
          {
            "name": "mccabe",
            "version": "0.7.0"
          },
          {
            "name": "mdurl",
            "version": "0.1.2"
          },
          {
            "name": "mock",
            "version": "5.1.0"
          },
          {
            "name": "more-itertools",
            "version": "10.2.0"
          },
          {
            "name": "msgpack",
            "version": "1.0.8"
          },
          {
            "name": "mypy-extensions",
            "version": "1.0.0"
          },
          {
            "name": "ndindex",
            "version": "1.8"
          },
          {
            "name": "nest-asyncio",
            "version": "1.6.0"
          },
          {
            "name": "nh3",
            "version": "0.2.17"
          },
          {
            "name": "numba",
            "version": "0.59.1"
          },
          {
            "name": "numexpr",
            "version": "2.9.0"
          },
          {
            "name": "numpy",
            "version": "1.26.4"
          },
          {
            "name": "numpydoc",
            "version": "1.6.0"
          },
          {
            "name": "packaging",
            "version": "24.0"
          },
          {
            "name": "pandas",
            "version": "2.2.1"
          },
          {
            "name": "parso",
            "version": "0.8.3"
          },
          {
            "name": "particle",
            "version": "0.23.1"
          },
          {
            "name": "passlib",
            "version": "1.7.4"
          },
          {
            "name": "pathspec",
            "version": "0.12.1"
          },
          {
            "name": "pexpect",
            "version": "4.9.0"
          },
          {
            "name": "pillow",
            "version": "10.2.0"
          },
          {
            "name": "pip",
            "version": "24.0"
          },
          {
            "name": "pkginfo",
            "version": "1.10.0"
          },
          {
            "name": "platformdirs",
            "version": "4.2.0"
          },
          {
            "name": "pluggy",
            "version": "1.4.0"
          },
          {
            "name": "pockets",
            "version": "0.9.1"
          },
          {
            "name": "prompt-toolkit",
            "version": "3.0.43"
          },
          {
            "name": "psutil",
            "version": "5.9.8"
          },
          {
            "name": "ptyprocess",
            "version": "0.7.0"
          },
          {
            "name": "pure-eval",
            "version": "0.2.2"
          },
          {
            "name": "py-cpuinfo",
            "version": "9.0.0"
          },
          {
            "name": "pycodestyle",
            "version": "2.11.1"
          },
          {
            "name": "pycparser",
            "version": "2.21"
          },
          {
            "name": "pydocstyle",
            "version": "6.3.0"
          },
          {
            "name": "pyerfa",
            "version": "2.0.1.1"
          },
          {
            "name": "pyflakes",
            "version": "3.2.0"
          },
          {
            "name": "pygments",
            "version": "2.17.2"
          },
          {
            "name": "pylint",
            "version": "3.1.0"
          },
          {
            "name": "pypandoc",
            "version": "1.13"
          },
          {
            "name": "pyparsing",
            "version": "3.1.2"
          },
          {
            "name": "pytest-cov",
            "version": "5.0.0"
          },
          {
            "name": "pytest-flake8",
            "version": "1.1.1"
          },
          {
            "name": "pytest-watch",
            "version": "4.2.0"
          },
          {
            "name": "pytest",
            "version": "8.1.1"
          },
          {
            "name": "python-dateutil",
            "version": "2.9.0.post0"
          },
          {
            "name": "pytz",
            "version": "2024.1"
          },
          {
            "name": "pyyaml",
            "version": "6.0.1"
          },
          {
            "name": "pyzmq",
            "version": "25.1.2"
          },
          {
            "name": "readme-renderer",
            "version": "43.0"
          },
          {
            "name": "requests-toolbelt",
            "version": "1.0.0"
          },
          {
            "name": "requests",
            "version": "2.31.0"
          },
          {
            "name": "rfc3986",
            "version": "2.0.0"
          },
          {
            "name": "rich",
            "version": "13.7.1"
          },
          {
            "name": "scipy",
            "version": "1.12.0"
          },
          {
            "name": "seaborn",
            "version": "0.13.2"
          },
          {
            "name": "secretstorage",
            "version": "3.3.3"
          },
          {
            "name": "setuptools-scm",
            "version": "8.0.4"
          },
          {
            "name": "setuptools",
            "version": "69.1.0"
          },
          {
            "name": "six",
            "version": "1.16.0"
          },
          {
            "name": "snowballstemmer",
            "version": "2.2.0"
          },
          {
            "name": "sphinx-autoapi",
            "version": "3.0.0"
          },
          {
            "name": "sphinx-gallery",
            "version": "0.15.0"
          },
          {
            "name": "sphinx-rtd-theme",
            "version": "2.0.0"
          },
          {
            "name": "sphinx",
            "version": "7.2.6"
          },
          {
            "name": "sphinxcontrib-applehelp",
            "version": "1.0.8"
          },
          {
            "name": "sphinxcontrib-devhelp",
            "version": "1.0.6"
          },
          {
            "name": "sphinxcontrib-htmlhelp",
            "version": "2.0.5"
          },
          {
            "name": "sphinxcontrib-jquery",
            "version": "4.1"
          },
          {
            "name": "sphinxcontrib-jsmath",
            "version": "1.0.1"
          },
          {
            "name": "sphinxcontrib-napoleon",
            "version": "0.7"
          },
          {
            "name": "sphinxcontrib-programoutput",
            "version": "0.17"
          },
          {
            "name": "sphinxcontrib-qthelp",
            "version": "1.0.7"
          },
          {
            "name": "sphinxcontrib-serializinghtml",
            "version": "1.1.10"
          },
          {
            "name": "sphinxcontrib-websupport",
            "version": "1.2.7"
          },
          {
            "name": "stack-data",
            "version": "0.6.3"
          },
          {
            "name": "tables",
            "version": "3.9.2"
          },
          {
            "name": "tabulate",
            "version": "0.9.0"
          },
          {
            "name": "thepipe",
            "version": "1.3.8"
          },
          {
            "name": "toml",
            "version": "0.10.2"
          },
          {
            "name": "tomli",
            "version": "2.0.1"
          },
          {
            "name": "tomlkit",
            "version": "0.12.4"
          },
          {
            "name": "tornado",
            "version": "6.4"
          },
          {
            "name": "tqdm",
            "version": "4.66.2"
          },
          {
            "name": "traitlets",
            "version": "5.14.2"
          },
          {
            "name": "twine",
            "version": "5.0.0"
          },
          {
            "name": "typing-extensions",
            "version": "4.10.0"
          },
          {
            "name": "tzdata",
            "version": "2024.1"
          },
          {
            "name": "uproot3-methods",
            "version": "0.10.1"
          },
          {
            "name": "uproot3",
            "version": "3.14.4"
          },
          {
            "name": "uproot",
            "version": "4.3.7"
          },
          {
            "name": "urllib3",
            "version": "2.2.1"
          },
          {
            "name": "urwid",
            "version": "2.6.10"
          },
          {
            "name": "utm",
            "version": "0.7.0"
          },
          {
            "name": "watchdog",
            "version": "4.0.0"
          },
          {
            "name": "wcwidth",
            "version": "0.2.13"
          },
          {
            "name": "wheel",
            "version": "0.42.0"
          },
          {
            "name": "zipp",
            "version": "3.18.1"
          }
        ]
      },
      "start_time_utc": "2024-03-28T03:19:15.027979+00:00"
    },
    "input": [],
    "output": [
      {
        "url": "rnd.h5",
        "uuid": "538aea20-d9e1-46e0-96eb-b77fc524fd3e",
        "comment": "HDF5Sink output"
      }
    ],
    "samples": [],
    "status": "completed",
    "configuration": {
      "planned_cycles": 11,
      "modules": [
        {
          "name": "StatusBar",
          "parameters": {
            "every": 1
          }
        },
        {
          "name": "GlobalRandomState",
          "parameters": {
            "seed": 23
          }
        },
        {
          "name": "RandomNumberGenerator",
          "parameters": {
            "h5loc": "/rnd",
            "n": 5
          }
        },
        {
          "name": "HDF5Sink",
          "parameters": {
            "filename": "rnd.h5"
          }
        }
      ],
      "cycles": 11
    },
    "duration": 0.065617
  }
]

Total running time of the script: (0 minutes 0.100 seconds)

Gallery generated by Sphinx-Gallery