Source code for eemeter.io

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

   Copyright 2014-2023 OpenEEmeter contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

"""
import numpy as np
import pandas as pd

__all__ = (
    "meter_data_from_csv",
    "meter_data_from_json",
    "meter_data_to_csv",
    "temperature_data_from_csv",
    "temperature_data_from_json",
    "temperature_data_to_csv",
)


def meter_data_from_csv(
    filepath_or_buffer,
    tz=None,
    start_col="start",
    value_col="value",
    gzipped=False,
    freq=None,
    **kwargs
):
    """Load meter data from a CSV file.

    Default format::

        start,value
        2017-01-01T00:00:00+00:00,0.31
        2017-01-02T00:00:00+00:00,0.4
        2017-01-03T00:00:00+00:00,0.58

    Parameters
    ----------
    filepath_or_buffer : :any:`str` or file-handle
        File path or object.
    tz : :any:`str`, optional
        E.g., ``'UTC'`` or ``'US/Pacific'``. If given, the index is converted
        to this timezone after being read as UTC.
    start_col : :any:`str`, optional, default ``'start'``
        Date period start column.
    value_col : :any:`str`, optional, default ``'value'``
        Value column, can be in any unit.
    gzipped : :any:`bool`, optional
        Whether file is gzipped.
    freq : :any:`str`, optional
        If ``'hourly'`` or ``'daily'``, resample to that frequency using
        :any:`pandas.DataFrame.resample`, summing the values that fall in
        each period (periods without any value stay missing). Any other
        value leaves the data at the frequency found in the file.
    **kwargs
        Extra keyword arguments to pass to :any:`pandas.read_csv`, such as
        ``sep='|'``. These take precedence over the defaults built here.

    Returns
    -------
    df : :any:`pandas.DataFrame`
        The loaded data, indexed by the timezone-aware ``start_col``.
    """
    read_csv_kwargs = {
        "usecols": [start_col, value_col],
        "dtype": {value_col: np.float64},
        "parse_dates": [start_col],
        "index_col": start_col,
    }
    if gzipped:
        read_csv_kwargs["compression"] = "gzip"

    # Caller-supplied options are applied last so they can override defaults.
    read_csv_kwargs.update(kwargs)

    df = pd.read_csv(filepath_or_buffer, **read_csv_kwargs)
    df.index = pd.to_datetime(df.index, utc=True)

    # for pandas<0.24, which doesn't localize even with utc=True
    if df.index.tz is None:
        df.index = df.index.tz_localize("UTC")  # pragma: no cover

    if tz is not None:
        df = df.tz_convert(tz)

    # min_count=1 keeps empty periods as NaN instead of reporting them as 0.
    # Lower-case "h" is used because the upper-case hourly alias is deprecated
    # in recent pandas releases.
    if freq == "hourly":
        df = df.resample("h").sum(min_count=1)
    elif freq == "daily":
        df = df.resample("D").sum(min_count=1)

    return df


def temperature_data_from_csv(
    filepath_or_buffer,
    tz=None,
    date_col="dt",
    temp_col="tempF",
    gzipped=False,
    freq=None,
    **kwargs
):
    """Load temperature data from a CSV file.

    Default format::

        dt,tempF
        2017-01-01T00:00:00+00:00,21
        2017-01-01T01:00:00+00:00,22.5
        2017-01-01T02:00:00+00:00,23.5

    Parameters
    ----------
    filepath_or_buffer : :any:`str` or file-handle
        File path or object.
    tz : :any:`str`, optional
        E.g., ``'UTC'`` or ``'US/Pacific'``. If given, the index is converted
        to this timezone after being read as UTC.
    date_col : :any:`str`, optional, default ``'dt'``
        Date period start column.
    temp_col : :any:`str`, optional, default ``'tempF'``
        Temperature column.
    gzipped : :any:`bool`, optional
        Whether file is gzipped.
    freq : :any:`str`, optional
        If ``'hourly'``, resample to hourly frequency using
        :any:`pandas.DataFrame.resample`, averaging the readings that fall
        in each hour (hours without any reading stay missing). Any other
        value leaves the data at the frequency found in the file.
    **kwargs
        Extra keyword arguments to pass to :any:`pandas.read_csv`, such as
        ``sep='|'``. These take precedence over the defaults built here.

    Returns
    -------
    series : :any:`pandas.Series`
        The ``temp_col`` column, indexed by the timezone-aware ``date_col``.
    """
    read_csv_kwargs = {
        "usecols": [date_col, temp_col],
        "dtype": {temp_col: np.float64},
        "parse_dates": [date_col],
        "index_col": date_col,
    }
    if gzipped:
        read_csv_kwargs["compression"] = "gzip"

    # Caller-supplied options are applied last so they can override defaults.
    read_csv_kwargs.update(kwargs)

    df = pd.read_csv(filepath_or_buffer, **read_csv_kwargs)
    df.index = pd.to_datetime(df.index, utc=True)

    # for pandas<0.24, which doesn't localize even with utc=True
    if df.index.tz is None:
        df.index = df.index.tz_localize("UTC")  # pragma: no cover

    if tz is not None:
        df = df.tz_convert(tz)

    if freq == "hourly":
        # Temperature is not an additive quantity: several readings within
        # one hour must be averaged, not summed. For data that is already
        # hourly the result is unchanged, and empty hours remain NaN.
        df = df.resample("h").mean()

    return df[temp_col]


def meter_data_from_json(data, orient="list"):
    """Load meter data from json.

    Default format::

        [
            ['2017-01-01T00:00:00+00:00', 3.5],
            ['2017-02-01T00:00:00+00:00', 0.4],
            ['2017-03-01T00:00:00+00:00', 0.46],
        ]

    records format::

        [
            {'start': '2017-01-01T00:00:00+00:00', 'value': 3.5},
            {'start': '2017-02-01T00:00:00+00:00', 'value': 0.4},
            {'start': '2017-03-01T00:00:00+00:00', 'value': 0.46},
        ]

    Parameters
    ----------
    data : :any:`list`
        A list of meter data, with each row representing a single record.
        ``None`` or an empty list yields an empty DataFrame.
    orient : :any:`str`
        Format of `data` parameter:
            - `list` (a list of lists, with the first element as start date)
            - `records` (a list of dicts)

    Returns
    -------
    df : :any:`pandas.DataFrame`
        DataFrame with a single column (``'value'``) and a
        :any:`pandas.DatetimeIndex`. A second column (``'estimated'``)
        may also be included if the input data contained an estimated boolean flag.

    Raises
    ------
    ValueError
        If ``orient`` is neither ``'list'`` nor ``'records'`` (and ``data``
        is not ``None``).
    """

    def _empty_meter_data_dataframe():
        # Explicit float dtype so the empty frame has the same column type as
        # a populated one regardless of the pandas default for empty data.
        return pd.DataFrame(
            {"value": []},
            index=pd.DatetimeIndex([], tz="UTC", name="start"),
            dtype=float,
        )

    def _float_or_none(value):
        # Values that cannot be read as a number become missing. TypeError
        # covers non-scalar values such as lists or dicts; ValueError covers
        # unparseable strings.
        if value is None:
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def _noneify_meter_data_row(row):
        out_row = {"start": row["start"], "value": _float_or_none(row["value"])}
        if "estimated" in row:
            # Accept the common truthy spellings; anything else is False.
            out_row["estimated"] = row["estimated"] in [True, "true", "True", 1, "1"]
        return out_row

    if data is None:
        return _empty_meter_data_dataframe()

    if orient == "list":
        df = pd.DataFrame(data, columns=["start", "value"])
        if df.empty:
            # Same canonical empty result as the "records" orientation.
            return _empty_meter_data_dataframe()
        df["start"] = pd.to_datetime(df["start"], utc=True)
        return df.set_index("start")

    if orient == "records":
        df = pd.DataFrame([_noneify_meter_data_row(row) for row in data])
        if df.empty:
            return _empty_meter_data_dataframe()
        df["start"] = pd.to_datetime(df["start"], utc=True)
        df = df.set_index("start")
        df["value"] = df["value"].astype(float)
        if "estimated" in df.columns:
            # Rows without the flag come through as missing; treat as False.
            df["estimated"] = df["estimated"].fillna(False).astype(bool)
        return df

    raise ValueError("orientation not recognized.")


def temperature_data_from_json(data, orient="list"):
    """Load temperature data from json. (Must be given in degrees
    Fahrenheit).

    Default format::

        [
            ['2017-01-01T00:00:00+00:00', 3.5],
            ['2017-01-01T01:00:00+00:00', 5.4],
            ['2017-01-01T02:00:00+00:00', 7.4],
        ]

    Parameters
    ----------
    data : :any:`list`
        Each list element is one row of data: a timestamp followed by a
        temperature.
    orient : :any:`str`
        Format of `data` parameter. Only ``'list'`` (a list of lists, with
        the first element as the date) is supported.

    Returns
    -------
    series : :any:`pandas.Series`
        Series named ``'tempF'`` with a UTC :any:`pandas.DatetimeIndex`
        named ``'dt'``.

    Raises
    ------
    ValueError
        If ``orient`` is not ``'list'``.
    """
    if orient != "list":
        raise ValueError("orientation not recognized.")

    df = pd.DataFrame(data, columns=["dt", "tempF"])
    series = df["tempF"]
    # Re-index the temperature column by the parsed, UTC-aware timestamps.
    series.index = pd.to_datetime(df["dt"], utc=True)
    return series


def meter_data_to_csv(meter_data, path_or_buf=None):
    """Write meter data to CSV. See also :any:`pandas.DataFrame.to_csv`.

    The index column is labelled ``'start'`` when the index has no name of
    its own. The input DataFrame is not modified.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data DataFrame with ``'value'`` column and
        :any:`pandas.DatetimeIndex`.
    path_or_buf : :any:`str` or file handle, default None
        File path or object, if None is provided the result is returned as a string.

    Returns
    -------
    csv : :any:`str` or None
        Whatever :any:`pandas.DataFrame.to_csv` returns for ``path_or_buf``.
    """
    # Supply the fallback header through ``index_label`` instead of renaming
    # the caller's index in place; ``None`` lets pandas use the existing name.
    index_label = "start" if meter_data.index.name is None else None
    return meter_data.to_csv(path_or_buf, index=True, index_label=index_label)


def temperature_data_to_csv(temperature_data, path_or_buf=None):
    """Write temperature data to CSV. See also :any:`pandas.DataFrame.to_csv`.

    The index column is labelled ``'dt'`` and the value column
    ``'temperature'`` when the series carries no names of its own. The input
    series is not modified.

    Parameters
    ----------
    temperature_data : :any:`pandas.Series`
        Temperature data series with :any:`pandas.DatetimeIndex`.
    path_or_buf : :any:`str` or file handle, default None
        File path or object, if None is provided the result is returned as a string.

    Returns
    -------
    csv : :any:`str` or None
        Whatever :any:`pandas.DataFrame.to_csv` returns for ``path_or_buf``.
    """
    # Apply fallback names to the temporary frame / CSV header only, so the
    # caller's series keeps whatever names (or lack of names) it had.
    if temperature_data.name is None:
        frame = temperature_data.to_frame(name="temperature")
    else:
        frame = temperature_data.to_frame()
    index_label = "dt" if temperature_data.index.name is None else None
    return frame.to_csv(path_or_buf, index=True, index_label=index_label)
Source code for eemeter.io

Useful Links

Table of Contents