Source code for etna.transforms.timestamp.fourier
import math
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence
import numpy as np
import pandas as pd
from etna.distributions import BaseDistribution
from etna.distributions import IntDistribution
from etna.transforms.base import FutureMixin
from etna.transforms.base import IrreversibleTransform
[docs]class FourierTransform(IrreversibleTransform, FutureMixin):
"""Adds fourier features to the dataset.
Notes
-----
To understand how transform works we recommend:
`Fourier series <https://otexts.com/fpp2/useful-predictors.html#fourier-series>`_.
* Parameter ``period`` is responsible for the seasonality we want to capture.
* Parameters ``order`` and ``mods`` define which harmonics will be used.
Parameter ``order`` is a more user-friendly version of ``mods``.
For example, ``order=2`` can be represented as ``mods=[1, 2, 3, 4]`` if ``period`` > 4 and
as ``mods=[1, 2, 3]`` if 3 <= ``period`` <= 4.
"""
def __init__(
self,
period: float,
order: Optional[int] = None,
mods: Optional[Sequence[int]] = None,
out_column: Optional[str] = None,
):
"""Create instance of FourierTransform.
Parameters
----------
period:
the period of the seasonality to capture in frequency units of time series;
``period`` should be >= 2
order:
upper order of Fourier components to include;
``order`` should be >= 1 and <= ceil(period/2))
mods:
alternative and precise way of defining which harmonics will be used,
for example ``mods=[1, 3, 4]`` means that sin of the first order
and sin and cos of the second order will be used;
``mods`` should be >= 1 and < period
out_column:
* if set, name of added column, the final name will be '{out_columnt}_{mod}';
* if don't set, name will be ``transform.__repr__()``,
repr will be made for transform that creates exactly this column
Raises
------
ValueError:
if period < 2
ValueError:
if both or none of order, mods is set
ValueError:
if order is < 1 or > ceil(period/2)
ValueError:
if at least one mod is < 1 or >= period
"""
if period < 2:
raise ValueError("Period should be at least 2")
self.period = period
self.order = order
self.mods = mods
self._mods: Sequence[int]
if order is not None and mods is None:
if order < 1 or order > math.ceil(period / 2):
raise ValueError("Order should be within [1, ceil(period/2)] range")
self._mods = [mod for mod in range(1, 2 * order + 1) if mod < period]
elif mods is not None and order is None:
if min(mods) < 1 or max(mods) >= period:
raise ValueError("Every mod should be within [1, int(period)) range")
self._mods = mods
else:
raise ValueError("There should be exactly one option set: order or mods")
self.out_column = out_column
super().__init__(required_features=["target"])
def _get_column_name(self, mod: int) -> str:
if self.out_column is None:
return f"{FourierTransform(period=self.period, mods=[mod]).__repr__()}"
else:
return f"{self.out_column}_{mod}"
[docs] def get_regressors_info(self) -> List[str]:
"""Return the list with regressors created by the transform."""
output_columns = [self._get_column_name(mod=mod) for mod in self._mods]
return output_columns
def _fit(self, df: pd.DataFrame) -> "FourierTransform":
"""Fit method does nothing and is kept for compatibility.
Parameters
----------
df:
dataframe with data.
Returns
-------
result: FourierTransform
"""
return self
@staticmethod
def _construct_answer(df: pd.DataFrame, features: pd.DataFrame) -> pd.DataFrame:
dataframes = []
for seg in df.columns.get_level_values("segment").unique():
tmp = df[seg].join(features)
_idx = tmp.columns.to_frame()
_idx.insert(0, "segment", seg)
tmp.columns = pd.MultiIndex.from_frame(_idx)
dataframes.append(tmp)
result = pd.concat(dataframes, axis=1).sort_index(axis=1)
result.columns.names = ["segment", "feature"]
return result
def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
"""Add harmonics to the dataset.
Parameters
----------
df:
dataframe with data to transform.
Returns
-------
result: pd.Dataframe
transformed dataframe
"""
features = pd.DataFrame(index=df.index)
elapsed = np.arange(features.shape[0]) / self.period
for mod in self._mods:
order = (mod + 1) // 2
is_cos = mod % 2 == 0
features[self._get_column_name(mod)] = np.sin(2 * np.pi * order * elapsed + np.pi / 2 * is_cos)
return self._construct_answer(df, features)
[docs] def params_to_tune(self) -> Dict[str, BaseDistribution]:
"""Get default grid for tuning hyperparameters.
If ``self.order`` is set then this grid tunes ``order`` parameter:
Other parameters are expected to be set by the user.
Returns
-------
:
Grid to tune.
"""
if self.mods is not None:
return {}
max_value = math.ceil(self.period / 2)
return {"order": IntDistribution(low=1, high=max_value, log=True)}