Feature Extraction Benchmarks¶
This walkthrough serves as a benchmark for comparing functime
with tsfresh
feature extraction functions. We begin the analysis by evaluating the speed of feature extraction across time series of four different sizes: 10K, 100K, 1M, and 9M. Next, we assess the speed in a groupby and aggregation context, making a performance comparison between functime with polars and tsfresh with pandas.
%%capture
%pip install perfplot
%pip install pandas
%pip install tsfresh
%pip install functime
from typing import Callable
import pandas as pd
import perfplot
import polars as pl
from tsfresh.feature_extraction import feature_calculators as tsfresh
from functime import feature_extractors as fe
# Display settings for the result tables rendered further down in the notebook.
# Show up to 100 rows per printed table.
pl.Config.set_tbl_rows(100)
# Allow string cells to be printed up to 60 characters long.
pl.Config.set_fmt_str_lengths(60)
# Omit the data-type line from printed table headers.
pl.Config.set_tbl_hide_column_data_types(True)
polars.config.Config
1. Setup for the comparison¶
We are using the M4 dataset. We create a pd.DataFrame
and pl.DataFrame
and we define a list of tuples with the following structure:
(
<functime_function>
,
<tsfresh_function>
,
<functime_parameters>
,
<tsfresh_parameters>
)
# Relative path to the M4 parquet file used for every benchmark.
_M4_DATASET = "../../data/m4_1d_train.parquet"

# pandas version: melt to long format, keep only the non-null "value" column
# and renumber the index from 0.
DF_PANDAS = (
    pd.read_parquet(_M4_DATASET)
    .melt()
    .drop(columns=["variable"])
    .dropna()
    .reset_index(drop=True)
)

# Polars version: the "V1" column is dropped before melting, then the frame
# is reduced to the non-null "value" column as well.
DF_PL_EAGER = (
    pl.read_parquet(_M4_DATASET)
    .drop("V1")
    .melt()
    .drop("variable")
    .drop_nulls()
)

# Lazy view over the same Polars data.
DF_PL_LAZY = DF_PL_EAGER.lazy()
# Benchmark specification. Each entry is a 4-tuple:
#   (<functime_function>, <tsfresh_function>, <functime_parameters>, <tsfresh_parameters>)
# The two dicts are the keyword arguments for the functime and tsfresh
# function respectively. Commented-out entries are features that are
# currently excluded from the benchmark.
FUNC_PARAMS_BENCH = [
    (fe.absolute_energy, tsfresh.abs_energy, {}, {}),
    (fe.absolute_maximum, tsfresh.absolute_maximum, {}, {}),
    (fe.absolute_sum_of_changes, tsfresh.absolute_sum_of_changes, {}, {}),
    (
        fe.approximate_entropy,
        tsfresh.approximate_entropy,
        {"run_length": 2, "filtering_level": 0.5},
        {"m": 2, "r": 0.5},
    ),
    # (fe.augmented_dickey_fuller, tsfresh.augmented_dickey_fuller, "param")
    (fe.autocorrelation, tsfresh.autocorrelation, {"n_lags": 4}, {"lag": 4}),
    (
        fe.autoregressive_coefficients,
        tsfresh.ar_coefficient,
        {"n_lags": 4},
        # One request per coefficient (0..4), all inside a single "param" list.
        # The comprehension must live inside the list: placed outside, it is a
        # dict comprehension that keeps only the last value of `i`.
        {"param": [{"coeff": i, "k": 4} for i in range(5)]},
    ),
    (fe.benford_correlation2, tsfresh.benford_correlation, {}, {}),
    (fe.benford_correlation, tsfresh.benford_correlation, {}, {}),
    (fe.binned_entropy, tsfresh.binned_entropy, {"bin_count": 10}, {"max_bins": 10}),
    (fe.c3, tsfresh.c3, {"n_lags": 10}, {"lag": 10}),
    (
        fe.change_quantiles,
        tsfresh.change_quantiles,
        {"q_low": 0.1, "q_high": 0.9, "is_abs": True},
        {"ql": 0.1, "qh": 0.9, "isabs": True, "f_agg": "mean"},
    ),
    (fe.cid_ce, tsfresh.cid_ce, {"normalize": True}, {"normalize": True}),
    (fe.count_above, tsfresh.count_above, {"threshold": 0.0}, {"t": 0.0}),
    (fe.count_above_mean, tsfresh.count_above_mean, {}, {}),
    (fe.count_below, tsfresh.count_below, {"threshold": 0.0}, {"t": 0.0}),
    (fe.count_below_mean, tsfresh.count_below_mean, {}, {}),
    # (fe.cwt_coefficients, tsfresh.cwt_coefficients, {"widths": (1, 2, 3), "n_coefficients": 2},{"param": {"widths": (1, 2, 3), "coeff": 2, "w": 1}}),
    (
        fe.energy_ratios,
        tsfresh.energy_ratio_by_chunks,
        {"n_chunks": 6},
        {"param": [{"num_segments": 6, "segment_focus": i} for i in range(6)]},
    ),
    (fe.first_location_of_maximum, tsfresh.first_location_of_maximum, {}, {}),
    (fe.first_location_of_minimum, tsfresh.first_location_of_minimum, {}, {}),
    # (fe.fourier_entropy, tsfresh.fourier_entropy, {"n_bins": 10}, {"bins": 10}),
    # (fe.friedrich_coefficients, tsfresh.friedrich_coefficients, {"polynomial_order": 3, "n_quantiles": 30}, {"params": [{"m": 3, "r": 30}]}),
    (fe.has_duplicate, tsfresh.has_duplicate, {}, {}),
    (fe.has_duplicate_max, tsfresh.has_duplicate_max, {}, {}),
    (fe.has_duplicate_min, tsfresh.has_duplicate_min, {}, {}),
    (
        fe.index_mass_quantile,
        tsfresh.index_mass_quantile,
        {"q": 0.5},
        {"param": [{"q": 0.5}]},
    ),
    (
        fe.large_standard_deviation,
        tsfresh.large_standard_deviation,
        {"ratio": 0.25},
        {"r": 0.25},
    ),
    (fe.last_location_of_maximum, tsfresh.last_location_of_maximum, {}, {}),
    (fe.last_location_of_minimum, tsfresh.last_location_of_minimum, {}, {}),
    # (fe.lempel_ziv_complexity, tsfresh.lempel_ziv_complexity, {"n_bins": 5}, {"bins": 5}),
    (
        fe.linear_trend,
        tsfresh.linear_trend,
        {},
        {
            "param": [
                {"attr": "pvalue"},
                {"attr": "rvalue"},
                {"attr": "intercept"},
                {"attr": "slope"},
                {"attr": "stderr"},
            ]
        },
    ),
    (fe.longest_streak_above_mean, tsfresh.longest_strike_above_mean, {}, {}),
    (fe.longest_streak_below_mean, tsfresh.longest_strike_below_mean, {}, {}),
    (fe.mean_abs_change, tsfresh.mean_abs_change, {}, {}),
    (fe.mean_change, tsfresh.mean_change, {}, {}),
    (
        fe.mean_n_absolute_max,
        tsfresh.mean_n_absolute_max,
        {"n_maxima": 20},
        {"number_of_maxima": 20},
    ),
    (
        fe.mean_second_derivative_central,
        tsfresh.mean_second_derivative_central,
        {},
        {},
    ),
    (
        fe.number_crossings,
        tsfresh.number_crossing_m,
        {"crossing_value": 0.0},
        {"m": 0.0},
    ),
    (fe.number_cwt_peaks, tsfresh.number_cwt_peaks, {"max_width": 5}, {"n": 5}),
    (fe.number_peaks, tsfresh.number_peaks, {"support": 5}, {"n": 5}),
    # (fe.partial_autocorrelation, tsfresh.partial_autocorrelation, "param"),
    (
        fe.percent_reoccurring_values,
        tsfresh.percentage_of_reoccurring_values_to_all_values,
        {},
        {},
    ),
    (
        fe.percent_reoccurring_points,
        tsfresh.percentage_of_reoccurring_datapoints_to_all_datapoints,
        {},
        {},
    ),
    (
        fe.permutation_entropy,
        tsfresh.permutation_entropy,
        {"tau": 1, "n_dims": 3},
        {"tau": 1, "dimension": 3},
    ),
    (
        fe.range_count,
        tsfresh.range_count,
        {"lower": 0, "upper": 9, "closed": "none"},
        {"min": 0, "max": 9},
    ),
    (fe.ratio_beyond_r_sigma, tsfresh.ratio_beyond_r_sigma, {"ratio": 2}, {"r": 2}),
    (
        fe.ratio_n_unique_to_length,
        tsfresh.ratio_value_number_to_time_series_length,
        {},
        {},
    ),
    (fe.root_mean_square, tsfresh.root_mean_square, {}, {}),
    (fe.sample_entropy, tsfresh.sample_entropy, {}, {}),
    (
        fe.spkt_welch_density,
        tsfresh.spkt_welch_density,
        {"n_coeffs": 10},
        {"param": [{"coeff": i} for i in range(10)]},
    ),
    (fe.sum_reoccurring_points, tsfresh.sum_of_reoccurring_data_points, {}, {}),
    (fe.sum_reoccurring_values, tsfresh.sum_of_reoccurring_values, {}, {}),
    (
        fe.symmetry_looking,
        tsfresh.symmetry_looking,
        {"ratio": 0.25},
        {"param": [{"r": 0.25}]},
    ),
    (
        fe.time_reversal_asymmetry_statistic,
        tsfresh.time_reversal_asymmetry_statistic,
        {"n_lags": 3},
        {"lag": 3},
    ),
    (fe.variation_coefficient, tsfresh.variation_coefficient, {}, {}),
    (fe.var_gt_std, tsfresh.variance_larger_than_standard_deviation, {}, {}),
]
2. Benchmark core functions¶
Benchmark the core functions on time series of length 10_000, 100_000, 1_000_000 and 9_000_000. (Exceptions: only 10_000 for approximate_entropy
and 10_000/100_000 for number_cwt_peaks
and sample_entropy
). all_benchmarks()
iterates through the elements in the FUNC_PARAMS_BENCH
list and invokes benchmark()
for each function.
def benchmark(
    f_feat: Callable, ts_feat: Callable, f_params: dict, ts_params: dict, is_expr: bool
):
    """Time one functime feature against its tsfresh counterpart.

    Args:
        f_feat: functime feature function.
        ts_feat: tsfresh feature function.
        f_params: keyword arguments forwarded to ``f_feat``.
        ts_params: keyword arguments forwarded to ``ts_feat``.
        is_expr: when true, ``f_feat`` is applied to ``pl.col("value")`` inside
            a ``select``; otherwise it is applied to the "value" column
            directly.

    Returns:
        The object returned by ``perfplot.bench``.
    """
    # Features benchmarked on a reduced set of input sizes
    # (presumably because they are too slow on the larger ones).
    reduced_sizes = {
        "approximate_entropy": [10_000],
        "number_cwt_peaks": [10_000, 100_000],
        "sample_entropy": [10_000, 100_000],
    }
    sizes = reduced_sizes.get(
        f_feat.__name__, [10_000, 100_000, 1_000_000, 9_000_000]
    )

    def make_inputs(n):
        # First n rows of the Polars and pandas frames; both are handed to
        # every kernel.
        return DF_PL_EAGER.head(n), DF_PANDAS.head(n)

    def run_functime(pl_df, _pd_df):
        if is_expr:
            return pl_df.select(f_feat(pl.col("value"), **f_params))
        return f_feat(pl_df["value"], **f_params)

    def run_tsfresh(_pl_df, pd_df):
        return ts_feat(pd_df["value"], **ts_params)

    return perfplot.bench(
        setup=make_inputs,
        kernels=[run_functime, run_tsfresh],
        n_range=sizes,
        # The two libraries are not required to return comparable objects.
        equality_check=False,
        labels=["functime", "tsfresh"],
    )
def all_benchmarks(params: list[tuple], is_expr: bool) -> pl.DataFrame:
    """Run ``benchmark`` for every entry of ``params`` and collect the timings.

    Args:
        params: tuples of
            ``(functime_function, tsfresh_function, functime_kwargs, tsfresh_kwargs)``.
        is_expr: forwarded to ``benchmark``.

    Returns:
        A ``pl.DataFrame`` with one row per (feature, input size) holding both
        timings in milliseconds, their absolute and relative difference, and
        the tsfresh/functime speedup ratio. Rows of the most recently
        benchmarked feature come first. Features whose benchmark raises
        ``ValueError`` or ``ImportError`` are reported on stdout and skipped.
    """
    bench_df = pl.DataFrame(
        schema={
            "Feature name": pl.Utf8,
            "n": pl.Int64,
            "functime (ms)": pl.Float64,
            "tfresh (ms)": pl.Float64,
            "diff (ms)": pl.Float64,
            "diff %": pl.Float64,
            "speedup": pl.Float64,
        }
    )
    for f_feat, ts_feat, f_params, ts_params in params:
        feature_name = f_feat.__name__
        try:
            print(f"Feature: {feature_name}")
            bench = benchmark(
                f_feat=f_feat,
                ts_feat=ts_feat,
                f_params=f_params,
                ts_params=ts_params,
                is_expr=is_expr,
            )
            # timings_s is indexed by kernel: 0 = functime, 1 = tsfresh.
            functime_s = bench.timings_s[0]
            tsfresh_s = bench.timings_s[1]
            feature_df = pl.DataFrame(
                {
                    "Feature name": [feature_name] * len(bench.n_range),
                    "n": bench.n_range,
                    "functime (ms)": functime_s * 1_000,
                    "tfresh (ms)": tsfresh_s * 1_000,
                    "diff (ms)": (functime_s - tsfresh_s) * 1_000,
                    "diff %": 100 * (functime_s - tsfresh_s) / tsfresh_s,
                    "speedup": tsfresh_s / functime_s,
                }
            )
            bench_df = pl.concat([feature_df, bench_df])
        except ValueError:
            print(f"Failed to compute feature {feature_name}")
        except ImportError:
            print(f"Failed to import feature {feature_name}")
    return bench_df
3. Run benchmarks¶
# Code to prettify benchmark results
def table_prettifier(df: pl.DataFrame, n: int):
    """Build the display table for a single input size.

    Keeps the rows of ``df`` whose "n" equals ``n``, drops the "n" column,
    sorts by "speedup" in descending order, rounds every column except
    "Feature name" by magnitude, and renders "speedup" as a string prefixed
    with "x ".
    """
    metrics = pl.exclude("Feature name")
    # Smaller magnitudes keep more decimals: 4 below 0.1, 2 below 1, else 1.
    rounded_metrics = (
        pl.when(metrics.abs() < 0.1)
        .then(metrics.round(4))
        .when(metrics.abs() < 1)
        .then(metrics.round(2))
        .when(metrics.abs() < 30)
        .then(metrics.round(1))
        .otherwise(metrics.round(1))
    )
    speedup_label = "x " + pl.col("speedup").cast(pl.Utf8)

    rows_for_n = df.filter(pl.col("n") == n).drop("n")
    ranked = rows_for_n.sort("speedup", descending=True)
    return ranked.with_columns(rounded_metrics).with_columns(speedup=speedup_label)
%%capture
# Run every benchmark twice: once through the pl.Expr API and once through
# the pl.Series API. The flag must be passed as `is_expr`, the name used in
# the signature of `all_benchmarks`.
bench_expr = all_benchmarks(params=FUNC_PARAMS_BENCH, is_expr=True)
bench_series = all_benchmarks(params=FUNC_PARAMS_BENCH, is_expr=False)

# pl.Expr benchmarks, one table per input size
df_expr_10k = table_prettifier(bench_expr, n=10_000)
df_expr_100k = table_prettifier(bench_expr, n=100_000)
df_expr_1m = table_prettifier(bench_expr, n=1_000_000)
df_expr_9m = table_prettifier(bench_expr, n=9_000_000)

# pl.Series benchmarks, one table per input size
df_series_10k = table_prettifier(bench_series, n=10_000)
df_series_100k = table_prettifier(bench_series, n=100_000)
df_series_1m = table_prettifier(bench_series, n=1_000_000)
df_series_9m = table_prettifier(bench_series, n=9_000_000)
INFO:functime.feature_extraction.tsfresh:Expression version of approximate_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extraction.tsfresh:Expression version of autoregressive_coefficients is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extraction.tsfresh:Expression version of sample_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extraction.tsfresh:Expression version of spkt_welch_density is not yet implemented due to technical difficulty regarding Polars Expression Plugins.
4. Benchmark results¶
Display 8 tables:
- For
pl.Series
: 10k, 100k, 1M and 9M rows - For
pl.Expr
: 10k, 100k, 1M and 9M rows
Each table contains the execution time (ms) for tsfresh and functime, the difference, the difference in % and the speedup:
4.1 Results for pl.Expr
¶
10k expr¶
df_expr_10k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"benford_correlation2" | 0.32 | 5.8 | -5.4 | -94.4 | "x 17.9" |
"benford_correlation" | 0.56 | 5.7 | -5.1 | -90.1 | "x 10.1" |
"mean_n_absolute_max" | 0.0393 | 0.39 | -0.35 | -89.9 | "x 9.9" |
"energy_ratios" | 0.16 | 1.0 | -0.88 | -84.4 | "x 6.4" |
"longest_streak_below_mean" | 0.15 | 0.61 | -0.46 | -75.0 | "x 4.0" |
"large_standard_deviation" | 0.0192 | 0.0703 | -0.0512 | -72.8 | "x 3.7" |
"range_count" | 0.018 | 0.0657 | -0.0478 | -72.7 | "x 3.7" |
"longest_streak_above_mean" | 0.17 | 0.59 | -0.42 | -70.7 | "x 3.4" |
"change_quantiles" | 0.17 | 0.5 | -0.32 | -64.9 | "x 2.8" |
"var_gt_std" | 0.0132 | 0.0368 | -0.0236 | -64.2 | "x 2.8" |
"ratio_beyond_r_sigma" | 0.044 | 0.12 | -0.0788 | -64.2 | "x 2.8" |
"number_peaks" | 0.2 | 0.44 | -0.24 | -54.8 | "x 2.2" |
"variation_coefficient" | 0.0276 | 0.0503 | -0.0227 | -45.1 | "x 1.8" |
"symmetry_looking" | 0.071 | 0.13 | -0.0557 | -44.0 | "x 1.8" |
"count_below_mean" | 0.0239 | 0.0422 | -0.0183 | -43.4 | "x 1.8" |
"count_below" | 0.0169 | 0.0286 | -0.0117 | -41.0 | "x 1.7" |
"count_above" | 0.0171 | 0.0285 | -0.0114 | -40.1 | "x 1.7" |
"count_above_mean" | 0.0256 | 0.0419 | -0.0162 | -38.8 | "x 1.6" |
"has_duplicate_min" | 0.0305 | 0.046 | -0.0155 | -33.7 | "x 1.5" |
"absolute_maximum" | 0.0202 | 0.0291 | -0.0089 | -30.6 | "x 1.4" |
"ratio_n_unique_to_length" | 0.27 | 0.39 | -0.12 | -30.2 | "x 1.4" |
"has_duplicate_max" | 0.0323 | 0.0457 | -0.0134 | -29.4 | "x 1.4" |
"has_duplicate" | 0.32 | 0.39 | -0.0752 | -19.2 | "x 1.2" |
"binned_entropy" | 0.0977 | 0.12 | -0.0198 | -16.8 | "x 1.2" |
"sum_reoccurring_points" | 0.36 | 0.43 | -0.07 | -16.2 | "x 1.2" |
"percent_reoccurring_values" | 0.36 | 0.42 | -0.0621 | -14.9 | "x 1.2" |
"percent_reoccurring_points" | 0.37 | 0.4 | -0.0315 | -7.8 | "x 1.1" |
"c3" | 0.0599 | 0.0618 | -0.0019 | -3.1 | "x 1.0" |
"root_mean_square" | 0.0345 | 0.0338 | 0.0008 | 2.2 | "x 0.98" |
"mean_abs_change" | 0.0134 | 0.0128 | 0.0005 | 4.2 | "x 0.96" |
"absolute_sum_of_changes" | 0.0128 | 0.0111 | 0.0017 | 15.0 | "x 0.87" |
"number_crossings" | 0.028 | 0.0229 | 0.0051 | 22.2 | "x 0.82" |
"absolute_energy" | 0.0159 | 0.012 | 0.0038 | 31.8 | "x 0.76" |
"permutation_entropy" | 5.7 | 4.2 | 1.4 | 33.7 | "x 0.75" |
"linear_trend" | 0.51 | 0.36 | 0.15 | 43.0 | "x 0.7" |
"sum_reoccurring_values" | 0.71 | 0.45 | 0.26 | 57.9 | "x 0.63" |
"cid_ce" | 0.17 | 0.11 | 0.0683 | 64.3 | "x 0.61" |
"index_mass_quantile" | 0.0934 | 0.0491 | 0.0443 | 90.2 | "x 0.53" |
"autocorrelation" | 0.0749 | 0.0381 | 0.0368 | 96.5 | "x 0.51" |
"first_location_of_minimum" | 0.027 | 0.0123 | 0.0147 | 120.1 | "x 0.45" |
"first_location_of_maximum" | 0.0276 | 0.0122 | 0.0154 | 125.9 | "x 0.44" |
"last_location_of_minimum" | 0.0332 | 0.0095 | 0.0237 | 248.1 | "x 0.29" |
"last_location_of_maximum" | 0.0353 | 0.0094 | 0.0258 | 274.3 | "x 0.27" |
"time_reversal_asymmetry_statistic" | 0.0997 | 0.0235 | 0.0762 | 324.3 | "x 0.24" |
"mean_change" | 0.0481 | 0.0027 | 0.0454 | 1677.1 | "x 0.0563" |
"mean_second_derivative_central" | 0.0737 | 0.0028 | 0.0708 | 2500.3 | "x 0.0385" |
100k expr¶
df_expr_100k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"mean_n_absolute_max" | 0.19 | 5.2 | -5.0 | -96.4 | "x 27.6" |
"benford_correlation2" | 2.2 | 58.4 | -56.3 | -96.3 | "x 27.1" |
"benford_correlation" | 5.0 | 57.6 | -52.6 | -91.3 | "x 11.5" |
"var_gt_std" | 0.0438 | 0.29 | -0.25 | -85.1 | "x 6.7" |
"variation_coefficient" | 0.0571 | 0.35 | -0.29 | -83.6 | "x 6.1" |
"longest_streak_below_mean" | 1.2 | 5.9 | -4.7 | -80.0 | "x 5.0" |
"longest_streak_above_mean" | 1.2 | 5.9 | -4.7 | -79.9 | "x 5.0" |
"change_quantiles" | 0.75 | 3.5 | -2.7 | -78.3 | "x 4.6" |
"large_standard_deviation" | 0.15 | 0.53 | -0.39 | -72.7 | "x 3.7" |
"energy_ratios" | 0.37 | 1.3 | -0.9 | -70.8 | "x 3.4" |
"absolute_maximum" | 0.044 | 0.15 | -0.1 | -69.9 | "x 3.3" |
"linear_trend" | 1.3 | 3.4 | -2.1 | -61.6 | "x 2.6" |
"count_below_mean" | 0.0576 | 0.15 | -0.0893 | -60.8 | "x 2.5" |
"ratio_beyond_r_sigma" | 0.21 | 0.52 | -0.31 | -60.1 | "x 2.5" |
"number_peaks" | 0.41 | 1.0 | -0.59 | -59.1 | "x 2.4" |
"ratio_n_unique_to_length" | 2.4 | 5.6 | -3.2 | -57.6 | "x 2.4" |
"symmetry_looking" | 0.49 | 1.1 | -0.62 | -55.6 | "x 2.3" |
"count_above_mean" | 0.0701 | 0.15 | -0.0773 | -52.4 | "x 2.1" |
"sum_reoccurring_points" | 3.1 | 5.9 | -2.8 | -46.8 | "x 1.9" |
"percent_reoccurring_values" | 3.0 | 5.7 | -2.7 | -46.7 | "x 1.9" |
"has_duplicate" | 3.1 | 5.6 | -2.5 | -45.0 | "x 1.8" |
"has_duplicate_max" | 0.1 | 0.17 | -0.0704 | -40.6 | "x 1.7" |
"has_duplicate_min" | 0.11 | 0.17 | -0.0625 | -36.0 | "x 1.6" |
"index_mass_quantile" | 0.33 | 0.41 | -0.0743 | -18.2 | "x 1.2" |
"permutation_entropy" | 46.7 | 55.1 | -8.5 | -15.3 | "x 1.2" |
"root_mean_square" | 0.0757 | 0.0856 | -0.01 | -11.6 | "x 1.1" |
"absolute_sum_of_changes" | 0.047 | 0.052 | -0.0049 | -9.5 | "x 1.1" |
"binned_entropy" | 0.69 | 0.76 | -0.0703 | -9.3 | "x 1.1" |
"mean_abs_change" | 0.0508 | 0.0559 | -0.0051 | -9.1 | "x 1.1" |
"sum_reoccurring_values" | 5.7 | 6.1 | -0.41 | -6.8 | "x 1.1" |
"autocorrelation" | 0.18 | 0.18 | -0.0056 | -3.1 | "x 1.0" |
"range_count" | 0.1 | 0.1 | -0.0019 | -1.8 | "x 1.0" |
"count_below" | 0.0542 | 0.0534 | 0.0008 | 1.4 | "x 0.99" |
"percent_reoccurring_points" | 3.2 | 3.1 | 0.13 | 4.3 | "x 0.96" |
"time_reversal_asymmetry_statistic" | 0.27 | 0.24 | 0.0319 | 13.3 | "x 0.88" |
"c3" | 0.18 | 0.16 | 0.0227 | 14.4 | "x 0.87" |
"absolute_energy" | 0.063 | 0.0508 | 0.0123 | 24.1 | "x 0.81" |
"count_above" | 0.0661 | 0.0532 | 0.0129 | 24.3 | "x 0.8" |
"cid_ce" | 0.63 | 0.49 | 0.14 | 29.0 | "x 0.78" |
"last_location_of_maximum" | 0.11 | 0.0554 | 0.057 | 102.8 | "x 0.49" |
"last_location_of_minimum" | 0.12 | 0.0556 | 0.0614 | 110.4 | "x 0.48" |
"first_location_of_minimum" | 0.094 | 0.042 | 0.052 | 123.6 | "x 0.45" |
"first_location_of_maximum" | 0.0943 | 0.0421 | 0.0522 | 123.8 | "x 0.45" |
"number_crossings" | 0.0828 | 0.0368 | 0.046 | 125.3 | "x 0.44" |
"mean_change" | 0.045 | 0.0027 | 0.0423 | 1587.3 | "x 0.0593" |
"mean_second_derivative_central" | 0.0733 | 0.0028 | 0.0705 | 2525.0 | "x 0.0381" |
1M expr¶
df_expr_1m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"benford_correlation2" | 21.3 | 588.6 | -567.3 | -96.4 | "x 27.6" |
"mean_n_absolute_max" | 2.5 | 61.5 | -59.0 | -96.0 | "x 24.7" |
"absolute_maximum" | 0.0792 | 1.6 | -1.5 | -95.0 | "x 19.9" |
"benford_correlation" | 51.3 | 592.9 | -541.6 | -91.3 | "x 11.6" |
"large_standard_deviation" | 0.5 | 5.2 | -4.7 | -90.4 | "x 10.4" |
"variation_coefficient" | 0.35 | 3.3 | -3.0 | -89.3 | "x 9.4" |
"var_gt_std" | 0.36 | 2.9 | -2.5 | -87.4 | "x 7.9" |
"has_duplicate_min" | 0.24 | 1.4 | -1.2 | -83.7 | "x 6.1" |
"has_duplicate_max" | 0.25 | 1.4 | -1.2 | -82.6 | "x 5.8" |
"longest_streak_above_mean" | 11.2 | 59.6 | -48.3 | -81.1 | "x 5.3" |
"longest_streak_below_mean" | 11.6 | 60.6 | -49.0 | -80.8 | "x 5.2" |
"number_peaks" | 1.9 | 9.1 | -7.3 | -79.7 | "x 4.9" |
"change_quantiles" | 7.2 | 34.4 | -27.2 | -79.0 | "x 4.8" |
"count_above_mean" | 0.3 | 1.4 | -1.1 | -78.1 | "x 4.6" |
"count_below_mean" | 0.31 | 1.4 | -1.1 | -77.8 | "x 4.5" |
"linear_trend" | 8.9 | 38.1 | -29.2 | -76.7 | "x 4.3" |
"ratio_beyond_r_sigma" | 1.2 | 4.8 | -3.6 | -75.2 | "x 4.0" |
"ratio_n_unique_to_length" | 15.9 | 63.8 | -47.9 | -75.1 | "x 4.0" |
"symmetry_looking" | 3.4 | 13.1 | -9.7 | -74.1 | "x 3.9" |
"energy_ratios" | 2.4 | 7.5 | -5.1 | -67.6 | "x 3.1" |
"has_duplicate" | 21.5 | 63.5 | -42.0 | -66.1 | "x 3.0" |
"sum_reoccurring_points" | 23.1 | 66.1 | -43.0 | -65.1 | "x 2.9" |
"percent_reoccurring_values" | 24.3 | 65.2 | -40.9 | -62.8 | "x 2.7" |
"root_mean_square" | 0.35 | 0.93 | -0.59 | -62.7 | "x 2.7" |
"c3" | 0.99 | 2.2 | -1.2 | -55.0 | "x 2.2" |
"absolute_sum_of_changes" | 0.53 | 1.1 | -0.6 | -52.8 | "x 2.1" |
"mean_abs_change" | 0.47 | 0.94 | -0.48 | -50.4 | "x 2.0" |
"autocorrelation" | 1.4 | 2.6 | -1.2 | -47.4 | "x 1.9" |
"range_count" | 0.27 | 0.51 | -0.24 | -46.8 | "x 1.9" |
"count_below" | 0.17 | 0.3 | -0.13 | -42.9 | "x 1.8" |
"count_above" | 0.17 | 0.3 | -0.12 | -41.3 | "x 1.7" |
"index_mass_quantile" | 2.9 | 4.7 | -1.8 | -38.3 | "x 1.6" |
"sum_reoccurring_values" | 44.9 | 66.7 | -21.8 | -32.7 | "x 1.5" |
"time_reversal_asymmetry_statistic" | 2.1 | 3.1 | -0.96 | -31.2 | "x 1.5" |
"permutation_entropy" | 471.5 | 670.1 | -198.6 | -29.6 | "x 1.4" |
"absolute_energy" | 0.32 | 0.44 | -0.12 | -27.4 | "x 1.4" |
"cid_ce" | 4.4 | 5.5 | -1.1 | -19.4 | "x 1.2" |
"binned_entropy" | 5.3 | 6.6 | -1.2 | -19.0 | "x 1.2" |
"last_location_of_minimum" | 0.75 | 0.79 | -0.0394 | -5.0 | "x 1.1" |
"last_location_of_maximum" | 0.75 | 0.79 | -0.0385 | -4.9 | "x 1.1" |
"percent_reoccurring_points" | 22.1 | 21.2 | 0.95 | 4.5 | "x 0.96" |
"first_location_of_maximum" | 0.61 | 0.34 | 0.27 | 80.1 | "x 0.56" |
"first_location_of_minimum" | 0.61 | 0.34 | 0.28 | 81.7 | "x 0.55" |
"number_crossings" | 0.37 | 0.17 | 0.2 | 121.4 | "x 0.45" |
"mean_change" | 0.0425 | 0.0027 | 0.0398 | 1494.1 | "x 0.0627" |
"mean_second_derivative_central" | 0.066 | 0.0028 | 0.0632 | 2263.2 | "x 0.0423" |
9M expr¶
df_expr_9m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"mean_n_absolute_max" | 16.1 | 601.7 | -585.6 | -97.3 | "x 37.4" |
"benford_correlation2" | 201.0 | 5350.0 | -5149.0 | -96.2 | "x 26.6" |
"absolute_maximum" | 0.97 | 16.6 | -15.6 | -94.1 | "x 17.1" |
"benford_correlation" | 458.0 | 5365.4 | -4907.4 | -91.5 | "x 11.7" |
"large_standard_deviation" | 5.4 | 47.2 | -41.8 | -88.5 | "x 8.7" |
"variation_coefficient" | 4.2 | 31.6 | -27.3 | -86.6 | "x 7.5" |
"number_peaks" | 15.0 | 102.0 | -87.0 | -85.3 | "x 6.8" |
"has_duplicate_min" | 2.1 | 14.0 | -11.9 | -85.2 | "x 6.8" |
"has_duplicate_max" | 2.1 | 13.9 | -11.9 | -85.1 | "x 6.7" |
"var_gt_std" | 4.0 | 26.0 | -22.1 | -84.8 | "x 6.6" |
"ratio_n_unique_to_length" | 94.8 | 612.3 | -517.5 | -84.5 | "x 6.5" |
"change_quantiles" | 52.7 | 305.8 | -253.0 | -82.8 | "x 5.8" |
"longest_streak_below_mean" | 95.4 | 543.3 | -447.9 | -82.4 | "x 5.7" |
"count_below_mean" | 2.6 | 15.0 | -12.4 | -82.4 | "x 5.7" |
"longest_streak_above_mean" | 95.3 | 541.5 | -446.1 | -82.4 | "x 5.7" |
"count_above_mean" | 2.7 | 14.9 | -12.3 | -82.2 | "x 5.6" |
"root_mean_square" | 2.4 | 11.2 | -8.8 | -78.7 | "x 4.7" |
"linear_trend" | 77.2 | 352.2 | -275.0 | -78.1 | "x 4.6" |
"has_duplicate" | 157.8 | 608.2 | -450.4 | -74.1 | "x 3.9" |
"sum_reoccurring_points" | 165.5 | 620.4 | -454.9 | -73.3 | "x 3.7" |
"ratio_beyond_r_sigma" | 13.4 | 47.9 | -34.5 | -72.0 | "x 3.6" |
"absolute_sum_of_changes" | 3.9 | 13.9 | -10.0 | -71.6 | "x 3.5" |
"symmetry_looking" | 30.3 | 105.5 | -75.2 | -71.3 | "x 3.5" |
"range_count" | 2.6 | 7.9 | -5.3 | -67.0 | "x 3.0" |
"mean_abs_change" | 4.5 | 13.2 | -8.7 | -66.1 | "x 2.9" |
"c3" | 8.6 | 24.1 | -15.5 | -64.3 | "x 2.8" |
"count_below" | 1.4 | 3.5 | -2.2 | -61.4 | "x 2.6" |
"count_above" | 1.4 | 3.6 | -2.2 | -61.2 | "x 2.6" |
"percent_reoccurring_values" | 251.4 | 617.6 | -366.1 | -59.3 | "x 2.5" |
"autocorrelation" | 12.9 | 29.0 | -16.1 | -55.5 | "x 2.2" |
"energy_ratios" | 32.7 | 69.5 | -36.8 | -52.9 | "x 2.1" |
"sum_reoccurring_values" | 342.7 | 624.1 | -281.5 | -45.1 | "x 1.8" |
"permutation_entropy" | 4250.8 | 7068.9 | -2818.2 | -39.9 | "x 1.7" |
"absolute_energy" | 2.4 | 3.9 | -1.5 | -38.5 | "x 1.6" |
"time_reversal_asymmetry_statistic" | 20.1 | 30.9 | -10.7 | -34.7 | "x 1.5" |
"index_mass_quantile" | 33.7 | 46.0 | -12.2 | -26.6 | "x 1.4" |
"percent_reoccurring_points" | 159.9 | 211.8 | -51.9 | -24.5 | "x 1.3" |
"binned_entropy" | 47.6 | 62.5 | -14.8 | -23.8 | "x 1.3" |
"last_location_of_maximum" | 6.8 | 8.7 | -1.9 | -22.0 | "x 1.3" |
"last_location_of_minimum" | 6.8 | 8.6 | -1.8 | -21.2 | "x 1.3" |
"cid_ce" | 51.5 | 55.7 | -4.3 | -7.6 | "x 1.1" |
"number_crossings" | 2.9 | 2.9 | 0.046 | 1.6 | "x 0.98" |
"first_location_of_minimum" | 5.2 | 4.1 | 1.2 | 28.9 | "x 0.78" |
"first_location_of_maximum" | 5.2 | 4.0 | 1.2 | 30.6 | "x 0.77" |
"mean_change" | 0.0421 | 0.0027 | 0.0394 | 1478.0 | "x 0.0634" |
"mean_second_derivative_central" | 0.0669 | 0.0028 | 0.0641 | 2262.1 | "x 0.0423" |
4.2 Results for pl.Series
¶
10k series¶
df_series_10k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"approximate_entropy" | 43.1 | 5688.4 | -5645.2 | -99.2 | "x 131.9" |
"sample_entropy" | 35.1 | 3294.3 | -3259.2 | -98.9 | "x 93.8" |
"benford_correlation2" | 0.1 | 5.8 | -5.7 | -98.2 | "x 56.3" |
"energy_ratios" | 0.0865 | 1.0 | -0.96 | -91.7 | "x 12.1" |
"benford_correlation" | 0.62 | 5.9 | -5.3 | -89.5 | "x 9.5" |
"mean_n_absolute_max" | 0.0515 | 0.38 | -0.33 | -86.5 | "x 7.4" |
"has_duplicate_min" | 0.0067 | 0.0473 | -0.0405 | -85.8 | "x 7.0" |
"has_duplicate_max" | 0.0067 | 0.047 | -0.0403 | -85.7 | "x 7.0" |
"count_below_mean" | 0.0065 | 0.0425 | -0.036 | -84.6 | "x 6.5" |
"count_above_mean" | 0.0065 | 0.0421 | -0.0355 | -84.5 | "x 6.4" |
"absolute_maximum" | 0.0055 | 0.03 | -0.0246 | -81.8 | "x 5.5" |
"count_above" | 0.0059 | 0.0321 | -0.0262 | -81.6 | "x 5.4" |
"count_below" | 0.006 | 0.029 | -0.023 | -79.2 | "x 4.8" |
"longest_streak_below_mean" | 0.13 | 0.6 | -0.47 | -79.0 | "x 4.8" |
"longest_streak_above_mean" | 0.13 | 0.6 | -0.48 | -79.0 | "x 4.8" |
"large_standard_deviation" | 0.0157 | 0.0704 | -0.0547 | -77.7 | "x 4.5" |
"symmetry_looking" | 0.0278 | 0.12 | -0.0948 | -77.3 | "x 4.4" |
"linear_trend" | 0.0869 | 0.36 | -0.27 | -75.9 | "x 4.1" |
"range_count" | 0.0188 | 0.0644 | -0.0457 | -70.9 | "x 3.4" |
"variation_coefficient" | 0.0153 | 0.0501 | -0.0348 | -69.5 | "x 3.3" |
"absolute_energy" | 0.0044 | 0.0125 | -0.0081 | -65.0 | "x 2.9" |
"var_gt_std" | 0.0132 | 0.0369 | -0.0237 | -64.3 | "x 2.8" |
"cid_ce" | 0.0403 | 0.11 | -0.0674 | -62.6 | "x 2.7" |
"change_quantiles" | 0.19 | 0.5 | -0.31 | -61.9 | "x 2.6" |
"number_peaks" | 0.19 | 0.44 | -0.24 | -55.4 | "x 2.2" |
"first_location_of_minimum" | 0.0066 | 0.0126 | -0.006 | -47.7 | "x 1.9" |
"first_location_of_maximum" | 0.0065 | 0.0123 | -0.0057 | -46.6 | "x 1.9" |
"ratio_beyond_r_sigma" | 0.066 | 0.12 | -0.0535 | -44.8 | "x 1.8" |
"ratio_n_unique_to_length" | 0.23 | 0.4 | -0.17 | -42.3 | "x 1.7" |
"mean_change" | 0.0017 | 0.0027 | -0.001 | -38.4 | "x 1.6" |
"percent_reoccurring_values" | 0.31 | 0.43 | -0.12 | -27.8 | "x 1.4" |
"root_mean_square" | 0.0248 | 0.0338 | -0.009 | -26.6 | "x 1.4" |
"sum_reoccurring_points" | 0.33 | 0.45 | -0.12 | -26.3 | "x 1.4" |
"mean_second_derivative_central" | 0.0021 | 0.0028 | -0.0007 | -23.9 | "x 1.3" |
"percent_reoccurring_points" | 0.32 | 0.4 | -0.0793 | -19.9 | "x 1.2" |
"sum_reoccurring_values" | 0.37 | 0.45 | -0.0714 | -16.0 | "x 1.2" |
"binned_entropy" | 0.0997 | 0.12 | -0.0185 | -15.6 | "x 1.2" |
"has_duplicate" | 0.33 | 0.39 | -0.061 | -15.5 | "x 1.2" |
"spkt_welch_density" | 0.15 | 0.15 | -0.002 | -1.3 | "x 1.0" |
"number_cwt_peaks" | 206.7 | 205.9 | 0.83 | 0.4 | "x 1.0" |
"c3" | 0.0679 | 0.0621 | 0.0058 | 9.3 | "x 0.92" |
"permutation_entropy" | 5.7 | 4.2 | 1.5 | 35.0 | "x 0.74" |
"index_mass_quantile" | 0.0841 | 0.049 | 0.035 | 71.5 | "x 0.58" |
"last_location_of_minimum" | 0.0167 | 0.0097 | 0.007 | 72.9 | "x 0.58" |
"last_location_of_maximum" | 0.0167 | 0.0095 | 0.0071 | 74.7 | "x 0.57" |
"mean_abs_change" | 0.0226 | 0.0126 | 0.01 | 79.8 | "x 0.56" |
"number_crossings" | 0.0413 | 0.0227 | 0.0186 | 82.2 | "x 0.55" |
"absolute_sum_of_changes" | 0.0231 | 0.0116 | 0.0115 | 99.3 | "x 0.5" |
"autocorrelation" | 0.0948 | 0.0386 | 0.0561 | 145.3 | "x 0.41" |
"time_reversal_asymmetry_statistic" | 0.0772 | 0.0232 | 0.054 | 232.5 | "x 0.3" |
100k series¶
df_series_100k
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"sample_entropy" | 511.1 | 347925.6 | -347414.5 | -99.9 | "x 680.8" |
"benford_correlation2" | 0.4 | 58.0 | -57.6 | -99.3 | "x 146.4" |
"mean_n_absolute_max" | 0.19 | 5.2 | -5.0 | -96.3 | "x 27.4" |
"benford_correlation" | 5.0 | 58.2 | -53.2 | -91.4 | "x 11.7" |
"large_standard_deviation" | 0.0621 | 0.53 | -0.47 | -88.3 | "x 8.6" |
"has_duplicate_max" | 0.0232 | 0.17 | -0.15 | -86.6 | "x 7.5" |
"has_duplicate_min" | 0.0233 | 0.17 | -0.15 | -86.6 | "x 7.5" |
"linear_trend" | 0.46 | 3.4 | -3.0 | -86.5 | "x 7.4" |
"energy_ratios" | 0.19 | 1.3 | -1.1 | -85.5 | "x 6.9" |
"absolute_maximum" | 0.0213 | 0.15 | -0.12 | -85.4 | "x 6.9" |
"var_gt_std" | 0.0434 | 0.29 | -0.25 | -85.2 | "x 6.8" |
"variation_coefficient" | 0.054 | 0.35 | -0.29 | -84.5 | "x 6.5" |
"count_above_mean" | 0.0234 | 0.15 | -0.13 | -84.4 | "x 6.4" |
"count_below_mean" | 0.024 | 0.15 | -0.12 | -83.7 | "x 6.1" |
"longest_streak_above_mean" | 1.1 | 5.8 | -4.7 | -81.2 | "x 5.3" |
"longest_streak_below_mean" | 1.1 | 5.9 | -4.8 | -81.2 | "x 5.3" |
"change_quantiles" | 0.75 | 3.5 | -2.7 | -78.3 | "x 4.6" |
"symmetry_looking" | 0.27 | 1.1 | -0.84 | -75.6 | "x 4.1" |
"count_above" | 0.0144 | 0.0545 | -0.0401 | -73.6 | "x 3.8" |
"count_below" | 0.0148 | 0.0548 | -0.0401 | -73.1 | "x 3.7" |
"ratio_beyond_r_sigma" | 0.16 | 0.52 | -0.36 | -68.6 | "x 3.2" |
"cid_ce" | 0.16 | 0.49 | -0.33 | -67.3 | "x 3.1" |
"range_count" | 0.0358 | 0.1 | -0.0672 | -65.3 | "x 2.9" |
"ratio_n_unique_to_length" | 2.3 | 5.6 | -3.3 | -59.4 | "x 2.5" |
"percent_reoccurring_values" | 2.9 | 5.7 | -2.8 | -49.2 | "x 2.0" |
"sum_reoccurring_points" | 3.1 | 5.9 | -2.8 | -47.7 | "x 1.9" |
"absolute_energy" | 0.0269 | 0.0508 | -0.024 | -47.1 | "x 1.9" |
"has_duplicate" | 3.1 | 5.6 | -2.4 | -44.0 | "x 1.8" |
"number_peaks" | 0.48 | 0.84 | -0.36 | -42.4 | "x 1.7" |
"sum_reoccurring_values" | 3.6 | 6.1 | -2.5 | -41.6 | "x 1.7" |
"mean_change" | 0.0017 | 0.0027 | -0.001 | -36.9 | "x 1.6" |
"root_mean_square" | 0.0634 | 0.0892 | -0.0258 | -28.9 | "x 1.4" |
"mean_second_derivative_central" | 0.0022 | 0.0028 | -0.0006 | -22.4 | "x 1.3" |
"index_mass_quantile" | 0.33 | 0.4 | -0.0741 | -18.3 | "x 1.2" |
"time_reversal_asymmetry_statistic" | 0.18 | 0.21 | -0.0371 | -17.4 | "x 1.2" |
"permutation_entropy" | 47.0 | 54.9 | -7.9 | -14.5 | "x 1.2" |
"c3" | 0.14 | 0.16 | -0.0198 | -12.5 | "x 1.1" |
"binned_entropy" | 0.72 | 0.77 | -0.0468 | -6.1 | "x 1.1" |
"number_cwt_peaks" | 21119.7 | 21740.8 | -621.0 | -2.9 | "x 1.0" |
"percent_reoccurring_points" | 3.1 | 3.1 | -0.0895 | -2.8 | "x 1.0" |
"spkt_welch_density" | 1.2 | 1.2 | 0.0001 | 0.0108 | "x 1.0" |
"mean_abs_change" | 0.058 | 0.0558 | 0.0022 | 3.9 | "x 0.96" |
"autocorrelation" | 0.19 | 0.18 | 0.0148 | 8.4 | "x 0.92" |
"absolute_sum_of_changes" | 0.0608 | 0.052 | 0.0088 | 16.9 | "x 0.86" |
"first_location_of_minimum" | 0.0582 | 0.0421 | 0.016 | 38.1 | "x 0.72" |
"first_location_of_maximum" | 0.0582 | 0.042 | 0.0162 | 38.5 | "x 0.72" |
"number_crossings" | 0.0516 | 0.0359 | 0.0157 | 43.6 | "x 0.7" |
"last_location_of_minimum" | 0.0822 | 0.0558 | 0.0263 | 47.2 | "x 0.68" |
"last_location_of_maximum" | 0.0822 | 0.0555 | 0.0267 | 48.2 | "x 0.67" |
1M series¶
df_series_1m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"benford_correlation2" | 3.4 | 588.9 | -585.5 | -99.4 | "x 172.4" |
"mean_n_absolute_max" | 2.6 | 61.2 | -58.6 | -95.8 | "x 23.7" |
"benford_correlation" | 51.0 | 589.7 | -538.7 | -91.4 | "x 11.6" |
"large_standard_deviation" | 0.55 | 5.2 | -4.6 | -89.3 | "x 9.3" |
"var_gt_std" | 0.35 | 2.9 | -2.5 | -87.9 | "x 8.3" |
"absolute_maximum" | 0.18 | 1.4 | -1.2 | -87.2 | "x 7.8" |
"has_duplicate_max" | 0.19 | 1.4 | -1.3 | -87.1 | "x 7.8" |
"has_duplicate_min" | 0.19 | 1.4 | -1.3 | -87.1 | "x 7.8" |
"linear_trend" | 5.0 | 38.3 | -33.3 | -87.0 | "x 7.7" |
"count_below_mean" | 0.2 | 1.4 | -1.2 | -86.3 | "x 7.3" |
"variation_coefficient" | 0.46 | 3.3 | -2.9 | -86.3 | "x 7.3" |
"count_above_mean" | 0.19 | 1.4 | -1.2 | -86.2 | "x 7.2" |
"longest_streak_below_mean" | 11.0 | 60.6 | -49.5 | -81.8 | "x 5.5" |
"longest_streak_above_mean" | 10.9 | 59.4 | -48.5 | -81.7 | "x 5.5" |
"energy_ratios" | 1.3 | 7.1 | -5.8 | -81.5 | "x 5.4" |
"number_peaks" | 1.9 | 9.6 | -7.8 | -80.5 | "x 5.1" |
"change_quantiles" | 7.2 | 33.8 | -26.6 | -78.7 | "x 4.7" |
"ratio_beyond_r_sigma" | 1.2 | 4.8 | -3.6 | -75.3 | "x 4.1" |
"symmetry_looking" | 3.3 | 13.1 | -9.9 | -75.2 | "x 4.0" |
"ratio_n_unique_to_length" | 16.0 | 64.0 | -48.0 | -74.9 | "x 4.0" |
"count_above" | 0.1 | 0.31 | -0.2 | -66.6 | "x 3.0" |
"count_below" | 0.1 | 0.3 | -0.2 | -66.3 | "x 3.0" |
"cid_ce" | 1.8 | 5.5 | -3.6 | -66.3 | "x 3.0" |
"sum_reoccurring_points" | 23.0 | 65.7 | -42.7 | -65.0 | "x 2.9" |
"has_duplicate" | 22.6 | 64.0 | -41.4 | -64.7 | "x 2.8" |
"percent_reoccurring_values" | 23.0 | 65.2 | -42.2 | -64.7 | "x 2.8" |
"range_count" | 0.21 | 0.53 | -0.33 | -61.0 | "x 2.6" |
"c3" | 0.88 | 2.1 | -1.3 | -59.3 | "x 2.5" |
"absolute_sum_of_changes" | 0.48 | 1.1 | -0.66 | -58.1 | "x 2.4" |
"mean_abs_change" | 0.47 | 1.0 | -0.54 | -53.2 | "x 2.1" |
"sum_reoccurring_values" | 33.8 | 66.4 | -32.6 | -49.0 | "x 2.0" |
"time_reversal_asymmetry_statistic" | 1.6 | 3.0 | -1.5 | -48.4 | "x 1.9" |
"autocorrelation" | 1.3 | 2.4 | -1.1 | -47.1 | "x 1.9" |
"root_mean_square" | 0.45 | 0.83 | -0.38 | -45.9 | "x 1.8" |
"index_mass_quantile" | 2.8 | 4.7 | -1.9 | -39.8 | "x 1.7" |
"mean_change" | 0.0017 | 0.0027 | -0.001 | -36.0 | "x 1.6" |
"permutation_entropy" | 470.2 | 675.4 | -205.2 | -30.4 | "x 1.4" |
"mean_second_derivative_central" | 0.0022 | 0.0028 | -0.0006 | -21.2 | "x 1.3" |
"binned_entropy" | 6.4 | 6.8 | -0.41 | -6.0 | "x 1.1" |
"number_crossings" | 0.17 | 0.17 | -0.0009 | -0.54 | "x 1.0" |
"spkt_welch_density" | 12.3 | 12.1 | 0.2 | 1.7 | "x 0.98" |
"absolute_energy" | 0.45 | 0.44 | 0.0122 | 2.8 | "x 0.97" |
"percent_reoccurring_points" | 22.0 | 20.7 | 1.3 | 6.5 | "x 0.94" |
"last_location_of_maximum" | 0.74 | 0.69 | 0.049 | 7.1 | "x 0.93" |
"last_location_of_minimum" | 0.74 | 0.68 | 0.0622 | 9.2 | "x 0.92" |
"first_location_of_minimum" | 0.57 | 0.34 | 0.23 | 69.2 | "x 0.59" |
"first_location_of_maximum" | 0.57 | 0.34 | 0.23 | 69.2 | "x 0.59" |
9M series¶
df_series_9m
Feature name | functime (ms) | tfresh (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"benford_correlation2" | 31.4 | 5368.5 | -5337.1 | -99.4 | "x 171.2" |
"mean_n_absolute_max" | 13.6 | 599.5 | -585.9 | -97.7 | "x 44.0" |
"benford_correlation" | 511.1 | 5381.9 | -4870.7 | -90.5 | "x 10.5" |
"large_standard_deviation" | 6.4 | 47.1 | -40.6 | -86.4 | "x 7.3" |
"linear_trend" | 50.5 | 368.1 | -317.6 | -86.3 | "x 7.3" |
"absolute_maximum" | 2.3 | 16.1 | -13.8 | -85.7 | "x 7.0" |
"number_peaks" | 15.1 | 102.9 | -87.7 | -85.3 | "x 6.8" |
"var_gt_std" | 3.9 | 26.1 | -22.2 | -85.0 | "x 6.7" |
"ratio_n_unique_to_length" | 94.0 | 610.2 | -516.2 | -84.6 | "x 6.5" |
"energy_ratios" | 11.0 | 70.1 | -59.1 | -84.3 | "x 6.4" |
"count_below_mean" | 2.4 | 15.0 | -12.7 | -84.3 | "x 6.4" |
"count_above_mean" | 2.4 | 14.8 | -12.4 | -83.9 | "x 6.2" |
"variation_coefficient" | 5.1 | 31.1 | -25.9 | -83.5 | "x 6.1" |
"has_duplicate_min" | 2.4 | 14.0 | -11.7 | -83.2 | "x 5.9" |
"has_duplicate_max" | 2.4 | 13.9 | -11.6 | -83.0 | "x 5.9" |
"longest_streak_below_mean" | 92.6 | 542.3 | -449.7 | -82.9 | "x 5.9" |
"longest_streak_above_mean" | 93.3 | 541.4 | -448.1 | -82.8 | "x 5.8" |
"change_quantiles" | 52.8 | 297.3 | -244.5 | -82.2 | "x 5.6" |
"sum_reoccurring_points" | 165.0 | 617.7 | -452.8 | -73.3 | "x 3.7" |
"has_duplicate" | 162.8 | 609.0 | -446.2 | -73.3 | "x 3.7" |
"ratio_beyond_r_sigma" | 13.5 | 48.1 | -34.6 | -71.9 | "x 3.6" |
"c3" | 6.9 | 23.7 | -16.8 | -70.8 | "x 3.4" |
"symmetry_looking" | 31.4 | 105.5 | -74.1 | -70.2 | "x 3.4" |
"range_count" | 2.5 | 7.8 | -5.3 | -68.6 | "x 3.2" |
"absolute_sum_of_changes" | 4.5 | 13.3 | -8.8 | -66.0 | "x 2.9" |
"count_above" | 1.2 | 3.5 | -2.3 | -65.7 | "x 2.9" |
"mean_abs_change" | 4.5 | 13.2 | -8.7 | -65.7 | "x 2.9" |
"count_below" | 1.2 | 3.5 | -2.3 | -65.4 | "x 2.9" |
"cid_ce" | 18.9 | 53.7 | -34.8 | -64.7 | "x 2.8" |
"root_mean_square" | 3.9 | 11.0 | -7.1 | -64.3 | "x 2.8" |
"percent_reoccurring_values" | 235.3 | 619.4 | -384.1 | -62.0 | "x 2.6" |
"time_reversal_asymmetry_statistic" | 12.1 | 29.5 | -17.4 | -58.9 | "x 2.4" |
"autocorrelation" | 12.3 | 28.7 | -16.4 | -57.2 | "x 2.3" |
"sum_reoccurring_values" | 278.0 | 623.7 | -345.7 | -55.4 | "x 2.2" |
"number_crossings" | 1.6 | 2.9 | -1.3 | -45.2 | "x 1.8" |
"permutation_entropy" | 4204.1 | 7104.6 | -2900.5 | -40.8 | "x 1.7" |
"index_mass_quantile" | 27.3 | 45.8 | -18.5 | -40.4 | "x 1.7" |
"mean_change" | 0.0017 | 0.0027 | -0.001 | -37.5 | "x 1.6" |
"percent_reoccurring_points" | 161.6 | 212.9 | -51.3 | -24.1 | "x 1.3" |
"last_location_of_maximum" | 6.7 | 8.6 | -1.9 | -21.8 | "x 1.3" |
"last_location_of_minimum" | 6.8 | 8.6 | -1.8 | -21.2 | "x 1.3" |
"mean_second_derivative_central" | 0.0022 | 0.0028 | -0.0006 | -20.9 | "x 1.3" |
"binned_entropy" | 59.7 | 61.8 | -2.1 | -3.4 | "x 1.0" |
"spkt_welch_density" | 117.5 | 116.8 | 0.7 | 0.6 | "x 0.99" |
"absolute_energy" | 3.9 | 3.9 | 0.0373 | 0.96 | "x 0.99" |
"first_location_of_minimum" | 5.2 | 4.0 | 1.1 | 28.2 | "x 0.78" |
"first_location_of_maximum" | 5.2 | 4.0 | 1.2 | 28.9 | "x 0.78" |
5. Benchmark Group by / Aggregation
context¶
Benchmark combining functime's feature extraction and polars' Group by / Aggregation
context.
# Relative path to the S&P 500 price data (parquet).
_SP500_DATASET = "../../data/sp500.parquet"
# Load the same file once per library so that each benchmark kernel runs on
# its native frame type: pandas for tsfresh, polars for functime.
SP500_PANDAS = pd.read_parquet(_SP500_DATASET)
SP500_PL_EAGER = pl.read_parquet(_SP500_DATASET)
# Last expression of the cell: displays the pandas frame in the notebook.
SP500_PANDAS
ticker | time | price | |
---|---|---|---|
0 | A | 2022-06-01 | 122.278214 |
1 | A | 2022-06-02 | 128.248581 |
2 | A | 2022-06-03 | 127.642609 |
3 | A | 2022-06-06 | 126.788277 |
4 | A | 2022-06-07 | 128.049881 |
... | ... | ... | ... |
126248 | ZTS | 2023-05-24 | 169.139999 |
126249 | ZTS | 2023-05-25 | 165.240005 |
126250 | ZTS | 2023-05-26 | 164.740005 |
126251 | ZTS | 2023-05-30 | 160.940002 |
126252 | ZTS | 2023-05-31 | 163.009995 |
126253 rows × 3 columns
We want to compare tsfresh
using pandas' groupby
with functime
using polars' groupby
as follows:
%%timeit
# tsfresh baseline: pandas' groupby/agg calls tsfresh.number_peaks on each
# ticker's "price" series; the extra keyword n=5 is forwarded to that function.
SP500_PANDAS.groupby(
by = "ticker"
)["price"].agg(
tsfresh.number_peaks,
n = 5
)
209 ms ± 673 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%timeit
# functime counterpart of the pandas cell: number_peaks is built as a polars
# expression through the `ts` namespace on pl.col("price") and evaluated per
# ticker inside group_by/agg.
SP500_PL_EAGER.group_by(
pl.col("ticker")
).agg(
pl.col("price").ts.number_peaks(support = 5)
)
22.1 ms ± 522 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
If we examine the previous benchmark, we can see that the number_peaks
operation is several times faster (x 5.1 on the 1M series and x 6.8 on the 9M series in the tables above) when using functime
compared to tsfresh
.
In the groupby
context, it's roughly 10 times faster (209 ms vs. 22.1 ms)!
def benchmark_groupby_context(
    f_feat: Callable, ts_feat: Callable, f_params: dict, ts_params: dict
):
    """Time one feature in a group-by/aggregation context with both libraries.

    Two kernels are handed to ``perfplot.bench``:

    * functime: ``f_feat`` is applied to ``pl.col("price")`` and aggregated
      per ``"ticker"`` on ``SP500_PL_EAGER``;
    * tsfresh: ``ts_feat`` is passed to pandas' ``groupby("ticker")["price"].agg``
      on ``SP500_PANDAS``.

    Parameters
    ----------
    f_feat
        functime feature, called as ``f_feat(pl.col("price"), **f_params)``.
    ts_feat
        tsfresh feature calculator, passed to pandas' ``agg``.
    f_params
        Keyword arguments for ``f_feat``.
    ts_params
        Keyword arguments forwarded to ``ts_feat`` through ``agg``.

    Returns
    -------
    The object returned by ``perfplot.bench``.
    """

    def _load_frames(_n):
        # The datasets have a fixed size, so the perfplot size argument is
        # ignored and the same two frames are always returned.
        return SP500_PL_EAGER, SP500_PANDAS

    def _functime_kernel(polars_frame, _pandas_frame):
        # functime + polars groupby
        grouped = polars_frame.group_by(pl.col("ticker"))
        return grouped.agg(f_feat(pl.col("price"), **f_params))

    def _tsfresh_kernel(_polars_frame, pandas_frame):
        # tsfresh + pandas groupby
        prices_by_ticker = pandas_frame.groupby("ticker")["price"]
        return prices_by_ticker.agg(ts_feat, **ts_params)

    return perfplot.bench(
        setup=_load_frames,
        kernels=[_functime_kernel, _tsfresh_kernel],
        n_range=[1],  # single measurement point
        equality_check=False,  # outputs of the two kernels are not compared
        labels=["functime", "tsfresh"],
    )
def all_benchmarks_groupby(params: list[tuple]) -> pl.DataFrame:
    """Run the group-by benchmark for every feature and collect the timings.

    Parameters
    ----------
    params
        Sequence of tuples laid out as
        ``(functime_function, tsfresh_function, functime_parameters,
        tsfresh_parameters)``.

    Returns
    -------
    pl.DataFrame
        One row per benchmarked feature and ``n`` value, with the columns
        declared in the schema below. Timings are converted from seconds to
        milliseconds. ``diff (ms)`` and ``diff %`` are computed as functime
        minus tsfresh, so negative values mean functime was faster.
        ``speedup`` is the tsfresh time divided by the functime time.
        Rows of the most recently benchmarked feature come first.

    Notes
    -----
    A feature whose benchmark raises ``ValueError`` or ``ImportError`` is
    reported on stdout and skipped; any other exception propagates.
    """
    bench_df = pl.DataFrame(
        schema={
            "Feature name": pl.Utf8,
            "n": pl.Int64,
            "functime + pl groupby (ms)": pl.Float64,
            "tfresh + pd groupby (ms)": pl.Float64,
            "diff (ms)": pl.Float64,
            "diff %": pl.Float64,
            "speedup": pl.Float64,
        }
    )
    for spec in params:
        # Name the tuple fields once instead of indexing spec[0..3] repeatedly.
        f_feat, ts_feat = spec[0], spec[1]
        f_params, ts_params = spec[2], spec[3]
        feature_name = f_feat.__name__
        try:
            print(f"Feature: {feature_name}")
            bench = benchmark_groupby_context(
                f_feat=f_feat,
                ts_feat=ts_feat,
                f_params=f_params,
                ts_params=ts_params,
            )
            # timings_s is indexed by kernel: 0 = functime, 1 = tsfresh.
            functime_s = bench.timings_s[0]
            tsfresh_s = bench.timings_s[1]
            feature_rows = pl.DataFrame(
                {
                    "Feature name": [feature_name] * len(bench.n_range),
                    "n": bench.n_range,
                    "functime + pl groupby (ms)": functime_s * 1_000,
                    "tfresh + pd groupby (ms)": tsfresh_s * 1_000,
                    "diff (ms)": (functime_s - tsfresh_s) * 1_000,
                    "diff %": 100 * (functime_s - tsfresh_s) / tsfresh_s,
                    "speedup": tsfresh_s / functime_s,
                }
            )
            # New rows are stacked on top of the rows collected so far.
            bench_df = pl.concat([feature_rows, bench_df])
        except ValueError:
            print(f"Failed to compute feature {feature_name}")
        except ImportError:
            print(f"Failed to import feature {feature_name}")
    return bench_df
%%capture
# Run the group-by benchmark for every entry of FUNC_PARAMS_BENCH; %%capture
# hides the per-feature progress prints.
bench_groupby = all_benchmarks_groupby(params=FUNC_PARAMS_BENCH)
# table_prettifier is defined earlier in the notebook; n=1 matches the single
# n_range value used by the group-by benchmark (presumably a row filter —
# confirm against its definition).
df_groupby = table_prettifier(df=bench_groupby, n=1)
INFO:functime.feature_extraction.tsfresh:Expression version of approximate_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extraction.tsfresh:Expression version of autoregressive_coefficients is not yet implemented due to technical difficulty regarding Polars Expression Plugins. The predicate '[(col("price").abs()) == (1000.0)]' in 'when->then->otherwise' is not a valid aggregation and might produce a different number of rows than the group_by operation would. This behavior is experimental and may be subject to change INFO:functime.feature_extraction.tsfresh:Expression version of sample_entropy is not yet implemented due to technical difficulty regarding Polars Expression Plugins. INFO:functime.feature_extraction.tsfresh:Expression version of spkt_welch_density is not yet implemented due to technical difficulty regarding Polars Expression Plugins.
S&P500 groupby¶
df_groupby
Feature name | functime + pl groupby (ms) | tfresh + pd groupby (ms) | diff (ms) | diff % | speedup |
---|---|---|---|---|---|
"energy_ratios" | 2.7 | 634.9 | -632.3 | -99.6 | "x 238.8" |
"range_count" | 1.1 | 38.9 | -37.8 | -97.2 | "x 35.4" |
"percent_reoccurring_points" | 2.6 | 64.4 | -61.9 | -96.0 | "x 25.1" |
"root_mean_square" | 0.91 | 22.5 | -21.6 | -96.0 | "x 24.7" |
"symmetry_looking" | 1.1 | 25.3 | -24.3 | -95.8 | "x 23.7" |
"ratio_beyond_r_sigma" | 2.2 | 50.7 | -48.5 | -95.7 | "x 23.2" |
"count_above" | 0.9 | 19.2 | -18.3 | -95.3 | "x 21.3" |
"change_quantiles" | 5.7 | 114.9 | -109.2 | -95.0 | "x 20.0" |
"count_below" | 0.95 | 19.0 | -18.1 | -95.0 | "x 20.0" |
"cid_ce" | 2.3 | 40.9 | -38.6 | -94.4 | "x 17.9" |
"absolute_maximum" | 0.88 | 14.3 | -13.4 | -93.9 | "x 16.3" |
"variation_coefficient" | 0.95 | 15.1 | -14.2 | -93.7 | "x 15.9" |
"large_standard_deviation" | 1.0 | 16.3 | -15.2 | -93.6 | "x 15.5" |
"c3" | 2.1 | 31.9 | -29.8 | -93.5 | "x 15.4" |
"number_peaks" | 17.2 | 239.0 | -221.8 | -92.8 | "x 13.9" |
"benford_correlation" | 9.2 | 124.6 | -115.4 | -92.6 | "x 13.5" |
"autocorrelation" | 1.5 | 18.4 | -16.9 | -91.9 | "x 12.3" |
"has_duplicate_max" | 1.9 | 22.5 | -20.5 | -91.4 | "x 11.6" |
"has_duplicate_min" | 2.0 | 22.8 | -20.8 | -91.3 | "x 11.6" |
"binned_entropy" | 3.2 | 35.2 | -32.0 | -91.0 | "x 11.1" |
"var_gt_std" | 0.99 | 10.9 | -9.9 | -90.9 | "x 11.0" |
"count_below_mean" | 2.0 | 21.3 | -19.3 | -90.8 | "x 10.8" |
"count_above_mean" | 2.0 | 21.5 | -19.4 | -90.6 | "x 10.6" |
"ratio_n_unique_to_length" | 1.2 | 12.5 | -11.3 | -90.1 | "x 10.1" |
"mean_change" | 0.77 | 7.3 | -6.6 | -89.5 | "x 9.5" |
"mean_n_absolute_max" | 1.9 | 17.0 | -15.1 | -88.9 | "x 9.0" |
"first_location_of_maximum" | 1.2 | 10.3 | -9.1 | -88.7 | "x 8.8" |
"absolute_energy" | 1.1 | 9.5 | -8.5 | -88.6 | "x 8.8" |
"first_location_of_minimum" | 1.2 | 10.3 | -9.1 | -88.5 | "x 8.7" |
"longest_streak_above_mean" | 4.2 | 30.3 | -26.1 | -86.1 | "x 7.2" |
"longest_streak_below_mean" | 4.2 | 29.7 | -25.5 | -85.8 | "x 7.0" |
"linear_trend" | 4.2 | 26.4 | -22.2 | -84.1 | "x 6.3" |
"percent_reoccurring_values" | 2.7 | 15.4 | -12.7 | -82.6 | "x 5.7" |
"sum_reoccurring_points" | 2.8 | 15.6 | -12.8 | -82.2 | "x 5.6" |
"mean_abs_change" | 1.8 | 9.9 | -8.1 | -82.2 | "x 5.6" |
"absolute_sum_of_changes" | 1.7 | 8.9 | -7.3 | -81.1 | "x 5.3" |
"last_location_of_maximum" | 1.6 | 7.9 | -6.3 | -80.1 | "x 5.0" |
"index_mass_quantile" | 2.1 | 10.7 | -8.5 | -80.0 | "x 5.0" |
"time_reversal_asymmetry_statistic" | 2.2 | 11.1 | -8.9 | -79.8 | "x 4.9" |
"last_location_of_minimum" | 1.7 | 8.0 | -6.3 | -78.5 | "x 4.6" |
"has_duplicate" | 2.7 | 11.7 | -9.0 | -77.3 | "x 4.4" |
"mean_second_derivative_central" | 1.6 | 7.2 | -5.6 | -77.1 | "x 4.4" |
"number_crossings" | 4.4 | 16.9 | -12.4 | -73.7 | "x 3.8" |
"sum_reoccurring_values" | 5.4 | 16.1 | -10.8 | -66.8 | "x 3.0" |
"permutation_entropy" | 66.4 | 56.2 | 10.2 | 18.2 | "x 0.85" |