Examples
Bernstein regression
"""Bernstein polynomial features for non-linear regression."""
# %%
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import QuantileTransformer
from skpoly import BernsteinFeatures
# %%
# Sample a smooth, non-linear signal on an evenly spaced grid over [0, 1]
# and add Gaussian noise drawn from a seeded generator.
random_state = np.random.RandomState(0)
X = np.linspace(0, 1, 200)[:, None]
x_flat = X[:, 0]
noise = random_state.normal(scale=0.1, size=X.shape[0])
y = np.sin(2 * np.pi * x_flat) + 0.3 * np.cos(6 * np.pi * x_flat) + noise
# Hold out a quarter of the samples. The same generator instance is reused,
# so the split is drawn after the noise from the same random stream.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=random_state,
)
# %%
# Cap the number of quantiles at the number of training samples.
n_quantiles = min(100, X_train.shape[0])
# Steps: quantile rescaling to a uniform distribution -> Bernstein basis
# expansion -> ridge-regularised linear fit.
# NOTE(review): presumably BernsteinFeatures expects inputs in [0, 1], which
# is why a uniform-output transformer precedes it — confirm in skpoly docs.
to_uniform = QuantileTransformer(
    n_quantiles=n_quantiles,
    output_distribution="uniform",
    random_state=random_state,
)
bernstein_basis = BernsteinFeatures(degree=10, include_bias=True, tensor_product=False)
pipeline = make_pipeline(to_uniform, bernstein_basis, Ridge(alpha=1e-2))
pipeline.fit(X_train, y_train)
# Score the fitted pipeline on the samples it has not seen.
y_pred = pipeline.predict(X_test)
print(f"MAE on the held-out set: {mean_absolute_error(y_test, y_pred):.3f}")
Univariate Bernstein regression
"""Visualize a Bernstein polynomial regression fit on a 1D function."""
# %%
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from skpoly import BernsteinFeatures
# %%
def target_function(x: np.ndarray) -> np.ndarray:
    """Evaluate the smooth ground-truth signal defined on [-1, 1].

    The signal is ``sin(pi * x) + 0.3 * cos(4 * pi * x)``, computed
    element-wise with NumPy ufuncs.

    Parameters
    ----------
    x : np.ndarray
        Points at which to evaluate the signal.

    Returns
    -------
    np.ndarray
        Signal values, one per entry of ``x``.
    """
    return np.sin(np.pi * x) + 0.3 * np.cos(4 * np.pi * x)
# Noisy observations of the target on an evenly spaced grid over [-1, 1].
random_state = np.random.RandomState(0)
X = np.linspace(-1.0, 1.0, 200).reshape(-1, 1)
noise = random_state.normal(scale=0.1, size=X.shape[0])
y = target_function(X[:, 0]) + noise
# %%
# Steps: min-max rescaling (default output range) -> Bernstein basis
# expansion -> unregularised least-squares fit.
scaler = MinMaxScaler()
bernstein_basis = BernsteinFeatures(degree=12, include_bias=True, tensor_product=False)
pipeline = make_pipeline(scaler, bernstein_basis, LinearRegression())
pipeline.fit(X, y)
# %%
# Evaluate on a grid denser than the training sample and compare the
# prediction against the noise-free signal.
X_plot = np.linspace(-1.0, 1.0, 400)[:, None]
x_grid = X_plot[:, 0]
y_true = target_function(x_grid)
y_pred = pipeline.predict(X_plot)
print(f"MAE on the dense grid: {mean_absolute_error(y_true, y_pred):.3f}")

# Overlay the noisy samples, the true signal and the fitted curve on one axes.
fig, ax = plt.subplots(figsize=(7, 4))
ax.scatter(X[:, 0], y, color="#4c72b0", alpha=0.5, s=20, label="Noisy samples")
ax.plot(x_grid, y_true, color="#55a868", linewidth=2, label="True function")
ax.plot(x_grid, y_pred, color="#c44e52", linewidth=2, label="Bernstein approximation")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Bernstein polynomial regression with feature scaling")
ax.legend()
fig.tight_layout()
plt.show()
Legendre classification
"""Legendre polynomial features for non-linear classification."""
# %%
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from skpoly import LegendreFeatures
# %%
# Two-moons toy dataset with a fixed seed, then a stratified 75/25 split so
# both classes keep their proportions in the train and test parts.
X, y = make_moons(noise=0.25, random_state=0)
splits = train_test_split(X, y, test_size=0.25, random_state=0, stratify=y)
X_train, X_test, y_train, y_test = splits
# %%
# Steps: min-max rescaling (default output range) -> Legendre basis expansion
# with tensor-product terms -> logistic regression.
# NOTE(review): Legendre polynomials are classically defined on [-1, 1] while
# MinMaxScaler() defaults to [0, 1]; presumably LegendreFeatures handles the
# mapping itself — confirm against the skpoly documentation.
scaler = MinMaxScaler()
legendre_basis = LegendreFeatures(degree=5, include_bias=False, tensor_product=True)
classifier = LogisticRegression(max_iter=2000)
pipeline = make_pipeline(scaler, legendre_basis, classifier)
pipeline.fit(X_train, y_train)
# Score the fitted pipeline on the samples it has not seen.
y_pred = pipeline.predict(X_test)
print(f"Accuracy on the held-out set: {accuracy_score(y_test, y_pred):.3f}")