235 lines
7.0 KiB
Python
235 lines
7.0 KiB
Python
from unittest.mock import Mock
|
|
|
|
import numpy as np
|
|
import pytest
|
|
from numpy.testing import assert_allclose, assert_array_almost_equal, assert_equal
|
|
|
|
from sklearn.datasets import load_digits
|
|
from sklearn.manifold import _mds as mds
|
|
from sklearn.metrics import euclidean_distances
|
|
|
|
|
|
def test_smacof():
    """Check a single metric SMACOF iteration against a textbook example.

    The input matrix, the starting configuration and the expected result come
    from "Modern Multidimensional Scaling", Borg & Groenen, p 154.
    """
    dissim = np.array(
        [
            [0, 5, 3, 4],
            [5, 0, 2, 2],
            [3, 2, 0, 1],
            [4, 2, 1, 0],
        ]
    )
    start = np.array(
        [
            [-0.266, -0.539],
            [0.451, 0.252],
            [0.016, -0.238],
            [-0.200, 0.524],
        ]
    )
    expected = np.array(
        [
            [-1.415, -2.471],
            [1.633, 1.107],
            [0.249, -0.067],
            [-0.468, 1.431],
        ]
    )

    embedding, _ = mds.smacof(
        dissim, init=start, n_components=2, max_iter=1, n_init=1
    )

    assert_array_almost_equal(embedding, expected, decimal=3)
|
|
|
|
|
|
def test_nonmetric_lower_normalized_stress():
    """Non-metric MDS must reach a lower normalized stress than metric MDS.

    Non-regression test for issue 27028.
    """
    dissim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
    start = np.array(
        [[-0.266, -0.539], [0.451, 0.252], [0.016, -0.238], [-0.200, 0.524]]
    )

    # Everything except ``metric`` is shared between the two runs.
    shared_kwargs = dict(
        init=start,
        n_components=2,
        max_iter=1000,
        n_init=1,
        normalized_stress=True,
    )

    _, metric_stress = mds.smacof(dissim, **shared_kwargs)
    _, nonmetric_stress = mds.smacof(dissim, metric=False, **shared_kwargs)

    assert metric_stress > nonmetric_stress
|
|
|
|
|
|
def test_nonmetric_mds_optimization():
    """Stress must decrease from one non-metric MDS iteration to the next.

    Non-regression test for issue 27028.
    """
    digits, _ = load_digits(return_X_y=True)
    rng = np.random.default_rng(seed=42)
    subset = rng.choice(len(digits), size=200, replace=False)
    digits = digits[subset]

    def stress_after(n_iterations):
        # Identical settings and seed for every fit: the runs differ only by
        # their iteration budget.
        estimator = mds.MDS(
            n_components=2,
            n_init=1,
            max_iter=n_iterations,
            metric=False,
            random_state=42,
        )
        return estimator.fit(digits).stress_

    stress_after_2_iter = stress_after(2)
    stress_after_3_iter = stress_after(3)

    assert stress_after_2_iter > stress_after_3_iter
|
|
|
|
|
|
@pytest.mark.parametrize("metric", [True, False])
def test_mds_recovers_true_data(metric):
    """Embedding 2D points into 2 components should leave (almost) no stress."""
    points = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])

    estimator = mds.MDS(
        n_components=2,
        n_init=1,
        eps=1e-15,
        max_iter=1000,
        metric=metric,
        random_state=42,
    )
    estimator.fit(points)

    assert_allclose(estimator.stress_, 0, atol=1e-6)
|
|
|
|
|
|
def test_smacof_error():
    """smacof must raise ValueError on malformed matrices or init arrays."""
    # Matrix that is not symmetric: entry (0, 2) is 9 while entry (2, 0) is 3.
    asymmetric = np.array(
        [[0, 5, 9, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]]
    )
    with pytest.raises(ValueError):
        mds.smacof(asymmetric, n_init=1)

    # Matrix that is not square: three rows of four entries.
    non_square = np.array([[0, 5, 9, 4], [5, 0, 2, 2], [4, 2, 1, 0]])
    with pytest.raises(ValueError):
        mds.smacof(non_square, n_init=1)

    # Well-formed matrix, but the init array has three rows for four samples.
    well_formed = np.array(
        [[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]]
    )
    short_init = np.array([[-0.266, -0.539], [0.016, -0.238], [-0.200, 0.524]])
    with pytest.raises(ValueError):
        mds.smacof(well_formed, init=short_init, n_init=1)
|
|
|
|
|
|
def test_MDS():
    """Smoke test: fit non-metric MDS on a precomputed matrix.

    Uses ``n_jobs=3`` together with ``n_init=3``; the test only checks that
    fitting completes without raising.
    """
    precomputed = np.array(
        [[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]]
    )
    estimator_params = {
        "metric": False,
        "n_jobs": 3,
        "n_init": 3,
        "dissimilarity": "precomputed",
    }

    estimator = mds.MDS(**estimator_params)
    estimator.fit(precomputed)
|
|
|
|
|
|
# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("k", [0.5, 1.5, 2])
def test_normed_stress(k):
    """Check that non-metric MDS normalized stress is scale-invariant.

    Multiplying the input matrix by ``k`` must leave both the returned stress
    and the returned embedding unchanged (up to ``rtol=1e-5``).
    """
    base = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
    options = dict(metric=False, max_iter=5, random_state=0)

    reference_embedding, reference_stress = mds.smacof(base, **options)
    scaled_embedding, scaled_stress = mds.smacof(k * base, **options)

    assert_allclose(reference_stress, scaled_stress, rtol=1e-5)
    assert_allclose(reference_embedding, scaled_embedding, rtol=1e-5)
|
|
|
|
|
|
# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric", [True, False])
def test_normalized_stress_auto(metric, monkeypatch):
    """Check the ``normalized_stress`` value forwarded when set to ``"auto"``.

    ``_smacof_single`` is replaced by a mock that still delegates to the real
    function, so the ``normalized_stress`` keyword it receives can be
    inspected. The check is done once through the ``MDS`` estimator and once
    through the ``smacof`` function; in both cases the forwarded value must
    differ from ``metric``.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(4, 3)
    dist = euclidean_distances(X)

    mock = Mock(side_effect=mds._smacof_single)
    monkeypatch.setattr("sklearn.manifold._mds._smacof_single", mock)

    est = mds.MDS(metric=metric, normalized_stress="auto", random_state=rng)
    est.fit_transform(X)
    mock.assert_called()
    assert mock.call_args.kwargs["normalized_stress"] != metric

    # Drop the calls recorded during the estimator fit. Without this, the
    # assertion below could be satisfied by the stale ``call_args`` of the
    # previous section even if ``smacof`` never reached ``_smacof_single``.
    # ``reset_mock`` keeps ``side_effect`` by default, so the mock still
    # delegates to the real implementation.
    mock.reset_mock()

    mds.smacof(dist, metric=metric, normalized_stress="auto", random_state=rng)
    mock.assert_called()
    assert mock.call_args.kwargs["normalized_stress"] != metric
|
|
|
|
|
|
def test_isotonic_outofbounds():
    """Run non-metric smacof on a configuration known to be problematic.

    This particular input can trigger an out of bounds error in the isotonic
    regression (non-regression test for issue 26999). The test passes as long
    as the call does not raise.
    """
    dissimilarities = np.array(
        [
            [0.0, 1.732050807568877, 1.7320508075688772],
            [1.732050807568877, 0.0, 6.661338147750939e-16],
            [1.7320508075688772, 6.661338147750939e-16, 0.0],
        ]
    )
    start = np.array(
        [
            [0.08665881585055124, 0.7939114643387546],
            [0.9959834154297658, 0.7555546025640025],
            [0.8766008278401566, 0.4227358815811242],
        ]
    )

    mds.smacof(
        dissimilarities,
        init=start,
        metric=False,
        n_init=1,
    )
|
|
|
|
|
|
# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("normalized_stress", [True, False])
def test_returned_stress(normalized_stress):
    """Check that the reported stress corresponds to the final embedding.

    Non-regression test for issue 16846.
    """
    X = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])
    target_distances = euclidean_distances(X).ravel()

    estimator = mds.MDS(
        n_components=2,
        random_state=42,
        normalized_stress=normalized_stress,
    )
    estimator.fit(X)

    embedded_distances = euclidean_distances(estimator.embedding_).ravel()

    # Recompute the stress by hand from the distances within the embedding.
    expected_stress = ((embedded_distances - target_distances) ** 2).sum() / 2
    if normalized_stress:
        scale = (embedded_distances**2).sum() / 2
        expected_stress = np.sqrt(expected_stress / scale)

    assert_allclose(estimator.stress_, expected_stress)
|
|
|
|
|
|
# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric", [True, False])
def test_convergence_does_not_depend_on_scale(metric):
    """Check that the number of iterations is the same at different scales.

    The same estimator is fitted on the data multiplied by 100 and on the
    data divided by 100; ``n_iter_`` must be equal after both fits.
    """
    X = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])
    estimator = mds.MDS(n_components=2, random_state=42, metric=metric)

    n_iter_large = estimator.fit(X * 100).n_iter_
    n_iter_small = estimator.fit(X / 100).n_iter_

    assert_equal(n_iter_large, n_iter_small)
|
|
|
|
|
|
# TODO(1.9): delete this test
def test_future_warning_n_init():
    """smacof and MDS must emit a FutureWarning when used with defaults.

    Neither call passes ``n_init``.
    """
    samples = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])
    precomputed = np.array(
        [[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]]
    )

    # Function API.
    with pytest.warns(FutureWarning):
        mds.smacof(precomputed)

    # Estimator API.
    with pytest.warns(FutureWarning):
        estimator = mds.MDS()
        estimator.fit(samples)
|