Skip to content

Smart Pareto Filter

smart_pareto

Smart Pareto Filter implementation.

Based on the paper: "Smart Pareto filter: obtaining a minimal representation of multiobjective design space" by Mattson, C. A., Mullur, A. A., & Messac, A. (2004)

The Smart Pareto Filter selects a minimal representative subset of Pareto-optimal solutions that maintains the essential trade-off information while reducing the number of points for visualization and decision making.

Functions

smart_pareto_filter

smart_pareto_filter(objectives: ndarray | DataFrame, epsilon: float | None = None, max_points: int | None = None, normalize: bool = True, metric: str = 'euclidean') -> ndarray

Apply Smart Pareto Filter to select representative Pareto-optimal points.

The algorithm iteratively selects points that are maximally separated in the objective space, ensuring a well-distributed representation of the Pareto front.

Parameters:

Name Type Description Default
objectives ndarray | DataFrame

Array or DataFrame of objective values (n_points, n_objectives) All objectives assumed to be minimized

required
epsilon float | None

Minimum normalized distance threshold. If None, calculated automatically

None
max_points int | None

Maximum number of points to select. If None, uses epsilon criterion

None
normalize bool

Whether to normalize objectives to [0, 1] range

True
metric str

Distance metric for scipy.spatial.distance.cdist

'euclidean'

Returns:

Type Description
ndarray

Array of indices of selected representative points

Note

Input objectives should all be for minimization. If you have maximization objectives, negate them before passing to this function.

Source code in optiscope/analysis/smart_pareto.py
def smart_pareto_filter(
    objectives: np.ndarray | pd.DataFrame,
    epsilon: float | None = None,
    max_points: int | None = None,
    normalize: bool = True,
    metric: str = "euclidean",
) -> np.ndarray:
    """
    Apply Smart Pareto Filter to select representative Pareto-optimal points.

    The algorithm iteratively selects points that are maximally separated in
    the objective space, ensuring a well-distributed representation of the
    Pareto front.

    Args:
        objectives: Array or DataFrame of objective values (n_points, n_objectives)
                   All objectives assumed to be minimized
        epsilon: Minimum normalized distance threshold. If None, calculated automatically
        max_points: Maximum number of points to select. If None, uses epsilon criterion
        normalize: Whether to normalize objectives to [0, 1] range
        metric: Distance metric for scipy.spatial.distance.cdist

    Returns:
        Array of indices of selected representative points

    Note:
        Input objectives should all be for minimization. If you have maximization
        objectives, negate them before passing to this function.
    """
    # Convert to numpy array if DataFrame
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
    else:
        obj_array = np.asarray(objectives)

    n_points, n_objectives = obj_array.shape

    if n_points == 0:
        return np.array([], dtype=int)

    if n_points == 1:
        return np.array([0])

    # Normalize objectives to [0, 1] if requested
    if normalize:
        obj_min = obj_array.min(axis=0)
        obj_max = obj_array.max(axis=0)
        obj_range = obj_max - obj_min

        # Handle constant objectives
        obj_range[obj_range < 1e-10] = 1.0

        obj_normalized = (obj_array - obj_min) / obj_range
    else:
        obj_normalized = obj_array.copy()

    # Calculate epsilon if not provided
    if epsilon is None:
        # Use average distance between points divided by number of objectives
        # This provides a reasonable default based on problem dimensionality
        distances = cdist(obj_normalized, obj_normalized, metric=metric)
        np.fill_diagonal(distances, np.inf)
        avg_min_distance = np.mean(np.min(distances, axis=1))
        epsilon = avg_min_distance * np.sqrt(n_objectives)

    # Initialize with extreme points (anchor points)
    selected_indices = _get_anchor_points(obj_normalized)
    selected_set = set(selected_indices)

    if max_points and len(selected_indices) >= max_points:
        return np.array(selected_indices[:max_points])

    # Iteratively add points that are maximally separated from selected points
    with tqdm(total=max_points or n_points, desc="Applying Smart Pareto Filter") as pbar:
        pbar.update(len(selected_indices))
        while True:
            # Calculate minimum distance from each unselected point to selected points
            unselected_mask = np.ones(n_points, dtype=bool)
            unselected_mask[list(selected_set)] = False
            unselected_indices = np.where(unselected_mask)[0]

            if len(unselected_indices) == 0:
                break

            # Compute distances from unselected to selected points
            distances_to_selected = cdist(
                obj_normalized[unselected_indices],
                obj_normalized[list(selected_set)],
                metric=metric,
            )

            # Find minimum distance for each unselected point
            min_distances = distances_to_selected.min(axis=1)

            # Find point with maximum minimum distance (most isolated point)
            max_min_idx = min_distances.argmax()
            max_min_distance = min_distances[max_min_idx]

            # Check stopping criteria
            if max_min_distance < epsilon:
                break

            if max_points and len(selected_indices) >= max_points:
                break

            # Add the most isolated point
            new_point_idx = unselected_indices[max_min_idx]
            selected_indices.append(int(new_point_idx))
            selected_set.add(int(new_point_idx))
            pbar.update(1)

    return np.array(selected_indices)

adaptive_smart_pareto_filter

adaptive_smart_pareto_filter(objectives: ndarray | DataFrame, target_reduction: float = 0.5, min_points: int = 5, max_points: int | None = None, normalize: bool = True) -> ndarray

Adaptive Smart Pareto Filter that automatically determines epsilon.

Adjusts epsilon to achieve a target reduction in the number of points while ensuring a minimum number of representative points.

Parameters:

Name Type Description Default
objectives ndarray | DataFrame

Array or DataFrame of objective values (n_points, n_objectives)

required
target_reduction float

Target fraction of points to keep (0.5 = keep 50%)

0.5
min_points int

Minimum number of points to select

5
max_points int | None

Maximum number of points to select

None
normalize bool

Whether to normalize objectives

True

Returns:

Type Description
ndarray

Array of indices of selected representative points

Source code in optiscope/analysis/smart_pareto.py
def adaptive_smart_pareto_filter(
    objectives: np.ndarray | pd.DataFrame,
    target_reduction: float = 0.5,
    min_points: int = 5,
    max_points: int | None = None,
    normalize: bool = True,
) -> np.ndarray:
    """
    Adaptive Smart Pareto Filter that automatically determines epsilon.

    Adjusts epsilon to achieve a target reduction in the number of points
    while ensuring a minimum number of representative points.

    Args:
        objectives: Array or DataFrame of objective values (n_points, n_objectives)
        target_reduction: Target fraction of points to keep (0.5 = keep 50%)
        min_points: Minimum number of points to select
        max_points: Maximum number of points to select
        normalize: Whether to normalize objectives

    Returns:
        Array of indices of selected representative points
    """
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
    else:
        obj_array = np.asarray(objectives)

    n_points = len(obj_array)
    target_n_points = max(min_points, int(n_points * target_reduction))

    if max_points:
        target_n_points = min(target_n_points, max_points)

    # Binary search for epsilon that gives desired number of points
    obj_normalized = obj_array.copy()
    if normalize:
        obj_min = obj_array.min(axis=0)
        obj_max = obj_array.max(axis=0)
        obj_range = obj_max - obj_min
        obj_range[obj_range < 1e-10] = 1.0
        obj_normalized = (obj_array - obj_min) / obj_range

    # Initial epsilon bounds
    distances = cdist(obj_normalized, obj_normalized, metric="euclidean")
    np.fill_diagonal(distances, np.inf)
    min_possible_epsilon = np.min(distances) * 0.1
    max_possible_epsilon = np.max(distances) * 2.0

    # Binary search
    epsilon_low = min_possible_epsilon
    epsilon_high = max_possible_epsilon
    best_epsilon = None
    best_diff = float("inf")

    for _ in range(20):  # Maximum iterations
        epsilon_mid = (epsilon_low + epsilon_high) / 2

        indices = smart_pareto_filter(objectives, epsilon=float(epsilon_mid), normalize=normalize)

        n_selected = len(indices)
        diff = abs(n_selected - target_n_points)

        if diff < best_diff:
            best_diff = diff
            best_epsilon = epsilon_mid

        if n_selected > target_n_points:
            # Too many points, increase epsilon
            epsilon_low = epsilon_mid
        elif n_selected < target_n_points:
            # Too few points, decrease epsilon
            epsilon_high = epsilon_mid
        else:
            # Perfect match
            break

        # Stop if we're close enough
        if diff <= 1:
            break

    return smart_pareto_filter(
        objectives,
        epsilon=float(best_epsilon) if best_epsilon is not None else None,
        normalize=normalize,
    )

visualize_filter_effect

visualize_filter_effect(objectives: ndarray | DataFrame, selected_indices: ndarray, obj_i: int = 0, obj_j: int = 1) -> dict

Create data for visualizing the effect of Smart Pareto Filter.

Parameters:

Name Type Description Default
objectives ndarray | DataFrame

Original objective array

required
selected_indices ndarray

Indices selected by filter

required
obj_i int

First objective index for 2D visualization

0
obj_j int

Second objective index for 2D visualization

1

Returns:

Type Description
dict

Dictionary with visualization data

Source code in optiscope/analysis/smart_pareto.py
def visualize_filter_effect(
    objectives: np.ndarray | pd.DataFrame,
    selected_indices: np.ndarray,
    obj_i: int = 0,
    obj_j: int = 1,
) -> dict:
    """
    Create data for visualizing the effect of Smart Pareto Filter.

    Args:
        objectives: Original objective array
        selected_indices: Indices selected by filter
        obj_i: First objective index for 2D visualization
        obj_j: Second objective index for 2D visualization

    Returns:
        Dictionary with visualization data
    """
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
        obj_names = objectives.columns.tolist()
    else:
        obj_array = np.asarray(objectives)
        obj_names = [f"f{i + 1}" for i in range(obj_array.shape[1])]

    n_points = len(obj_array)
    all_indices = np.arange(n_points)
    filtered_indices = np.setdiff1d(all_indices, selected_indices)

    return {
        "all_points": obj_array,
        "selected_points": obj_array[selected_indices],
        "filtered_points": obj_array[filtered_indices],
        "selected_indices": selected_indices,
        "filtered_indices": filtered_indices,
        "obj_names": obj_names,
        "reduction_ratio": len(selected_indices) / n_points,
        "n_original": n_points,
        "n_filtered": len(selected_indices),
    }

calculate_coverage_metric

calculate_coverage_metric(objectives: ndarray | DataFrame, selected_indices: ndarray, normalize: bool = True) -> dict

Calculate coverage metrics for filtered Pareto front.

Measures how well the filtered set represents the original Pareto front.

Parameters:

Name Type Description Default
objectives ndarray | DataFrame

Original objective array

required
selected_indices ndarray

Indices selected by filter

required
normalize bool

Whether to normalize objectives

True

Returns:

Type Description
dict

Dictionary with coverage metrics

Source code in optiscope/analysis/smart_pareto.py
def calculate_coverage_metric(
    objectives: np.ndarray | pd.DataFrame,
    selected_indices: np.ndarray,
    normalize: bool = True,
) -> dict:
    """
    Calculate coverage metrics for filtered Pareto front.

    Measures how well the filtered set represents the original Pareto front.

    Args:
        objectives: Original objective array
        selected_indices: Indices selected by filter
        normalize: Whether to normalize objectives

    Returns:
        Dictionary with coverage metrics
    """
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
    else:
        obj_array = np.asarray(objectives)

    if normalize:
        obj_min = obj_array.min(axis=0)
        obj_max = obj_array.max(axis=0)
        obj_range = obj_max - obj_min
        obj_range[obj_range < 1e-10] = 1.0
        obj_normalized = (obj_array - obj_min) / obj_range
    else:
        obj_normalized = obj_array

    selected_points = obj_normalized[selected_indices]

    # Calculate minimum distance from each original point to selected points
    distances = cdist(obj_normalized, selected_points, metric="euclidean")
    min_distances = distances.min(axis=1)

    # Coverage metrics
    max_distance = min_distances.max()
    avg_distance = min_distances.mean()
    std_distance = min_distances.std()

    # Uniformity: standard deviation of distances between consecutive selected points
    if len(selected_indices) > 1:
        selected_distances = cdist(selected_points, selected_points, metric="euclidean")
        np.fill_diagonal(selected_distances, np.inf)
        min_neighbor_distances = selected_distances.min(axis=1)
        uniformity = min_neighbor_distances.std()
    else:
        uniformity = 0.0

    return {
        "max_distance_to_selected": float(max_distance),
        "avg_distance_to_selected": float(avg_distance),
        "std_distance_to_selected": float(std_distance),
        "uniformity": float(uniformity),
        "coverage_ratio": 1.0 - (avg_distance / np.sqrt(obj_normalized.shape[1])),
    }