Smart Pareto Filter¶

smart_pareto ¶

Smart Pareto Filter implementation.

Based on the paper: "Smart Pareto filter: obtaining a minimal representation of multiobjective design space" by Mattson, C. A., Mullur, A. A., & Messac, A. (2004)

The Smart Pareto Filter selects a minimal representative subset of Pareto-optimal solutions that maintains the essential trade-off information while reducing the number of points for visualization and decision making.

Functions¶

smart_pareto_filter ¶

smart_pareto_filter(objectives: ndarray | DataFrame, epsilon: float | None = None, max_points: int | None = None, normalize: bool = True, metric: str = 'euclidean') -> ndarray

Apply Smart Pareto Filter to select representative Pareto-optimal points.

The algorithm iteratively selects points that are maximally separated in the objective space, ensuring a well-distributed representation of the Pareto front.

Parameters:

Name	Type	Description	Default
`objectives`	`ndarray \| DataFrame`	Array or DataFrame of objective values (n_points, n_objectives) All objectives assumed to be minimized	required
`epsilon`	`float \| None`	Minimum normalized distance threshold. If None, calculated automatically	`None`
`max_points`	`int \| None`	Maximum number of points to select. If None, uses epsilon criterion	`None`
`normalize`	`bool`	Whether to normalize objectives to [0, 1] range	`True`
`metric`	`str`	Distance metric for scipy.spatial.distance.cdist	`'euclidean'`

Returns:

Type	Description
`ndarray`	Array of indices of selected representative points

Note

Input objectives should all be for minimization. If you have maximization objectives, negate them before passing to this function.

Source code in optiscope/analysis/smart_pareto.py

def smart_pareto_filter(
    objectives: np.ndarray | pd.DataFrame,
    epsilon: float | None = None,
    max_points: int | None = None,
    normalize: bool = True,
    metric: str = "euclidean",
) -> np.ndarray:
    """
    Apply Smart Pareto Filter to select representative Pareto-optimal points.

    The algorithm iteratively selects points that are maximally separated in
    the objective space, ensuring a well-distributed representation of the
    Pareto front.

    Args:
        objectives: Array or DataFrame of objective values (n_points, n_objectives)
                   All objectives assumed to be minimized
        epsilon: Minimum normalized distance threshold. If None, calculated automatically
        max_points: Maximum number of points to select. If None, uses epsilon criterion
        normalize: Whether to normalize objectives to [0, 1] range
        metric: Distance metric for scipy.spatial.distance.cdist

    Returns:
        Array of indices of selected representative points

    Note:
        Input objectives should all be for minimization. If you have maximization
        objectives, negate them before passing to this function.
    """
    # Convert to numpy array if DataFrame
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
    else:
        obj_array = np.asarray(objectives)

    n_points, n_objectives = obj_array.shape

    if n_points == 0:
        return np.array([], dtype=int)

    if n_points == 1:
        return np.array([0])

    # Normalize objectives to [0, 1] if requested
    if normalize:
        obj_min = obj_array.min(axis=0)
        obj_max = obj_array.max(axis=0)
        obj_range = obj_max - obj_min

        # Handle constant objectives
        obj_range[obj_range < 1e-10] = 1.0

        obj_normalized = (obj_array - obj_min) / obj_range
    else:
        obj_normalized = obj_array.copy()

    # Calculate epsilon if not provided
    if epsilon is None:
        # Use average distance between points divided by number of objectives
        # This provides a reasonable default based on problem dimensionality
        distances = cdist(obj_normalized, obj_normalized, metric=metric)
        np.fill_diagonal(distances, np.inf)
        avg_min_distance = np.mean(np.min(distances, axis=1))
        epsilon = avg_min_distance * np.sqrt(n_objectives)

    # Initialize with extreme points (anchor points)
    selected_indices = _get_anchor_points(obj_normalized)
    selected_set = set(selected_indices)

    if max_points and len(selected_indices) >= max_points:
        return np.array(selected_indices[:max_points])

    # Iteratively add points that are maximally separated from selected points
    with tqdm(total=max_points or n_points, desc="Applying Smart Pareto Filter") as pbar:
        pbar.update(len(selected_indices))
        while True:
            # Calculate minimum distance from each unselected point to selected points
            unselected_mask = np.ones(n_points, dtype=bool)
            unselected_mask[list(selected_set)] = False
            unselected_indices = np.where(unselected_mask)[0]

            if len(unselected_indices) == 0:
                break

            # Compute distances from unselected to selected points
            distances_to_selected = cdist(
                obj_normalized[unselected_indices],
                obj_normalized[list(selected_set)],
                metric=metric,
            )

            # Find minimum distance for each unselected point
            min_distances = distances_to_selected.min(axis=1)

            # Find point with maximum minimum distance (most isolated point)
            max_min_idx = min_distances.argmax()
            max_min_distance = min_distances[max_min_idx]

            # Check stopping criteria
            if max_min_distance < epsilon:
                break

            if max_points and len(selected_indices) >= max_points:
                break

            # Add the most isolated point
            new_point_idx = unselected_indices[max_min_idx]
            selected_indices.append(int(new_point_idx))
            selected_set.add(int(new_point_idx))
            pbar.update(1)

    return np.array(selected_indices)

adaptive_smart_pareto_filter ¶

adaptive_smart_pareto_filter(objectives: ndarray | DataFrame, target_reduction: float = 0.5, min_points: int = 5, max_points: int | None = None, normalize: bool = True) -> ndarray

Adaptive Smart Pareto Filter that automatically determines epsilon.

Adjusts epsilon to achieve a target reduction in the number of points while ensuring a minimum number of representative points.

Parameters:

Name	Type	Description	Default
`objectives`	`ndarray \| DataFrame`	Array or DataFrame of objective values (n_points, n_objectives)	required
`target_reduction`	`float`	Target fraction of points to keep (0.5 = keep 50%)	`0.5`
`min_points`	`int`	Minimum number of points to select	`5`
`max_points`	`int \| None`	Maximum number of points to select	`None`
`normalize`	`bool`	Whether to normalize objectives	`True`

Returns:

Type	Description
`ndarray`	Array of indices of selected representative points

Source code in optiscope/analysis/smart_pareto.py

def adaptive_smart_pareto_filter(
    objectives: np.ndarray | pd.DataFrame,
    target_reduction: float = 0.5,
    min_points: int = 5,
    max_points: int | None = None,
    normalize: bool = True,
) -> np.ndarray:
    """
    Adaptive Smart Pareto Filter that automatically determines epsilon.

    Adjusts epsilon to achieve a target reduction in the number of points
    while ensuring a minimum number of representative points.

    Args:
        objectives: Array or DataFrame of objective values (n_points, n_objectives)
        target_reduction: Target fraction of points to keep (0.5 = keep 50%)
        min_points: Minimum number of points to select
        max_points: Maximum number of points to select
        normalize: Whether to normalize objectives

    Returns:
        Array of indices of selected representative points
    """
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
    else:
        obj_array = np.asarray(objectives)

    n_points = len(obj_array)
    target_n_points = max(min_points, int(n_points * target_reduction))

    if max_points:
        target_n_points = min(target_n_points, max_points)

    # Binary search for epsilon that gives desired number of points
    obj_normalized = obj_array.copy()
    if normalize:
        obj_min = obj_array.min(axis=0)
        obj_max = obj_array.max(axis=0)
        obj_range = obj_max - obj_min
        obj_range[obj_range < 1e-10] = 1.0
        obj_normalized = (obj_array - obj_min) / obj_range

    # Initial epsilon bounds
    distances = cdist(obj_normalized, obj_normalized, metric="euclidean")
    np.fill_diagonal(distances, np.inf)
    min_possible_epsilon = np.min(distances) * 0.1
    max_possible_epsilon = np.max(distances) * 2.0

    # Binary search
    epsilon_low = min_possible_epsilon
    epsilon_high = max_possible_epsilon
    best_epsilon = None
    best_diff = float("inf")

    for _ in range(20):  # Maximum iterations
        epsilon_mid = (epsilon_low + epsilon_high) / 2

        indices = smart_pareto_filter(objectives, epsilon=float(epsilon_mid), normalize=normalize)

        n_selected = len(indices)
        diff = abs(n_selected - target_n_points)

        if diff < best_diff:
            best_diff = diff
            best_epsilon = epsilon_mid

        if n_selected > target_n_points:
            # Too many points, increase epsilon
            epsilon_low = epsilon_mid
        elif n_selected < target_n_points:
            # Too few points, decrease epsilon
            epsilon_high = epsilon_mid
        else:
            # Perfect match
            break

        # Stop if we're close enough
        if diff <= 1:
            break

    return smart_pareto_filter(
        objectives,
        epsilon=float(best_epsilon) if best_epsilon is not None else None,
        normalize=normalize,
    )

visualize_filter_effect ¶

visualize_filter_effect(objectives: ndarray | DataFrame, selected_indices: ndarray, obj_i: int = 0, obj_j: int = 1) -> dict

Create data for visualizing the effect of Smart Pareto Filter.

Parameters:

Name	Type	Description	Default
`objectives`	`ndarray \| DataFrame`	Original objective array	required
`selected_indices`	`ndarray`	Indices selected by filter	required
`obj_i`	`int`	First objective index for 2D visualization	`0`
`obj_j`	`int`	Second objective index for 2D visualization	`1`

Returns:

Type	Description
`dict`	Dictionary with visualization data

Source code in optiscope/analysis/smart_pareto.py

def visualize_filter_effect(
    objectives: np.ndarray | pd.DataFrame,
    selected_indices: np.ndarray,
    obj_i: int = 0,
    obj_j: int = 1,
) -> dict:
    """
    Create data for visualizing the effect of Smart Pareto Filter.

    Args:
        objectives: Original objective array
        selected_indices: Indices selected by filter
        obj_i: First objective index for 2D visualization
        obj_j: Second objective index for 2D visualization

    Returns:
        Dictionary with visualization data
    """
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
        obj_names = objectives.columns.tolist()
    else:
        obj_array = np.asarray(objectives)
        obj_names = [f"f{i + 1}" for i in range(obj_array.shape[1])]

    n_points = len(obj_array)
    all_indices = np.arange(n_points)
    filtered_indices = np.setdiff1d(all_indices, selected_indices)

    return {
        "all_points": obj_array,
        "selected_points": obj_array[selected_indices],
        "filtered_points": obj_array[filtered_indices],
        "selected_indices": selected_indices,
        "filtered_indices": filtered_indices,
        "obj_names": obj_names,
        "reduction_ratio": len(selected_indices) / n_points,
        "n_original": n_points,
        "n_filtered": len(selected_indices),
    }

calculate_coverage_metric ¶

calculate_coverage_metric(objectives: ndarray | DataFrame, selected_indices: ndarray, normalize: bool = True) -> dict

Calculate coverage metrics for filtered Pareto front.

Measures how well the filtered set represents the original Pareto front.

Parameters:

Name	Type	Description	Default
`objectives`	`ndarray \| DataFrame`	Original objective array	required
`selected_indices`	`ndarray`	Indices selected by filter	required
`normalize`	`bool`	Whether to normalize objectives	`True`

Returns:

Type	Description
`dict`	Dictionary with coverage metrics

Source code in optiscope/analysis/smart_pareto.py

def calculate_coverage_metric(
    objectives: np.ndarray | pd.DataFrame,
    selected_indices: np.ndarray,
    normalize: bool = True,
) -> dict:
    """
    Calculate coverage metrics for filtered Pareto front.

    Measures how well the filtered set represents the original Pareto front.

    Args:
        objectives: Original objective array
        selected_indices: Indices selected by filter
        normalize: Whether to normalize objectives

    Returns:
        Dictionary with coverage metrics
    """
    if isinstance(objectives, pd.DataFrame):
        obj_array = objectives.values
    else:
        obj_array = np.asarray(objectives)

    if normalize:
        obj_min = obj_array.min(axis=0)
        obj_max = obj_array.max(axis=0)
        obj_range = obj_max - obj_min
        obj_range[obj_range < 1e-10] = 1.0
        obj_normalized = (obj_array - obj_min) / obj_range
    else:
        obj_normalized = obj_array

    selected_points = obj_normalized[selected_indices]

    # Calculate minimum distance from each original point to selected points
    distances = cdist(obj_normalized, selected_points, metric="euclidean")
    min_distances = distances.min(axis=1)

    # Coverage metrics
    max_distance = min_distances.max()
    avg_distance = min_distances.mean()
    std_distance = min_distances.std()

    # Uniformity: standard deviation of distances between consecutive selected points
    if len(selected_indices) > 1:
        selected_distances = cdist(selected_points, selected_points, metric="euclidean")
        np.fill_diagonal(selected_distances, np.inf)
        min_neighbor_distances = selected_distances.min(axis=1)
        uniformity = min_neighbor_distances.std()
    else:
        uniformity = 0.0

    return {
        "max_distance_to_selected": float(max_distance),
        "avg_distance_to_selected": float(avg_distance),
        "std_distance_to_selected": float(std_distance),
        "uniformity": float(uniformity),
        "coverage_ratio": 1.0 - (avg_distance / np.sqrt(obj_normalized.shape[1])),
    }