# -*- coding: utf-8 -*-

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Python functions which run only within a Jupyter or Colab notebook."""

import html
import random
import string
import sys
from typing import List, Optional, Tuple

from vertexai.preview.evaluation import _base
from vertexai.preview.evaluation import constants

try:
    import pandas as pd
except ImportError:
    # `pd` is the name used throughout this module, so it must be bound even
    # when pandas is missing; `pandas` is kept for backward compatibility.
    pd = None
    pandas = None

_MARKDOWN_H2 = "##"
_MARKDOWN_H3 = "###"
_DEFAULT_COLUMNS_TO_DISPLAY = [
    constants.Dataset.MODEL_RESPONSE_COLUMN,
    constants.Dataset.BASELINE_MODEL_RESPONSE_COLUMN,
    constants.Dataset.PROMPT_COLUMN,
    constants.MetricResult.ROW_COUNT_KEY,
]
_DEFAULT_RADAR_RANGE = (0, 5)


def _get_ipython_shell_name() -> str:
    """Returns the class name of the active IPython shell, or "" if none.

    IPython is only consulted when it has already been imported, so calling
    this function never triggers an import of IPython by itself.
    """
    if "IPython" not in sys.modules:
        return ""

    # pylint: disable=g-import-not-at-top, g-importing-member
    from IPython import get_ipython

    shell = get_ipython()
    # `get_ipython()` returns None when IPython is importable but no
    # interactive shell is running; report that as "no shell" rather than
    # leaking the truthy string "NoneType".
    if shell is None:
        return ""
    return shell.__class__.__name__


def is_ipython_available() -> bool:
    """Returns True when the code is running inside an IPython shell."""
    return bool(_get_ipython_shell_name())


def _filter_df(
    df: "pd.DataFrame", substrings: Optional[List[str]] = None
) -> "pd.DataFrame":
    """Filters a DataFrame to include only columns containing the given substrings.

    Args:
        df: The DataFrame to filter.
        substrings: Substrings to look for in the column names. If None, `df`
          is returned unchanged (the same object, not a copy).

    Returns:
        A new DataFrame holding only the columns whose name contains at least
        one of `substrings`, or `df` itself when `substrings` is None.
    """
    if substrings is None:
        return df

    # Column labels are not guaranteed to be strings, so compare against
    # their string form instead of raising TypeError on e.g. integer labels.
    columns_to_keep = [
        column_name
        for column_name in df.columns
        if any(substring in str(column_name) for substring in substrings)
    ]
    return df.copy().filter(columns_to_keep)


def _import_plotly_graph_objects():
    """Imports and returns `plotly.graph_objects`, with a helpful error if missing."""
    # pylint: disable=g-import-not-at-top
    try:
        import plotly.graph_objects as go
    except ImportError as exc:
        raise ImportError(
            '`plotly` is not installed. Please install using "!pip install plotly"'
        ) from exc
    return go


def display_eval_result(
    *,
    eval_result: _base.EvalResult,
    title: Optional[str] = None,
    metrics: Optional[List[str]] = None,
) -> None:
    """Displays evaluation results in a notebook using IPython.display.

    Does nothing when not running inside an IPython shell.

    Args:
        eval_result: An object containing evaluation results with
          `summary_metrics` and `metrics_table` attributes.
        title: A string title to display above the results.
        metrics: A list of metric name substrings to filter displayed columns.
          If provided, only metrics whose names contain any of these strings
          will be displayed (the default prompt/response/row-count columns are
          always kept).

    Raises:
        ImportError: If `pandas` is not installed.
    """
    if not is_ipython_available():
        return

    if pd is None:
        raise ImportError(
            '`pandas` is not installed. Please install using "!pip install pandas"'
        )

    # pylint: disable=g-import-not-at-top, g-importing-member
    from IPython.display import display
    from IPython.display import Markdown

    summary_metrics, metrics_table = (
        eval_result.summary_metrics,
        eval_result.metrics_table,
    )

    # One row, one column per summary metric.
    summary_metrics_df = pd.DataFrame.from_dict(summary_metrics, orient="index").T

    if metrics:
        columns_to_keep = metrics + _DEFAULT_COLUMNS_TO_DISPLAY
        summary_metrics_df = _filter_df(summary_metrics_df, columns_to_keep)
        metrics_table = _filter_df(metrics_table, columns_to_keep)

    # Display the title in Markdown.
    if title:
        display(Markdown(f"{_MARKDOWN_H2} {title}"))

    # Display the summary metrics.
    display(Markdown(f"{_MARKDOWN_H3} Summary Metrics"))
    display(summary_metrics_df)

    # Display the metrics table.
    display(Markdown(f"{_MARKDOWN_H3} Row-based Metrics"))
    display(metrics_table)


def display_explanations(
    *,
    eval_result: _base.EvalResult,
    num: int = 1,
    metrics: Optional[List[str]] = None,
) -> None:
    """Displays the explanations in a notebook using IPython.display.

    Does nothing when not running inside an IPython shell.

    Args:
        eval_result: An object containing evaluation results. It is expected to
          have attributes `summary_metrics` and `metrics_table`.
        num: The number of row samples to display. Defaults to 1. If the number
          of rows is less than `num`, all rows will be displayed.
        metrics: A list of metric name substrings to filter displayed columns.
          If provided, only metrics whose names contain any of these strings
          will be displayed.

    Raises:
        ValueError: If `num` is less than 1.
    """
    if not is_ipython_available():
        return

    # pylint: disable=g-import-not-at-top, g-importing-member
    from IPython.display import display
    from IPython.display import HTML

    style = "white-space: pre-wrap; width: 1500px; overflow-x: auto;"
    metrics_table = eval_result.metrics_table

    if num < 1:
        raise ValueError("Num must be greater than 0.")
    # `DataFrame.sample` rejects n larger than the number of rows.
    num = min(num, len(metrics_table))

    df = metrics_table.sample(n=num)

    if metrics:
        columns_to_keep = metrics + _DEFAULT_COLUMNS_TO_DISPLAY
        df = _filter_df(df, columns_to_keep)

    for _, row in df.iterrows():
        for col in df.columns:
            # Cell contents are free-form text (prompts, model responses,
            # explanations); escape them so "<", ">" and "&" are shown
            # literally instead of being interpreted as markup.
            label = html.escape(str(col))
            value = html.escape(str(row[col]))
            display(HTML(f"<div style='{style}'><h4>{label}:</h4>{value}</div>"))
        # Visual separator between sampled rows.
        display(HTML("<hr>"))


def display_radar_plot(
    eval_results_with_title: List[Tuple[str, _base.EvalResult]],
    metrics: List[str],
    radar_range: Tuple[float, float] = _DEFAULT_RADAR_RANGE,
) -> None:
    """Plots a radar plot comparing evaluation results.

    Args:
        eval_results_with_title: List of (title, eval_result) tuples.
        metrics: A list of metrics whose mean values will be plotted. If empty,
          every entry of `summary_metrics` is plotted.
        radar_range: Range of the radar plot axes.

    Raises:
        ImportError: If `plotly` is not installed.
    """
    go = _import_plotly_graph_objects()

    fig = go.Figure()
    for title, eval_result in eval_results_with_title:
        summary_metrics = eval_result.summary_metrics
        if metrics:
            # Keep only "<metric>/mean" entries and drop the "/mean" suffix so
            # the axis labels show the bare metric name.
            summary_metrics = {
                key.replace("/mean", ""): summary_metrics[key]
                for key in summary_metrics
                if any(selected_metric + "/mean" in key for selected_metric in metrics)
            }
        fig.add_trace(
            go.Scatterpolar(
                r=list(summary_metrics.values()),
                theta=list(summary_metrics.keys()),
                fill="toself",
                name=title,
            )
        )
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=radar_range)),
        showlegend=True,
    )
    fig.show()


def display_bar_plot(
    eval_results_with_title: List[Tuple[str, _base.EvalResult]],
    metrics: List[str],
) -> None:
    """Plots a bar plot comparing evaluation results.

    Args:
        eval_results_with_title: List of (title, eval_result) tuples.
        metrics: A list of metrics whose mean values will be plotted. If empty,
          every entry of `summary_metrics` is plotted.

    Raises:
        ImportError: If `plotly` is not installed.
    """
    go = _import_plotly_graph_objects()

    # The wanted keys do not depend on the individual result, so build them
    # once; a set gives constant-time membership tests.
    mean_metric_keys = {f"{metric}/mean" for metric in (metrics or ())}

    data = []
    for title, eval_result in eval_results_with_title:
        summary_metrics = eval_result.summary_metrics
        if metrics:
            # Exact match on "<metric>/mean", preserving the original order.
            summary_metrics = {
                key: value
                for key, value in summary_metrics.items()
                if key in mean_metric_keys
            }

        data.append(
            go.Bar(
                x=list(summary_metrics.keys()),
                y=list(summary_metrics.values()),
                name=title,
            )
        )

    fig = go.Figure(data=data)
    fig.update_layout(barmode="group", showlegend=True)
    fig.show()


def generate_uuid(length: int = 8) -> str:
    """Generates a uuid of a specified length (default=8).

    The result is a random string of lowercase ASCII letters and digits, not
    an RFC 4122 UUID. It is drawn from the `random` module, so it is not
    suitable for security-sensitive tokens.
    """
    return "".join(random.choices(string.ascii_lowercase + string.digits, k=length))