# -*- coding: utf-8 -*- # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # """Python functions which run only within a Jupyter or Colab notebook.""" import random import string import sys from typing import List, Optional, Tuple from vertexai.preview.evaluation import _base from vertexai.preview.evaluation import constants try: import pandas as pd except ImportError: pandas = None _MARKDOWN_H2 = "##" _MARKDOWN_H3 = "###" _DEFAULT_COLUMNS_TO_DISPLAY = [ constants.Dataset.MODEL_RESPONSE_COLUMN, constants.Dataset.BASELINE_MODEL_RESPONSE_COLUMN, constants.Dataset.PROMPT_COLUMN, constants.MetricResult.ROW_COUNT_KEY, ] _DEFAULT_RADAR_RANGE = (0, 5) def _get_ipython_shell_name() -> str: if "IPython" in sys.modules: from IPython import get_ipython return get_ipython().__class__.__name__ return "" def is_ipython_available() -> bool: return _get_ipython_shell_name() def _filter_df( df: pd.DataFrame, substrings: Optional[List[str]] = None ) -> pd.DataFrame: """Filters a DataFrame to include only columns containing the given substrings.""" if substrings is None: return df return df.copy().filter( [ column_name for column_name in df.columns if any(substring in column_name for substring in substrings) ] ) def display_eval_result( *, eval_result: _base.EvalResult, title: Optional[str] = None, metrics: Optional[List[str]] = None, ) -> None: """Displays evaluation results in a notebook using IPython.display. Args: eval_result: An object containing evaluation results with `summary_metrics` and `metrics_table` attributes. title: A string title to display above the results. metrics: A list of metric name substrings to filter displayed columns. If provided, only metrics whose names contain any of these strings will be displayed. """ if not is_ipython_available(): return # pylint: disable=g-import-not-at-top, g-importing-member from IPython.display import display from IPython.display import Markdown summary_metrics, metrics_table = ( eval_result.summary_metrics, eval_result.metrics_table, ) summary_metrics_df = pd.DataFrame.from_dict(summary_metrics, orient="index").T if metrics: columns_to_keep = metrics + _DEFAULT_COLUMNS_TO_DISPLAY summary_metrics_df = _filter_df(summary_metrics_df, columns_to_keep) metrics_table = _filter_df(metrics_table, columns_to_keep) # Display the title in Markdown. if title: display(Markdown(f"{_MARKDOWN_H2} {title}")) # Display the summary metrics. display(Markdown(f"{_MARKDOWN_H3} Summary Metrics")) display(summary_metrics_df) # Display the metrics table. display(Markdown(f"{_MARKDOWN_H3} Row-based Metrics")) display(metrics_table) def display_explanations( *, eval_result: _base.EvalResult, num: int = 1, metrics: Optional[List[str]] = None, ) -> None: """Displays the explanations in a notebook using IPython.display. Args: eval_result: An object containing evaluation results. It is expected to have attributes `summary_metrics` and `metrics_table`. num: The number of row samples to display. Defaults to 1. If the number of rows is less than `num`, all rows will be displayed. metrics: A list of metric name substrings to filter displayed columns. If provided, only metrics whose names contain any of these strings will be displayed. """ if not is_ipython_available(): return # pylint: disable=g-import-not-at-top, g-importing-member from IPython.display import display from IPython.display import HTML style = "white-space: pre-wrap; width: 1500px; overflow-x: auto;" metrics_table = eval_result.metrics_table if num < 1: raise ValueError("Num must be greater than 0.") num = min(num, len(metrics_table)) df = metrics_table.sample(n=num) if metrics: columns_to_keep = metrics + _DEFAULT_COLUMNS_TO_DISPLAY df = _filter_df(df, columns_to_keep) for _, row in df.iterrows(): for col in df.columns: display(HTML(f"