# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tool for web browse."""

import requests

# Seconds to wait for the server before giving up. Requests applies no
# timeout unless one is passed, so without this an unresponsive host
# would block the caller indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def load_web_page(url: str) -> str:
    """Fetches the content in the url and returns the text in it.

    The page is parsed with BeautifulSoup's 'lxml' parser and reduced to
    its visible text. Lines containing three or fewer whitespace-separated
    words (e.g., single words or short subtitles) are dropped.

    Args:
      url (str): The url to browse.

    Returns:
      str: The filtered text content of the url, or the message
        'Failed to fetch url: <url>' when the response status code is
        not 200.

    Raises:
      requests.exceptions.RequestException: If the request itself fails,
        including when the server does not respond within the timeout.
    """
    # Imported at call time so that importing this module does not
    # itself import bs4.
    from bs4 import BeautifulSoup

    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        # Return the message as-is: it is a diagnostic, not page content,
        # so it must not be altered by the short-line filter below.
        return f'Failed to fetch url: {url}'

    soup = BeautifulSoup(response.content, 'lxml')
    text = soup.get_text(separator='\n', strip=True)

    # Split the text into lines, filtering out very short lines
    # (e.g., single words or short subtitles).
    return '\n'.join(
        line for line in text.splitlines() if len(line.split()) > 3
    )