"""Count the words on a list of web pages and print the totals as a table."""

import re

import requests
from bs4 import BeautifulSoup
from tabulate import tabulate


def count_words(url: str, timeout: float = 10) -> int:
    """Return the number of whitespace-separated words in the page at *url*.

    The page is fetched with an HTTP GET, parsed as HTML, stripped of
    ``<script>`` and ``<style>`` elements, and the remaining text is split
    on whitespace.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The word count, or 0 if the page could not be retrieved (network
        error, invalid URL, timeout, or a status code other than 200). A
        message describing the failure is printed in that case.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Covers connection failures, timeouts and malformed URLs (e.g. an
        # empty string), so one bad entry does not abort the whole run.
        print(f"Failed to retrieve content from {url}. Error: {exc}")
        return 0

    if response.status_code != 200:
        print(f"Failed to retrieve content from {url}. Error code: {response.status_code}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')

    # Script and style bodies are code, not readable text; drop them before
    # extracting the text so they do not inflate the count.
    for tag in soup(["script", "style"]):
        tag.extract()

    # Collapse runs of whitespace (including newlines) into single spaces.
    text = re.sub(r'\s+', ' ', soup.get_text())

    return len(text.split())


# List of URLs to process
urls = [
    '',
]

# Table headers
headers = ['URL', 'Word Count']

# Table rows: one [url, word_count] pair per processed URL
rows = []

# Process each URL and get word count
for url in urls:
    word_count = count_words(url)
    rows.append([url, word_count])

# Print the table
print(tabulate(rows, headers=headers, tablefmt='grid'))