split word and url

This commit is contained in:
Rubbit 2024-09-18 19:16:54 +08:00
parent 5556bef66a
commit e37ce389aa
1 changed file with 14 additions and 14 deletions

View File

@@ -1,28 +1,28 @@
import requests import requests
from bs4 import BeautifulSoup from lxml import html
word = 'vocabulary'
# Define the URL # Define the URL
url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/wrist' url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/' + word
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
} }
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
# Check if the request was successful # Check if the request was successful
if response.status_code == 200: if response.status_code == 200:
# Parse the content with BeautifulSoup # Parse the content with lxml
soup = BeautifulSoup(response.content, 'html.parser') tree = html.fromstring(response.content)
# Find the meta description tag # Use XPath to find the element
meta_description = soup.find('meta', attrs={'name': 'description'}) xpath_expression = '/html/body/div[2]/div/div[1]/div[2]/article/div[2]/div[2]/div/span/div/div[3]'
elements = tree.xpath(xpath_expression)
if meta_description:
# Extract the content attribute # Check if the element was found and print the text content
description_content = meta_description.get('content') if elements:
print(description_content) for element in elements:
print(element.text_content().strip())
else: else:
print("Meta description tag not found.") print("Element not found.")
else: else:
print(f"Failed to retrieve the page. Status code: {response.status_code}") print(f"Failed to retrieve the page. Status code: {response.status_code}")