Split word and URL
This commit is contained in:
parent
5556bef66a
commit
e37ce389aa
26
http-get.py
26
http-get.py
|
@ -1,28 +1,28 @@
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from lxml import html
|
||||||
|
word = 'vocabulary'
|
||||||
# Define the URL
|
# Define the URL
|
||||||
url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/wrist'
|
url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/' + word
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||||
}
|
}
|
||||||
response = requests.get(url, headers=headers)
|
response = requests.get(url, headers=headers)
|
||||||
|
|
||||||
|
|
||||||
# Check if the request was successful
|
# Check if the request was successful
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
# Parse the content with BeautifulSoup
|
# Parse the content with lxml
|
||||||
soup = BeautifulSoup(response.content, 'html.parser')
|
tree = html.fromstring(response.content)
|
||||||
|
|
||||||
# Find the meta description tag
|
# Use XPath to find the element
|
||||||
meta_description = soup.find('meta', attrs={'name': 'description'})
|
xpath_expression = '/html/body/div[2]/div/div[1]/div[2]/article/div[2]/div[2]/div/span/div/div[3]'
|
||||||
|
elements = tree.xpath(xpath_expression)
|
||||||
|
|
||||||
if meta_description:
|
# Check if the element was found and print the text content
|
||||||
# Extract the content attribute
|
if elements:
|
||||||
description_content = meta_description.get('content')
|
for element in elements:
|
||||||
print(description_content)
|
print(element.text_content().strip())
|
||||||
else:
|
else:
|
||||||
print("Meta description tag not found.")
|
print("Element not found.")
|
||||||
else:
|
else:
|
||||||
print(f"Failed to retrieve the page. Status code: {response.status_code}")
|
print(f"Failed to retrieve the page. Status code: {response.status_code}")
|
||||||
|
|
Loading…
Reference in New Issue