diff --git a/http-get.py b/http-get.py
index a8331e0..fd51db2 100644
--- a/http-get.py
+++ b/http-get.py
@@ -1,28 +1,28 @@
 import requests
-from bs4 import BeautifulSoup
-
+from lxml import html
+word = 'vocabulary'
 # Define the URL
-url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/wrist'
+url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/' + word
 
 headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 }
 
 response = requests.get(url, headers=headers)
-
 # Check if the request was successful
 if response.status_code == 200:
-    # Parse the content with BeautifulSoup
-    soup = BeautifulSoup(response.content, 'html.parser')
+    # Parse the content with lxml
+    tree = html.fromstring(response.content)
 
-    # Find the meta description tag
-    meta_description = soup.find('meta', attrs={'name': 'description'})
-    
-    if meta_description:
-        # Extract the content attribute
-        description_content = meta_description.get('content')
-        print(description_content)
+    # Use XPath to find the element
+    xpath_expression = '/html/body/div[2]/div/div[1]/div[2]/article/div[2]/div[2]/div/span/div/div[3]'
+    elements = tree.xpath(xpath_expression)
+    
+    # Check if the element was found and print the text content
+    if elements:
+        for element in elements:
+            print(element.text_content().strip())
     else:
-        print("Meta description tag not found.")
+        print("Element not found.")
 else:
     print(f"Failed to retrieve the page. Status code: {response.status_code}")