split word and url

2024-09-18 19:16:54 +08:00 · 2024-09-18 19:16:54 +08:00 · e37ce389aa
parent 5556bef66a
commit e37ce389aa
1 changed file with 14 additions and 14 deletions
--- a/http-get.py
+++ b/http-get.py
@@ -1,28 +1,28 @@
 import requests
-from bs4 import BeautifulSoup
+from lxml import html
-
+word = 'vocabulary'
 # Define the URL
-url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/wrist'
+url = 'https://dictionary.cambridge.org/dictionary/essential-american-english/' + word
 headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 }
 response = requests.get(url, headers=headers)
 # Check if the request was successful
 if response.status_code == 200:
-    # Parse the content with BeautifulSoup
+    # Parse the content with lxml
-    soup = BeautifulSoup(response.content, 'html.parser')
+    tree = html.fromstring(response.content)
-    # Find the meta description tag
+    # Use XPath to find the element
-    meta_description = soup.find('meta', attrs={'name': 'description'})
+    xpath_expression = '/html/body/div[2]/div/div[1]/div[2]/article/div[2]/div[2]/div/span/div/div[3]'
+    elements = tree.xpath(xpath_expression)
-    if meta_description:
+    # Check if the element was found and print the text content
-        # Extract the content attribute
+    if elements:
-        description_content = meta_description.get('content')
+        for element in elements:
-        print(description_content)
+            print(element.text_content().strip())
    else:
-        print("Meta description tag not found.")
+        print("Element not found.")
 else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")