Witajcie, próbuję pobrać zawartość strony tym skryptem:
import logging
import re
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.common.exceptions import TimeoutException, NoSuchElementException
# Fetch the Bybit announcements page in headless Chrome and parse it with
# BeautifulSoup.
#
# The legacy "--headless" mode is a separate browser implementation that many
# sites detect and answer with an empty document
# (<html><head></head><body></body></html>). The options below make the
# headless session look like the regular, headed browser.
chrome_options = Options()
# "new" headless mode runs the real Chrome browser without a visible window
# (requires Chrome 109 or newer).
chrome_options.add_argument("--headless=new")
# Headless sessions default to a small viewport; use a desktop-sized one so
# the page is laid out the same way as in a headed browser.
chrome_options.add_argument("--window-size=1920,1080")
# Stop Blink from exposing the automation flag (navigator.webdriver) that
# anti-bot scripts commonly check for.
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# Override the user agent so it does not contain the "HeadlessChrome" token.
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
)

url = "https://announcements.bybit.com/?page=1&category="

with webdriver.Chrome(options=chrome_options) as driver:
    driver.get(url)
    # Short pause before scrolling, kept from the original script.
    time.sleep(1)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        # Block for up to 10 seconds until at least one announcement entry
        # is present in the DOM.
        element_present = ec.presence_of_element_located(
            (By.CLASS_NAME, "article-item")
        )
        WebDriverWait(driver, 10).until(element_present)
    except TimeoutException as te:
        print(f"Timeout: {te}")
        logging.error("Timeout: %s", te)
    except NoSuchElementException as nse:
        print(f"Element not found: {nse}")
        logging.error("Element not found: %s", nse)
    except Exception as e:
        # Best-effort: report the problem and still dump whatever was loaded.
        print(f"Other error occurred: {e}")
        logging.error("Other error occurred: %s", e)
    # Read the DOM while the browser is still open; leaving the "with" block
    # quits the driver.
    page_source = driver.page_source

soup = BeautifulSoup(page_source, "html.parser")
class_pattern = re.compile(r"article-item")
print(soup)
Dopóki mam parametr chrome_options.add_argument("--headless"), skrypt zwraca mi tylko:
<html><head></head><body></body></html>
Dopiero po usunięciu tego parametru otwiera się normalna przeglądarka i reszta skryptu działa poprawnie. Podpowiecie coś?