вам нужно использовать
find('div', id="page-shibor-history")
а не
find('div', id_="page-shibor-history")
Вот рабочий код, можете его проанализировать.
import requests
from bs4 import BeautifulSoup
import re
# Landing page that embeds the SHIBOR data-services iframe.
URL = 'https://www.shibor.org/shibor/dataservicesen/'
# Browser-like User-Agent so the site does not reject the scripted request.
HEADERS = {'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"}
def get_html(url, timeout=30):
    """Fetch *url* and return the Response on HTTP 200, otherwise None.

    A finite *timeout* (seconds) is passed to requests.get because requests
    has no default timeout — without it a dead server hangs the script
    forever.  On a non-200 status the code is printed and None is returned
    explicitly (the original fell off the end and returned None implicitly).
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    if response.status_code == 200:
        return response
    print(response.status_code)
    return None
def get_content_iframe(html):
    """Locate the history iframe's src inside *html* and fetch that URL.

    *html* is a requests Response for the landing page.  Returns the
    Response for the iframe URL, or None when the iframe reference is not
    present on the page.
    """
    soup = BeautifulSoup(html.text, 'html.parser')
    # Fixed character class: the original '[A-za-z/-.]' accidentally
    # contained the ranges 'A-z' and '/-.', matching stray punctuation;
    # the dot in the file name is now escaped, and a capture group is
    # used instead of splitting the whole match on quotes.
    pattern = r'src="(/[A-Za-z/.-]+shibor-def-down-iframe-e\.html)"'
    src = re.search(pattern, str(soup))
    if src is None:
        # Guard: the original did src[0] and would raise on a failed match.
        return None
    new_url = 'https://www.shibor.org' + src.group(1)
    return get_html(new_url)
def parser(url):
    """Download *url* and hand the Response to get_content_iframe.

    Returns whatever get_content_iframe returns, or None on failure.
    """
    response = get_html(url)
    # get_html already prints the status code and returns None on any
    # non-200 result, so only a None check is needed here — the original
    # re-read response.status_code and crashed with AttributeError when
    # get_html had returned None.
    if response is None:
        return None
    return get_content_iframe(response)
def main():
    """Entry point: fetch the iframe page and print the history <div>."""
    html = parser(URL)
    if html is None:
        # parser/get_html already printed the reason; avoid crashing on .text.
        print('Failed to download the page')
        return
    soup = BeautifulSoup(html.text, 'html.parser')
    qw = soup.find('div', id="page-shibor-history")
    print(qw)


# The original guard 'if name == "main":' never fires (and raises NameError):
# the module variable is __name__ and the value to compare is "__main__".
if __name__ == "__main__":
    main()
Так как сайт использует JS для загрузки контента, будем использовать Playwright.
Устанавливаем playwright
pip install --upgrade pip
pip install playwright
playwright install
Вот обновлённый код:
import requests
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import re
# Landing page that embeds the SHIBOR data-services iframe.
URL = 'https://www.shibor.org/shibor/dataservicesen/'
# Browser-like User-Agent for the initial requests.get of the landing page.
HEADERS = {'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"}
def get_html(url):
    """Render *url* in headless Chromium and return the final HTML string.

    The site builds its tables with JavaScript, so plain requests cannot
    see the data; Playwright waits until the target <div> has actually
    been injected before capturing the page content.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            # Finite timeout instead of 0: timeout=0 disables the limit
            # entirely, so the script would hang forever if the selector
            # never appears (e.g. the site changes its markup).
            page.set_default_timeout(60_000)
            page.goto(url)
            page.wait_for_selector("div[id='shibor-his-cn']")
            return page.content()
        finally:
            # The original leaked the browser process; close it explicitly.
            browser.close()
def get_content_iframe(html):
    """Find the history iframe's src in *html* and render that URL.

    *html* is a requests Response for the landing page.  Returns the
    rendered HTML string produced by get_html, or None when the iframe
    reference is absent.
    """
    soup = BeautifulSoup(html.text, 'html.parser')
    # Fixed character class: the original '[A-za-z/-.]' accidentally
    # contained the ranges 'A-z' and '/-.', matching stray punctuation;
    # the dot in the file name is now escaped, and a capture group is
    # used instead of splitting the whole match on quotes.
    pattern = r'src="(/[A-Za-z/.-]+shibor-def-down-iframe-e\.html)"'
    src = re.search(pattern, str(soup))
    if src is None:
        # Guard: the original did src[0] and would raise on a failed match.
        return None
    new_url = 'https://www.shibor.org' + src.group(1)
    return get_html(new_url)
def parser(url):
    """Download *url* with requests and extract/render the iframe content.

    Returns the rendered HTML string from get_content_iframe, or None on
    an HTTP error.
    """
    # HEADERS was defined at module level but never used in this updated
    # version; send the browser-like User-Agent as in the first script so
    # the landing-page request is not trivially rejected.
    response = requests.get(url, headers=HEADERS, timeout=30)
    if response.status_code == 200:
        return get_content_iframe(response)
    print(response.status_code)
    return None
def main():
    """Entry point: render the iframe page and print the history <div>."""
    html = parser(URL)
    if html is None:
        # parser already printed the HTTP status; avoid passing None to bs4.
        print('Failed to download the page')
        return
    soup = BeautifulSoup(html, 'html.parser')
    qw = soup.find('div', id="page-shibor-history")
    print(qw)


# The original guard 'if name == "main":' never fires (and raises NameError):
# the module variable is __name__ and the value to compare is "__main__".
if __name__ == "__main__":
    main()