Answer:
import json

import requests
from bs4 import BeautifulSoup


def parse_gazeta_ru():
    url = "https://www.gazeta.ru/"
    # Some sites reject requests without a browser-like User-Agent header
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    news_list = []
    # NOTE: the class names below reflect the gazeta.ru markup at the time of
    # writing; if the site layout changes, they will need to be updated.
    for news_item in soup.find_all('div', class_='b-news__item'):
        title_tag = news_item.find('span', class_='b-news__item__title')
        summary_tag = news_item.find('span', class_='b-news__item__summary')
        date_tag = news_item.find('span', class_='b-news__item__time-date')
        image_tag = news_item.find('img')
        link_tag = news_item.find('a')

        # Guard against missing elements so one malformed news block
        # does not crash the whole run
        news_data = {
            'title': title_tag.text.strip() if title_tag else None,
            'summary': summary_tag.text.strip() if summary_tag else None,
            'date': date_tag.text.strip() if date_tag else None,
            'image_url': image_tag['src'] if image_tag else None,
            'page_url': link_tag['href'] if link_tag else None,
        }
        news_list.append(news_data)

    return news_list


if __name__ == "__main__":
    news_data = parse_gazeta_ru()
    # Save the collected data to a JSON file
    with open('gazeta_ru_news.json', 'w', encoding='utf-8') as json_file:
        json.dump(news_data, json_file, ensure_ascii=False, indent=4)
Explanation:
The script downloads the gazeta.ru front page with requests, parses the HTML with BeautifulSoup, and iterates over every news block (a div with the class b-news__item). From each block it extracts the title, summary, date, image URL and article URL, collects them into a list of dictionaries, and finally writes that list to gazeta_ru_news.json with ensure_ascii=False so the Cyrillic text remains readable.
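To sanity-check the result, a minimal sketch like the one below (assuming the script above has already been run and gazeta_ru_news.json exists in the working directory) reads the file back and prints the first few titles:

import json

# Load the file produced by parse_gazeta_ru() above
with open('gazeta_ru_news.json', 'r', encoding='utf-8') as json_file:
    news = json.load(json_file)

print(f"Collected {len(news)} items")
for item in news[:5]:
    print('-', item['title'])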