简单爬虫例子1

简单爬虫例子1
import requests
from bs4 import BeautifulSoup


def fetch_and_parse(url, timeout=10):
    """Download *url* and print the details of every ``div.url-card`` it contains.

    For each card the link ``href``, the image ``data-src``, the ``<strong>``
    text and the ``<p>`` text are printed, followed by a separator line.
    If a card lacks one of those tags or attributes, that field is printed as
    an empty string so a single incomplete card does not abort the run.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so an unresponsive host would
            otherwise block the script indefinitely.

    Raises:
        requests.RequestException: Not caught here; network failures
            (connection errors, timeouts, ...) propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is reported and treated as "nothing to parse".
    if response.status_code != 200:
        print(f"无法访问{url}，状态码：{response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    for url_card in soup.find_all('div', {'class': 'url-card'}):
        # find() returns None when the tag is absent, so look up each tag
        # first and only then read its attribute / text.
        anchor = url_card.find('a')
        image = url_card.find('img')
        heading = url_card.find('strong')
        paragraph = url_card.find('p')

        link = anchor.get('href', '') if anchor is not None else ''
        img_src = image.get('data-src', '') if image is not None else ''
        title = heading.get_text() if heading is not None else ''
        description = paragraph.get_text() if paragraph is not None else ''

        print("链接:", link)
        print("图片地址:", img_src)
        print("标题:", title)
        print("描述:", description)
        print("-" * 50)  # separator line between cards


# Example: replace the URL below with the page you want to crawl.
fetch_and_parse(url='https://xxx.com/ai-chatbots/')
智享笔记

最近发表

热门文章