简单爬虫例子1

import requests
from bs4 import BeautifulSoup


def _first_attr(card, tag_name, *attr_names):
    """Return the first present attribute of the card's first ``tag_name`` tag.

    The attributes are tried in the order given. An empty string is returned
    when the tag is absent or carries none of ``attr_names``, so a single
    malformed card cannot abort the whole run.
    """
    tag = card.find(tag_name)
    if tag is None:
        return ""
    for attr_name in attr_names:
        value = tag.get(attr_name)
        if value is not None:
            return value
    return ""


def _tag_text(card, tag_name):
    """Return the text of the card's first ``tag_name`` tag, or "" if absent."""
    tag = card.find(tag_name)
    return tag.get_text() if tag is not None else ""


def fetch_and_parse(url, timeout=10):
    """Fetch ``url`` and print the details of every ``div.url-card`` it contains.

    For each card the link ``href``, the image address, the ``<strong>``
    text and the ``<p>`` text are printed, followed by a separator line.
    Fields that are missing from a card are printed as empty strings.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        None. All results are written to standard output.

    Exceptions raised by ``requests.get`` (connection errors, timeouts, ...)
    are not caught here and propagate to the caller.
    """
    # Send the HTTP request to the given URL.
    response = requests.get(url, timeout=timeout)

    # Bail out early when the request did not succeed.
    if response.status_code != 200:
        print(f"无法访问{url},状态码:{response.status_code}")
        return

    # Parse the HTML content.
    soup = BeautifulSoup(response.text, 'html.parser')

    # Walk over every <div class="url-card"> element.
    for url_card in soup.find_all('div', {'class': 'url-card'}):
        link = _first_attr(url_card, 'a', 'href')
        # Prefer the lazy-loading "data-src" attribute; fall back to "src"
        # for images that do not carry it.
        img_src = _first_attr(url_card, 'img', 'data-src', 'src')
        title = _tag_text(url_card, 'strong')
        description = _tag_text(url_card, 'p')

        print("链接:", link)
        print("图片地址:", img_src)
        print("标题:", title)
        print("描述:", description)
        print("-" * 50)  # separator line between cards


# Example: point target_url at the page you want to scrape.
target_url = 'https://xxx.com/ai-chatbots/'
fetch_and_parse(target_url)


相关推荐

  • 生成图片

    from PIL import Image, ImageColor, ImageDraw, ImageFont, ImageFilter def create_image_with_text(size, color, text, font_path, font_size, text_color, shadow_color, output_path): """ Create a new image of specified size and color with centered text that has a border and shadow. :param size: A tuple con

  • 获取指定目录下的所有图片信息

    1 获取指定目录下的所有图片信息// 获取指定目录下的所有图片信息 public function getImagesInfo($directory) { $images = []; // 创建递归目录迭代器 $iterator = new \RecursiveIteratorIterator( new \RecursiveDirectoryIterator($directory, \RecursiveDirectoryIterator::SKIP_DOTS), \RecursiveIteratorIterator::LEAVES_ONLY ); // 遍历目录中的每个文件 foreach (

  • Thinkphp各版本的PHP要求

    ThinkPHP 8.0:运行环境要求PHP8.0+,兼容PHP8.3;ThinkPHP 6.1:运行环境要求PHP7.2+,兼容PHP8.1;ThinkPHP 6.0:运行环境要求PHP7.2+,兼容PHP8.1;ThinkPHP 5.1:运行环境要求PHP5.6+,兼容PHP8.0;ThinkPHP 5.0:运行环境要求PHP5.4+,兼容PHP7.3。