「Python学习」使用Python爬取壁纸

代码实现

话不多说,直接上代码,拿走不谢!

手机壁纸爬取(国外)

import json
import os
import time

import requests


def test(pages=9, out_dir="D:/project_workspace/crawling_data/unsplash", timeout=30):
    """Download iPhone wallpapers from the Unsplash landing-page API.

    For each listing page, the JSON response is parsed, the ``regular`` size
    URL of every entry under ``photos`` is fetched, and the image bytes are
    written to ``{out_dir}/{page}_{index}.jpg``.

    Args:
        pages: Number of listing pages to crawl, starting from page 1.
        out_dir: Directory the images are written to; created if missing.
        timeout: Per-request timeout in seconds.

    Returns:
        The string ``"success"`` once every page has been processed.

    Raises:
        requests.HTTPError: If a listing page responds with an error status.
        requests.RequestException: If a listing page cannot be fetched.
    """
    # Present a browser User-Agent on every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"}

    # Make sure the target directory exists before the first open() call.
    os.makedirs(out_dir, exist_ok=True)

    for page in range(1, pages + 1):
        url = f"https://unsplash.com/napi/landing_pages/wallpapers/iphone?page={page}&per_page=20"

        # A listing page that cannot be fetched is fatal for that page, so let
        # the error propagate instead of failing later while parsing JSON.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        photos = json.loads(response.content.decode('utf-8'))["photos"]

        for index, photo in enumerate(photos, start=1):
            img_url = photo["urls"]["regular"]

            # A single broken image should not abort the whole crawl:
            # report it and move on to the next one.
            try:
                img_response = requests.get(img_url, headers=headers, timeout=timeout)
                img_response.raise_for_status()
            except requests.RequestException as exc:
                print(f"第{page}页第{index}张图片下载失败: {exc}")
                continue

            path = f"{out_dir}/{page}_{index}.jpg"
            with open(path, "wb") as f:
                f.write(img_response.content)
            print(f"第{page}页第{index}张图片你下载成功")

            # Small pause between downloads to avoid hammering the server;
            # done after the file has been closed.
            time.sleep(0.1)

    return "success"


if __name__ == '__main__':
    test()

图片下载如下

电脑壁纸爬取(国内)

    base_url = "https://pic.netbian.com"    list = []    for i in range(1, 100):        if i == 1:            url = "https://pic.netbian.com/4kmeinv/index.html"        else:            url = f"https://pic.netbian.com/4kmeinv/index_{i}.html"        list.append(url)    # 模拟浏览器    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"}    i = 1    for uri in list:        # 请求数据        request = requests.get(uri, headers=headers, verify=False)        # 设置charset格式        resp = request.content.decode('gbk')        # 通过BeautifulSoup进行数据解析        soup = BeautifulSoup(resp, "lxml")        divs = soup.find_all("div", class_="slist")        j = 1        for imgs in divs:            img = imgs.find_all("img")            for im in img:                img_src = im["src"]                img_uri = base_url + img_src                img_content = requests.get(img_uri)                # 获取图片内容                img_byte = img_content.content                path = f"D:/project_workspace/crawling_data/{i}_{j}.jpg"                with open(path, "wb") as f:                    f.write(img_byte)                    time.sleep(0.1)                print(f"第{i}页第{j}张图片你下载成功")                j = j + 1        i = i + 1

代码很简单。由于第一个示例爬取的是国外网站,下载速度较慢,这里直接附上部分壁纸。

手机壁纸分享

壁纸太多,随机附上数张,可自己使用程序下载。

电脑壁纸分享

发表评论
留言与评论(共有 0 条评论) “”
   
验证码:

相关文章

推荐文章