第一周课后作业内容
- 针对 ImageNet 数据(s3://ai-cultivate/1percent_ImageNet.txt),进行基本的数据统计(统计框/图片宽高的平均值、最大值、最小值)和数据预处理(对数据做旋转增强、random crop),并通过可视化对比数据处理前后的变化
- 针对外部同学:不指定图片来源,可以使用任意来源的 100 张图片,进行基本的数据统计(统计框/图片宽高的平均值、最大值、最小值)和数据预处理(对数据做旋转增强、random crop),并通过可视化对比数据处理前后的变化
结果:w_max: 4368.000000, w_min: 46.000000, w_avg: 471.499180 ,h_max: 5065.000000, h_min: 21.000000, h_avg: 405.450550
图片:
import cv2
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmenters.flip import Fliplr
import nori2 as nori
import numpy as np
import refile
import boto3
from meghair.utils import io
from meghair.utils.imgproc import imdecode
# Homework script: gather width/height statistics over every image named in
# the ImageNet sample list and write an augmentation preview sheet
# ("final.jpg") for the first NUM images.
fetcher = nori.Fetcher()
ia.seed(1)
H, W = 256, 128  # height/width of each tile on the preview sheet
NUM = 6  # number of preview rows, and of augmented variants per row
seq = iaa.Sequential([
    iaa.Crop(px=(0, 16)),  # crop a random 0-16 px margin from the borders
    iaa.OneOf([
        iaa.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}),
        iaa.Affine(rotate=(-45, 45)),
        iaa.Affine(shear=(-16, 16)),
    ]),
    iaa.Resize({"height": H, "width": W}),
], random_order=True)

h_avg = 0
h_max = 0
h_min = 10000
w_avg = 0
w_max = 0
w_min = 10000
preview_rows = []

# NOTE(review): assumes the object returned by refile.smart_open supports the
# context-manager protocol like a regular file object - confirm.
with refile.smart_open("s3://ai-cultivate/1percent_ImageNet.txt") as datas:
    for j, item in enumerate(datas):
        # First whitespace-separated field of each line is the nori ID.
        img = imdecode(fetcher.get(item.split()[0]))[..., :3]
        h_t, w_t = img.shape[:2]

        # Min and max are tracked independently. The original used `elif`,
        # so an image that set a new maximum was never considered for the
        # minimum (e.g. the very first image).
        h_max = max(h_max, h_t)
        h_min = min(h_min, h_t)
        w_max = max(w_max, w_t)
        w_min = min(w_min, w_t)

        # Incremental mean over the j + 1 images seen so far.
        h_avg = (j * h_avg + h_t) / (j + 1)
        w_avg = (j * w_avg + w_t) / (j + 1)

        if j >= NUM:
            continue  # only the first NUM images get a preview row

        images = np.array([img] * NUM, dtype=np.uint8)
        # One row: the resized original followed by NUM augmented variants,
        # each tile separated by a 10 px black gap.
        write_img = np.zeros((H, (W + 10) * (NUM + 1), 3), dtype=np.uint8)
        images_ang = seq(images=images)
        write_img[:, 0:W] = cv2.resize(img, (W, H))
        for i, imgt in enumerate(images_ang):
            left = (i + 1) * (W + 10)
            write_img[:, left:left + W] = imgt
        preview_rows.append(write_img)

# Guard against an empty list file; the original raised NameError here.
if preview_rows:
    image4 = np.vstack(preview_rows)
    cv2.imwrite("final.jpg", image4)
print("w_max: %f, w_min: %f, w_avg: %f \nh_max: %f, h_min: %f, h_avg: %f" % (w_max, w_min, w_avg, h_max, h_min, h_avg))
statistics result: {'avg_height': 405.4505503083288, 'max_height': 5065, 'min_height': 21, 'avg_width': 471.49918039185076, 'max_width': 4368, 'min_width': 46, 'img_size': 12811}
import boto3
import cv2
import nori2 as nori
import numpy as np
import collections
from imgaug import augmenters as iaa
from meghair.utils.imgproc import imdecode
from refile import smart_open
# S3 client bound to a custom endpoint URL, plus the bucket and object key of
# the text file that lists the images; read_img() uses all three.
s3_client = boto3.client("s3", endpoint_url="http://oss.i.brainpp.cn")
bucket = "ai-cultivate"
key = "1percent_ImageNet.txt"
def read_img(bucket, key):
    """Download the image list from S3 and decode every listed image.

    The object is read as UTF-8 text with one record per line; the first
    tab-separated field of each record is used as the nori ID.

    Args:
        bucket: Name of the S3 bucket holding the list file.
        key: Object key of the list file inside the bucket.

    Returns:
        A list with one decoded image per non-blank line, in file order.
    """
    resp = s3_client.get_object(Bucket=bucket, Key=key)
    text = resp["Body"].read().decode("utf8")
    # Drop blank lines instead of blindly discarding the last element: the
    # original `[:-1]` lost the final record when the file had no trailing
    # newline.
    data = [line for line in text.split("\n") if line.strip()]
    print("data len=", len(data))
    if len(data) > 1:
        print(data[1])  # sample record, shown for a quick sanity check
    nori_ids = [line.split("\t")[0] for line in data]
    fetcher = nori.Fetcher()
    return [imdecode(fetcher.get(nori_id)) for nori_id in nori_ids]
def statistics(images):
    """Compute and print height/width statistics for a collection of images.

    Args:
        images: Iterable of array-like images exposing ``shape`` with the
            height at index 0 and the width at index 1.

    Returns:
        A dict with the keys ``avg_height``, ``max_height``, ``min_height``,
        ``avg_width``, ``max_width``, ``min_width`` and ``img_size`` (the
        number of images).

    Raises:
        ValueError: If ``images`` is empty.
    """
    # Materialise once so that one-shot iterables are supported too.
    sizes = [(img.shape[0], img.shape[1]) for img in images]
    if not sizes:
        raise ValueError("statistics() requires at least one image")

    height_list = [height for height, _ in sizes]
    width_list = [width for _, width in sizes]
    img_info = {
        "avg_height": np.mean(height_list),
        "max_height": max(height_list),
        "min_height": min(height_list),
        "avg_width": np.mean(width_list),
        "max_width": max(width_list),
        "min_width": min(width_list),
        "img_size": len(sizes),
    }
    print("statistics result: {}".format(img_info))
    return img_info
def enhanceimg(images):
    """Write "result.jpg": a grid of augmented variants of a few images.

    Every second image of the last ten is used. Each one contributes a row of
    NUM independently augmented 128x128 tiles; tiles and rows are separated
    by 10 px of black.

    Args:
        images: Sequence of decoded images.
    """
    selected = images[-10::2]
    H, W = 128, 128
    NUM = 6
    row_step = H + 10
    col_step = W + 10

    seq = iaa.Sequential(
        [
            iaa.Fliplr(0.5),  # mirror horizontally with probability 0.5
            iaa.Crop(percent=(0, 0.05)),  # crop 0-5% off the borders
            iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # zoom
                translate_px={"x": (-16, 16), "y": (-16, 16)},  # shift in px
                rotate=(-45, 45),  # rotation angle range in degrees
            ),
            iaa.GaussianBlur(sigma=(0, 2.0)),  # Gaussian blur
            iaa.Resize({"height": H, "width": W}),
        ],
        random_order=True,
    )

    canvas = np.zeros(
        shape=(row_step * len(selected), col_step * NUM, 3), dtype=np.uint8
    )
    for row, source in enumerate(selected):
        batch = np.array([source] * NUM, dtype=np.uint8)
        variants = seq.augment_images(images=batch)
        top = row * row_step
        # Paste tiles straight onto the canvas; the gaps keep their zeros.
        for col, tile in enumerate(variants):
            left = col * col_step
            canvas[top:top + H, left:left + W, :] = tile
    cv2.imwrite("result.jpg", canvas)
if __name__ == "__main__":
    # Load every listed image once, then print the size statistics and write
    # the augmentation sheet from that same list.
    images = read_img(bucket, key)
    statistics(images)
    enhanceimg(images)
Code:
import cv2
import imgaug as ia
import imgaug.augmenters as iaa
import nori2 as nori
import numpy as np
import numpy as np
from meghair.utils import io
from meghair.utils.imgproc import imdecode
# Shared setup for the script below: nori fetcher, tile size, number of
# augmented variants per image, and accumulators for the original image sizes.
fetcher = nori.Fetcher()
ia.seed(1)
H, W = 256, 128
NUM = 10
# The list literals were lost in the paste (`ARRAY_H =`), which is a syntax
# error; both accumulators start out empty.
ARRAY_H = []
ARRAY_W = []
seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Crop(px=(0, 16)),
    iaa.Resize({"height": H, "width": W}),
], random_order=True)
def read_data(dir_str):
    """Read a text file and return its lines as a NumPy array of strings.

    Newline characters are stripped from each line; blank lines are kept as
    empty strings. No numeric parsing is performed.

    Args:
        dir_str: Path of the text file to read.

    Returns:
        A one-dimensional ``numpy.ndarray`` with one element per line.
    """
    with open(dir_str) as fdata:
        data_temp = [line.strip('\n') for line in fdata]
    return np.array(data_temp)
# Alternative data source kept for reference:
# datas = io.load("s3://ai-cultivate/task1/pics.pkl")
datas = read_data("/home/liyitu/ImageNet.txt")
for j, item in enumerate(datas):
    img = imdecode(fetcher.get(item))[..., :3]
    # Record the size of the original image before any augmentation.
    ARRAY_H.append(img.shape[0])
    ARRAY_W.append(img.shape[1])
    images = np.array([img] * NUM, dtype=np.uint8)
    # One sheet per image: NUM augmented tiles separated by 10 px gaps.
    write_img = np.zeros((H, (W + 10) * NUM, 3), dtype=np.uint8)
    images_aug = seq(images=images)
    for i, aug in enumerate(images_aug):  # `aug` avoids shadowing `img`
        write_img[:, i * (W + 10):i * (W + 10) + W, :] = aug
    cv2.imwrite("final_%d.jpg" % j, write_img)
    # NOTE(review): this stops after index 11, i.e. 12 images - confirm that
    # the intended limit was not 10.
    if j > 10:
        break

# Guard the summary so an empty list file does not crash in max()/division.
if ARRAY_H:
    print("max height %d, min height %d, average height %d" % (max(ARRAY_H), min(ARRAY_H), sum(ARRAY_H) / len(ARRAY_H)))
    print("max width %d, min width %d, average width %d" % (max(ARRAY_W), min(ARRAY_W), sum(ARRAY_W) / len(ARRAY_W)))
code
import cv2
import numpy as np
import nori2 as nori
from meghair.utils.imgproc import imdecode
import imgaug as ia
import imgaug.augmenters as iaa
from PIL import Image
# Homework script: augment the first 20 listed images, write one composite
# per image, and compute width/height statistics of the originals.
fetcher = nori.Fetcher()
seq = iaa.Sequential([
    iaa.Crop(px=(20, 20)),
    iaa.Affine(rotate=(-45, 45)),
], random_order=True)
COUNT = 3  # augmented variants per image
with open("1percent_ImageNet.txt", "r") as f:
    lines = f.readlines()
shapes = []
for idx, line in enumerate(lines[:20]):
    img = imdecode(fetcher.get(line.split()[0]))[..., :3]
    images = np.array([img] * COUNT, dtype=np.uint8)
    images_aug = seq(images=images)
    # Composite canvas: COUNT variants side by side at the original size.
    new_image = np.zeros((img.shape[0], img.shape[1] * COUNT, img.shape[2]), dtype=np.uint8)
    for i, image in enumerate(images_aug):
        # NOTE(review): the file name depends only on `i`, so each source
        # image overwrites the previous one's files - confirm this is wanted.
        # NOTE(review): PIL treats the array as RGB while cv2.imwrite below
        # treats the same channel order as BGR - confirm which is correct.
        Image.fromarray(image, "RGB").save("%s.jpg" % i, "jpeg")
        new_image[:, i * img.shape[1]:(i + 1) * img.shape[1], :] = image
    shapes.append(img.shape)
    cv2.imwrite("compose-%s.jpg" % idx, new_image)

shapes = np.asarray(shapes)
# shape[0] is the height and shape[1] the width; the original had the two
# swapped and indexed exactly 20 rows, which failed for shorter lists.
heights = shapes[:, 0]
widths = shapes[:, 1]
min_width = np.amin(widths)
max_width = np.amax(widths)
mean_width = np.mean(widths)
min_height = np.amin(heights)
max_height = np.amax(heights)
mean_height = np.mean(heights)
可视化结果
统计结果
avg_height: 401.25, max_height: 480, min_height: 375, avg_width: 535.0, max_width: 640, min_width: 500
图片增强
代码
import cv2
import nori2 as nori
import boto3
import imgaug as ia
from imgaug import augmenters as iaa
import numpy as np
from meghair.utils import io
from meghair.utils.imgproc import imdecode
# S3 client bound to a custom endpoint URL, plus the bucket and object key of
# the image list. The typographic quotes from the paste were a syntax error.
client = boto3.client('s3', endpoint_url="http://oss.i.brainpp.cn")
bucket, key = "ai-cultivate", "1percent_ImageNet.txt"
def GetImageList(bucket, key):
    """Download the image list from S3 and decode lines 2-5 of it.

    Args:
        bucket: Name of the S3 bucket holding the list file.
        key: Object key of the list file inside the bucket.

    Returns:
        A list of decoded images, one per selected line.
    """
    resp = client.get_object(Bucket=bucket, Key=key)
    data = resp['Body'].read().decode('utf-8').split('\n')
    # The first tab-separated field is the nori ID; only entries 1..4 are used.
    nori_ids = [line.split('\t')[0] for line in data][1:5]
    print(len(nori_ids), nori_ids[0])
    fetcher = nori.Fetcher()
    img_list = [imdecode(fetcher.get(nori_id)) for nori_id in nori_ids]
    print(type(img_list[0]))
    return img_list
# Fetch the sample images once at module level; the calls further down reuse
# this list.
image_list = GetImageList(bucket, key)
def GetImageStatistics(img_list):
    """Return a one-line summary of image height/width statistics.

    Args:
        img_list: Iterable of array-like images exposing ``shape`` with the
            height at index 0 and the width at index 1.

    Returns:
        A string of the form ``"avg_height: ..., max_height: ...,
        min_height: ..., avg_width: ..., max_width: ..., min_width: ..."``.
    """
    img_size_list = []
    for i, img in enumerate(img_list):
        img_size_list.append({"img_num": "img_{}".format(i), "height": img.shape[0], "width": img.shape[1]})
    height_list = [img_size["height"] for img_size in img_size_list]
    width_list = [img_size["width"] for img_size in img_size_list]
    avg_height = np.mean(height_list)
    max_height = max(height_list)
    min_height = min(height_list)
    avg_width = np.mean(width_list)
    max_width = max(width_list)
    min_width = min(width_list)
    return "avg_height: {}, max_height: {}, min_height: {}, avg_width: {}, max_width: {}, min_width: {}".format(avg_height, max_height, min_height, avg_width, max_width, min_width)
# Print the size summary of the images fetched above.
print(GetImageStatistics(image_list))
def EnhanceImgs(img_list):
    """Write "result2.jpg": one row of augmented variants per input image.

    Each image contributes NUM independently augmented 128x128 tiles; tiles
    and rows are separated by 10 px of black.

    Args:
        img_list: Sequence of decoded images.
    """
    H, W = 128, 128
    NUM = 7
    seq = iaa.Sequential([
        iaa.Fliplr(0.5),  # horizontal flip with probability 0.5
        iaa.Crop(percent=(0, 0.2)),  # crop
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.6, 1.6)},  # zoom
            translate_px={"x": (-56, 45), "y": (-64, 74)},  # shift
            shear=(-16, 16),  # shear
            rotate=(-32, 58)  # rotation
        ),
        iaa.GaussianBlur(sigma=(0, 2.0)),  # Gaussian blur
        iaa.Resize({"height": H, "width": W})
    ], random_order=True)
    # Size the canvas from the argument. The original used the module-level
    # `image_list`, which broke for any other input list.
    result = np.zeros(shape=((H + 10) * len(img_list), (W + 10) * NUM, 3), dtype=np.uint8)
    for i, img in enumerate(img_list):
        img_array = np.array([img] * NUM, dtype=np.uint8)
        enhanced_img = np.zeros(shape=(H, (W + 10) * NUM, 3), dtype=np.uint8)
        images_aug = seq.augment_images(images=img_array)
        for j, item in enumerate(images_aug):
            enhanced_img[:, j * (W + 10): j * (W + 10) + W, :] = item
        # All rows are collected into a single output image.
        result[i * (H + 10): i * (H + 10) + H, :, :] = enhanced_img
    cv2.imwrite("result2.jpg", result)
# Write the augmentation sheet for the images fetched above.
EnhanceImgs(image_list)
import os
import cv2
import boto3
import imgaug.augmenters as iaa
import nori2 as nori
import numpy as np
from meghair.utils.imgproc import imdecode
def ReadImage():
    """Download the image list from object storage and decode every image.

    The first tab-separated field of each non-empty line is used as the nori
    ID. Each ID is printed as it is fetched.

    Returns:
        A list of decoded images in file order.

    Raises:
        SystemExit: With status -1 if anything fails while reading.
    """
    images = []
    try:
        s3_client = boto3.client('s3', endpoint_url="http://oss.i.brainpp.cn")
        context = s3_client.get_object(Bucket="ai-cultivate", Key="1percent_ImageNet.txt")['Body'].read().decode("utf8")
        fetcher = nori.Fetcher()
        for line in context.split('\n'):
            noriId = line.split('\t')[0].strip()
            if noriId:
                print(noriId)
                images.append(imdecode(fetcher.get(noriId)))
    except Exception as e:
        print("failed to read image from object storage,", e)
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module and is not guaranteed to exist in every runtime.
        raise SystemExit(-1) from e
    return images
def Statistic(images):
    """Print the max, min and average height and width of the images.

    Args:
        images: Iterable of array-like images exposing ``shape`` with the
            height at index 0 and the width at index 1.
    """
    dims = [(picture.shape[0], picture.shape[1]) for picture in images]
    heights = [dim[0] for dim in dims]
    widths = [dim[1] for dim in dims]
    for label, values in (("height", heights), ("width", widths)):
        print("max " + label, max(values))
        print("min " + label, min(values))
        print("avg " + label, np.average(values))
def Enhance(images, index, N=6, H=256, W=128):
    """Write "result.jpg" next to this file: rows of augmented image variants.

    The images at positions 0..index each contribute one row of N augmented
    H x W tiles; tiles and rows are separated by a 10 px black gap.

    Args:
        images: Iterable of decoded images.
        index: Position of the last image to include (inclusive).
        N: Number of augmented variants per image.
        H: Tile height in pixels.
        W: Tile width in pixels.
    """
    seq = iaa.Sequential([
        iaa.Affine(
            rotate=(-30, 30),  # rotation
        ),
        iaa.Crop(px=(0, 8)),  # random crop
        iaa.Resize({"height": H, "width": W})
    ], random_order=True)
    Gap = 10
    selected = [img for i, img in enumerate(images) if i <= index]
    # Keep the original N-row canvas, but grow it when more than N images are
    # selected; the original overflowed the canvas in that case.
    rows = max(N, len(selected))
    # uint8, not `np.int`: that alias has been removed from NumPy, and
    # cv2.imwrite expects 8-bit image data.
    res = np.zeros(((H + Gap) * rows, (W + Gap) * N, 3), dtype=np.uint8)
    for i, img in enumerate(selected):
        img_array = np.array([img] * N, dtype=np.uint8)
        write_img = np.zeros(shape=(H, (W + Gap) * N, 3), dtype=np.uint8)
        images_aug = seq.augment_images(images=img_array)
        for j, item in enumerate(images_aug):
            write_img[:, j * (W + Gap):j * (W + Gap) + W, :] = item
        res[(H + Gap) * i:(H + Gap) * i + H, :, :] = write_img
    cv2.imwrite(os.path.join(os.path.dirname(__file__), "result.jpg"), res)
if __name__ == "__main__":
    # Read all images, print their size statistics, then write the
    # augmentation sheet for the images at positions 0..5.
    images = ReadImage()
    Statistic(images)
    Enhance(images, 5)
max height 5065
min height 21
avg height 405.4505503083288
max width 4368
min width 46
avg width 471.49918039185076
第一周作业-田琨
引用
import cv2
import os
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from statistics import mean
def image_augument(images):
    """Apply a randomly ordered crop/rotate pipeline and return the result.

    The imgaug seed is reset to 1 on every call. The crop step is applied
    with probability 0.5 and the rotation step with probability 0.75.

    Args:
        images: Batch of images, forwarded as ``images=`` to the pipeline.

    Returns:
        Whatever the imgaug pipeline returns for the given batch.
    """
    ia.seed(1)
    random_crop = iaa.Sometimes(0.5, iaa.Crop(percent=(0, 0.2)))
    random_rotation = iaa.Sometimes(0.75, iaa.Affine(rotate=(-45, 45)))
    pipeline = iaa.Sequential([random_crop, random_rotation], random_order=True)
    return pipeline(images=images)
# Collect height/width of every readable image in ./image before and after
# augmentation, then print summary statistics for both sets.
file_pathname = './image'
img_high0 = []
img_wide0 = []
img_high_x = []
img_wide_x = []
for idx, filename in enumerate(os.listdir(file_pathname)):  # argument is the folder path
    img = cv2.imread(os.path.join(file_pathname, filename))
    if img is None:
        continue  # cv2.imread returns None for files it cannot decode
    # Wrap the image in a list: a bare H x W x C array passed as `images=`
    # would be interpreted as a batch of images, not as one image.
    img_x = image_augument([img])[0]
    img_high0.append(img.shape[0])
    img_wide0.append(img.shape[1])
    img_high_x.append(img_x.shape[0])
    img_wide_x.append(img_x.shape[1])

# The original printed from `img_high` / `img_wide`, which are never defined.
if img_high0:
    print('原始图像平均高度=', mean(img_high0), '原始图像平均宽度=', mean(img_wide0), '原始图像高度最高=', max(img_high0), '原始图像宽度最小=', min(img_wide0))
    print('处理图像平均高度=', mean(img_high_x), '处理图像平均宽度=', mean(img_wide_x), '处理图像高度最高=', max(img_high_x), '处理图像宽度最小=', min(img_wide_x))
第一周作业-丁胜夺
'''
不指定图片来源,可以使用任意来源的100张图片,进行基本的数据统计(统计框/图片的平均,最大,最小宽高),
数据预处理(对数据做旋转增强,randomcrop),并做可视化分析对比数据处理前后的变化
'''
import cv2
import os
import numpy as np
from imgaug import augmenters as iaa
import random
# Compute the max / min / mean height and width of ./pic/1.jpeg .. 9.jpeg.
high = []
width = []
imgList = []
for i in range(1, 10):
    img_path = './pic/' + str(i) + '.jpeg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising for an unreadable file.
        raise FileNotFoundError(img_path)
    imgList.append(img)
    high.append(img.shape[0])
    width.append(img.shape[1])
# Height values come first and width values second in every line; the
# original swapped the two in the min and mean lines.
print("图片最大高为:%d 图片最大宽为:%d" % (max(high), max(width)))
print("图片最小高为:%d 图片最小宽为:%d" % (min(high), min(width)))
print("图片平均高为:%d 图片平均宽为:%d" % (np.mean(high), np.mean(width)))
def image_augument(images):
    """Augment a batch of images with a randomly ordered imgaug pipeline.

    Args:
        images: Batch of images passed to ``augment_images``.

    Returns:
        The augmented batch returned by imgaug.
    """
    seq = iaa.Sequential(
        [
            iaa.Crop(px=(0, 16)),  # crop a random 0-16 px margin from the borders
            iaa.Fliplr(0.5),  # horizontal flip with probability 0.5
            iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # zoom
                translate_px={"x": (-16, 16), "y": (-16, 16)},  # shift in px
                rotate=(-45, 45),  # rotation angle range in degrees
            ),
            iaa.GaussianBlur((0, 1.0)),  # Gaussian blur, sigma range 0-1.0
        ],
        random_order=True,
    )
    image_aug = seq.augment_images(images)
    return image_aug
# Augment the loaded images and save them as a1.jpeg, a2.jpeg, ...
images_aug = image_augument(imgList)
# Iterate over what was actually produced instead of a hard-coded count of 9.
for i, augmented in enumerate(images_aug, start=1):
    cv2.imwrite('a' + str(i) + '.jpeg', augmented)
统计分析:
statistics result: {'avg_height': 128.0, 'max_height': 128, 'min_height': 128, 'avg_width': 256.0, 'max_width': 256, 'min_width': 256}
import cv2
import imgaug as ia
import imgaug.augmenters as iaa
import nori2 as nori
import numpy as np
from meghair.utils import io
from meghair.utils.imgproc import imdecode
# Homework script: for every image in the local list file, write a sheet of
# NUM augmented variants and record the original image size for statistics.
fetcher = nori.Fetcher()
ia.seed(1)
H, W = 256, 128
NUM = 9
seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Crop(px=(0, 16)),
    iaa.Resize({"height": H, "width": W}),
    iaa.Affine(
        rotate=(-45, 45)
    )
], random_order=True)
# Alternative data source kept for reference:
# datas = io.load("s3://ai-cultivate/1percent_ImageNet.txt")
with open("./testdir/1percent_ImageNet.txt") as datas:
    lines = datas.readlines()
shapes = []
for i, l in enumerate(lines):
    aar = l.split()
    if not aar:
        continue  # skip blank lines instead of failing on aar[0]
    img = imdecode(fetcher.get(aar[0]))[..., :3]
    # Record the ORIGINAL size here. The original appended after the inner
    # loop had rebound `img` to an augmented tile, so every recorded shape
    # was (H, W, 3).
    shapes.append(img.shape)
    images = np.array(
        [img] * NUM,
        dtype=np.uint8
    )
    write_img = np.zeros((H, (W + 10) * NUM, 3), dtype=np.uint8)
    images_aug = seq(images=images)
    for p, aug in enumerate(images_aug):
        write_img[:, p * (W + 10): p * (W + 10) + W, :] = aug
    cv2.imwrite("augimg/final_%d.jpg" % i, write_img)

shapes = np.asarray(shapes)
# shape[0] is the height and shape[1] the width; the original had the two
# swapped and indexed exactly 20 rows regardless of the list length.
heights = shapes[:, 0]
widths = shapes[:, 1]
min_width = np.amin(widths)
max_width = np.amax(widths)
mean_width = np.mean(widths)
min_height = np.amin(heights)
max_height = np.amax(heights)
mean_height = np.mean(heights)
数据爬取代码
import requests
import urllib
import json
from bs4 import BeautifulSoup
def getImages(category, num, path):
    """Download the images returned by a fixed Baidu image-search query.

    Files are saved as ``<path>101.jpg``, ``<path>102.jpg``, ... in result
    order.

    Args:
        category: Unused; kept for interface compatibility.
        num: Unused; kept for interface compatibility.
        path: Prefix (normally a directory ending in ``/``) prepended to each
            output file name.
    """
    # `import urllib` alone does not guarantee that the `request` submodule
    # is loaded, so import it explicitly.
    import urllib.request

    sessions = requests.session()
    sessions.headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
    # The pasted URL contained "latest=©right=": the "&copy" of "&copyright="
    # had been rendered as an HTML entity. Restored here.
    rsp = sessions.get('https://image.baidu.com/search/acjson?tn=resultjson_com&logid=8084350429552238728&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E5%85%85%E6%B0%94%E6%8B%B1%E9%97%A8&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word=%E5%85%85%E6%B0%94%E6%8B%B1%E9%97%A8&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&nojc=&pn=530&rn=100&gsm=1e&1622705416055=')
    formatRsp = json.loads(rsp.text)
    images = formatRsp['data']
    m = 100
    for img in images:
        # Skip entries without a usable URL (empty placeholders, non-dicts).
        # The original compared the entry to '' and then failed with KeyError
        # on such entries.
        img_url = img.get('hoverURL') if isinstance(img, dict) else None
        if not img_url:
            continue
        m = m + 1
        print(img_url)
        urllib.request.urlretrieve(img_url, path + str(m) + '.jpg')
# Run the download. The typographic quotes from the paste were a syntax error.
getImages(1, 2, '/home/guoqiang/projects/ai-train/results/arcs/')
图片
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from urllib import error
from urllib import request
import os
import time
import sys
import ssl
# Defaults for the crawl: start page, browser to drive, download directory.
url = "https://www.google.com"
explorer = "Chrome"
imgs_dir = "./images"
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, which presumably disables TLS certificate checks for every
# urllib download in this process - confirm that this is acceptable.
ssl._create_default_https_context = ssl._create_unverified_context
def progress_callback(count_of_blocks, block_size, total_size):
    """Draw a 50-character download progress bar on stdout.

    Intended as the ``reporthook`` of ``urlretrieve``.

    Args:
        count_of_blocks: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size in bytes; zero or negative when unknown.
    """
    bar_width = 50
    if total_size > 0:
        progress = int(bar_width * (count_of_blocks * block_size) / total_size)
        # Clamp: the last block can overshoot the total size.
        progress = max(0, min(progress, bar_width))
    else:
        # Unknown or empty size: show an empty bar instead of dividing by
        # zero or drawing a negative percentage.
        progress = 0
    sys.stdout.write("\r[%s%s] %d%%" % ('█' * progress, ' ' * (bar_width - progress), progress * 2))
    sys.stdout.flush()
class CrawlSelenium:
    """Drive a browser through a Google image search and download the results.

    Attributes:
        url: Page opened first (expected to expose a search box named ``q``).
        explorer: Name of the browser driver to use.
        options: Driver options; created by ``set_loading_strategy``.
        driver: The active webdriver while ``crawl`` is running.
    """

    # Browser names accepted by crawl(); each one is looked up as an
    # attribute of `selenium.webdriver`.
    _SUPPORTED_EXPLORERS = ("Chrome", "Opera", "Firefox", "Edge")

    def __init__(self, explorer="Chrome", url="https://www.google.com"):
        self.url = url
        self.explorer = explorer
        # Defined up front so crawl() works even when set_loading_strategy()
        # was never called.
        self.options = None
        self.driver = None

    def set_loading_strategy(self, strategy="normal"):
        """Create the driver options and set their page load strategy."""
        self.options = Options()
        self.options.page_load_strategy = strategy

    def crawl(self):
        """Run the search, scroll until all results are loaded, download them.

        Raises:
            ValueError: If ``explorer`` is not one of the supported browsers.
        """
        if self.explorer not in self._SUPPORTED_EXPLORERS:
            raise ValueError("unsupported explorer: {!r}".format(self.explorer))
        if self.options is None:
            self.set_loading_strategy()
        self.driver = getattr(webdriver, self.explorer)(options=self.options)
        try:
            self._open_image_results()
            self._load_all_results()
            self._download_images()
        finally:
            # Always release the browser process, including on errors.
            self.driver.quit()

    def _open_image_results(self):
        """Open the start page, submit the query and switch to the image tab."""
        self.driver.get(self.url)
        search_input = self.driver.find_element(By.NAME, 'q')
        webdriver.ActionChains(self.driver).move_to_element(search_input).send_keys("充气拱门" + Keys.ENTER).perform()
        self.driver.find_element(By.LINK_TEXT, '图片').click()

    def _scroll_to_bottom(self):
        """Scroll the page to its current bottom to trigger lazy loading."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")

    def _load_all_results(self):
        """Keep scrolling / clicking "show more" until the page reports the end."""
        self._scroll_to_bottom()
        show_more_button = self.driver.find_element(By.CSS_SELECTOR, "input[value='显示更多搜索结果']")
        try:
            while True:
                # The status element's text tells which loading state we are in.
                message = self.driver.find_element(By.CSS_SELECTOR, 'div.OuJzKb.Bqq24e').get_attribute('textContent')
                if message == '正在加载更多内容,请稍候':
                    self._scroll_to_bottom()
                elif message == '新内容已成功加载。向下滚动即可查看更多内容。':
                    self._scroll_to_bottom()
                    if show_more_button.is_displayed():
                        show_more_button.click()
                elif message == '看来您已经看完了所有内容':
                    break
                elif message == '无法加载更多内容,点击即可重试。':
                    show_more_button.click()
                else:
                    self._scroll_to_bottom()
        except Exception as err:
            # Best effort: download whatever has been loaded so far.
            print(err)

    def _download_images(self):
        """Save every result thumbnail that has a ``src`` into ``imgs_dir``."""
        imgs = self.driver.find_elements(By.CSS_SELECTOR, "img.rg_i.Q4LuWd")
        img_count = 0
        for img in imgs:
            try:
                time.sleep(1)
                print('\ndownloading image ' + str(img_count) + ': ')
                img_url = img.get_attribute("src")
                if img_url is None:
                    continue
                path = os.path.join(imgs_dir, str(img_count) + "_img.jpg")
                request.urlretrieve(url=img_url, filename=path, reporthook=progress_callback, data=None)
                img_count = img_count + 1
            except error.HTTPError as http_err:
                print(http_err)
            except Exception as err:
                print(err)
def main():
    """Create the download directory and run the crawl with default settings."""
    crawl_s = CrawlSelenium(explorer, url)
    crawl_s.set_loading_strategy("normal")
    # makedirs(exist_ok=True) also handles nested paths and avoids the
    # check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(imgs_dir, exist_ok=True)
    crawl_s.crawl()
if __name__ == "__main__":
    # Run the crawl only when executed as a script.
    main()
import requests
from bs4 import BeautifulSoup
import re
# Shared accumulator: every parser below appends the image URLs it finds here.
pic_list = []
# Request headers sent with every search request (browser-like User-Agent).
UA_header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}
def jingdong_parser(enter):
    """Collect product image URLs from a JD search result page.

    Reads the ``data-lazy-img`` attribute of every ``<img>`` tag and appends
    the resulting absolute URL to the module-level ``pic_list``.

    Args:
        enter: URL of the search result page.
    """
    sessions = requests.session()
    context = sessions.get(enter, headers=UA_header, timeout=5)
    bs = BeautifulSoup(context.content, "html.parser")
    for item in bs.find_all("img"):
        url = item.get("data-lazy-img")
        if url is None:
            continue
        if url.startswith("//"):
            # Protocol-relative URL: add only the scheme. The original
            # prefixed "http://" and produced "http:////img13...".
            url = "http:" + url
        elif not url.startswith(("http://", "https://")):
            url = "http://{}".format(url)
        pic_list.append(url)
    print(len(pic_list))
def google_parser(enter):
    """Collect every double-quoted http(s) string from a Google result page.

    The matches are appended, without their quotes, to the module-level
    ``pic_list``; the new list length is printed afterwards.

    Args:
        enter: URL of the search result page.
    """
    quoted_url = r'"http[^"]+?"'
    response = requests.session().get(enter, headers=UA_header, timeout=5)
    for match in re.findall(quoted_url, response.text):
        pic_list.append(match.replace('"', ''))
    print(len(pic_list))
# Search sources to crawl. Each entry has a display name, the search URL to
# fetch ("enterPoint"), and the parser function that extracts image URLs
# from that page.
source_list = [
    {
        "name": "京东",
        "enterPoint":"https://search.jd.com/Search?keyword=%E5%85%85%E6%B0%94%E6%8B%B1%E9%97%A8&enc=utf-8&wq=%E5%85%85%E6%B0%94%E6%8B%B1%E9%97%A8&pvid=7ba4143e1c9f4d10a01cb91ffe83e172",
        "parser":jingdong_parser
    },
    {
        "name":"谷歌",
        "enterPoint":"https://www.google.com.hk/search?q=%E5%85%85%E6%B0%94%E6%8B%B1%E9%97%A8&tbm=isch&hl=zh-CN&safe=strict&chips=q:%E5%85%85%E6%B0%94+%E6%8B%B1%E9%97%A8,online_chips:%E4%BA%AC%E4%B8%9C:WfJytJ9zkYQ%3D,online_chips:%E5%BA%86%E5%85%B8:yHw4K_VA60E%3D,online_chips:%E5%8F%8C%E6%8B%B1:Pr743zcO4_w%3D&sa=X&ved=2ahUKEwiTwoecq_vwAhWqJaYKHSzKBIgQ4lYoBXoECAEQJg&biw=1434&bih=763",
        "parser":google_parser
    }
]
# Run every configured parser, then dump the collected URLs one per line.
for source in source_list:
    print("begin new search batch task {}".format(source["name"]))
    parse = source["parser"]
    parse(source["enterPoint"])

with open("./pics.txt", 'w') as outfile:
    outfile.writelines(pic_url + '\n' for pic_url in pic_list)
http:////img13.360buyimg.com/n7/jfs/t1/173481/37/10735/94828/60a64d93Eb87b15cb/9925c36848b2017e.jpg
http:////img13.360buyimg.com/n7/jfs/t1/173481/37/10735/94828/60a64d93Eb87b15cb/9925c36848b2017e.jpg
http:////img11.360buyimg.com/n7/jfs/t17323/155/84442374/180138/81dd9aae/5a5c234aNa4efcbb9.jpg
http:////img11.360buyimg.com/n7/jfs/t17323/155/84442374/180138/81dd9aae/5a5c234aNa4efcbb9.jpg
http:////img10.360buyimg.com/n7/jfs/t17323/155/84442374/180138/81dd9aae/5a5c234aNa4efcbb9.jpg
http:////img14.360buyimg.com/n7/jfs/t1/150805/34/17988/228344/6018c6c6Ed3110add/4dd495ca8ebb24ba.jpg
http:////img14.360buyimg.com/n7/jfs/t1/150805/34/17988/228344/6018c6c6Ed3110add/4dd495ca8ebb24ba.jpg
http:////img14.360buyimg.com/n7/jfs/t1/28726/3/9435/307564/5c7e2f5aE62401eaa/d90cb63d364d0317.png
http:////img14.360buyimg.com/n7/jfs/t1/28726/3/9435/307564/5c7e2f5aE62401eaa/d90cb63d364d0317.png
http:////img11.360buyimg.com/n7/jfs/t1/51174/2/14567/145962/5db3c134Ef347f5b0/94d53fde0bdfd6e0.jpg
http:////img13.360buyimg.com/n7/jfs/t1/101427/19/776/195226/5db3c603Ee108fcda/cadd09948544ef02.jpg
http:////img10.360buyimg.com/n7/jfs/t1/182556/27/4292/176519/60a098fdE8768dee6/6ba02c8dc5bbc95c.jpg
http:////img13.360buyimg.com/n7/jfs/t1/133100/40/14211/204257/5f9bc883E17e7fa9f/4bdead6c49a1c3ad.jpg
http:////img14.360buyimg.com/n7/jfs/t1/146380/31/4795/190751/5f2d5cc5Ebbfac39c/64dc67c65119a745.jpg
http:////img10.360buyimg.com/n7/jfs/t1/52376/13/2664/203050/5d079507E1e3a9063/a6a7d7a091141e2f.jpg
http:////img12.360buyimg.com/n7/jfs/t1/19008/8/13995/213784/5ca426d1Ef73d0447/bd6b4d67b7d4d7c9.jpg
http:////img12.360buyimg.com/n7/jfs/t1/19008/8/13995/213784/5ca426d1Ef73d0447/bd6b4d67b7d4d7c9.jpg
http:////img10.360buyimg.com/n7/jfs/t1/105508/2/16261/77070/5e798354E22dbeded/f18f9b11029ee41f.jpg
http:////img14.360buyimg.com/n7/jfs/t1/110316/28/9975/77411/5e7982edE9de68bbc/29f175fbafc6a2c7.jpg
http:////img12.360buyimg.com/n7/jfs/t1/86330/35/16190/106031/5e7982bbE2dd4ddb4/4fd0aed9db8bb9d9.jpg
http:////img13.360buyimg.com/n7/jfs/t1/110612/36/10164/98317/5e7982e1E54afcc1a/e7876413e5ed7e3f.jpg
http:////img10.360buyimg.com/n7/jfs/t1/106398/17/16297/98517/5e798300E7cf718a5/d4527730b47df580.jpg
http:////img11.360buyimg.com/n7/jfs/t1/93090/11/16365/77931/5e798308E9e0f025b/3be1ddf0a879e6ce.jpg
http:////img13.360buyimg.com/n7/jfs/t12517/132/1302468238/365250/48e8dba1/5a1e61ffN0e14c2db.jpg
http:////img13.360buyimg.com/n7/jfs/t12517/132/1302468238/365250/48e8dba1/5a1e61ffN0e14c2db.jpg
http:////img10.360buyimg.com/n7/jfs/t13045/159/1303144441/339532/8f71acaf/5a1e61e3Na06ce502.jpg
http:////img12.360buyimg.com/n7/jfs/t13537/275/1319164367/330076/e0fc5dec/5a1e61d4N3946d1db.jpg
http:////img12.360buyimg.com/n7/jfs/t13315/224/1293363794/332790/e7181959/5a1e61f7N1881a324.jpg
http:////img10.360buyimg.com/n7/jfs/t13315/224/1293363794/332790/e7181959/5a1e61f7N1881a324.jpg
http:////img11.360buyimg.com/n7/jfs/t13387/281/79679848/328432/6f061f45/5a1e61f9Ne31b3bbb.jpg
http:////img12.360buyimg.com/n7/jfs/t12703/142/1317318527/321382/ef9b94ac/5a1e61ffN6b9f620e.jpg
http:////img12.360buyimg.com/n7/jfs/t1/53105/30/12228/125130/5d905697E18616ac8/1d9b166e78ce7365.jpg
http:////img12.360buyimg.com/n7/jfs/t1/53105/30/12228/125130/5d905697E18616ac8/1d9b166e78ce7365.jpg
http:////img10.360buyimg.com/n7/jfs/t1/196307/12/4848/260069/60ace397Ecd6e1823/55ba6d72183b1260.jpg
http:////img10.360buyimg.com/n7/jfs/t1/196307/12/4848/260069/60ace397Ecd6e1823/55ba6d72183b1260.jpg
http:////img12.360buyimg.com/n7/jfs/t1/167114/22/16406/162331/6069eb1bEe566c8c4/174bcbe28f5d254a.jpg
http:////img11.360buyimg.com/n7/jfs/t1/158811/25/17032/161701/6069eb10E98e951d8/6d154496eebb4695.jpg
http:////img10.360buyimg.com/n7/jfs/t1/174250/39/2049/165217/6069eb07Ef8d27e0a/efce31ec62620322.jpg
http:////img13.360buyimg.com/n7/jfs/t1/155843/6/19818/156740/6069eaacEc03e35e3/104709507bf2e80a.jpg
http:////img11.360buyimg.com/n7/jfs/t1/170005/14/16603/158469/6069eac6E75bd58dc/71c23c59922a75b4.jpg
http:////img13.360buyimg.com/n7/jfs/t1/170487/33/16350/159268/6069eafbE144b118a/23259d14ddd40c17.jpg
http:////img11.360buyimg.com/n7/jfs/t1/121988/4/11526/229158/5f4f694aE98272db5/84c2cc7c1fcdbf43.jpg
http:////img11.360buyimg.com/n7/jfs/t1/121988/4/11526/229158/5f4f694aE98272db5/84c2cc7c1fcdbf43.jpg
http:////img12.360buyimg.com/n7/jfs/t1/26286/36/1867/627357/5c17096dEe1e06aeb/b83e73e64095490b.jpg
http:////img12.360buyimg.com/n7/jfs/t1/26286/36/1867/627357/5c17096dEe1e06aeb/b83e73e64095490b.jpg
http:////img13.360buyimg.com/n7/jfs/t16429/181/490029572/352982/391c7253/5a333f34Nf0f3bf97.jpg
http:////img13.360buyimg.com/n7/jfs/t16429/181/490029572/352982/391c7253/5a333f34Nf0f3bf97.jpg
http:////img10.360buyimg.com/n7/jfs/t11626/269/2292714812/326906/fbd8356a/5a151bdcN2c5e2ea8.jpg
http:////img12.360buyimg.com/n7/jfs/t11626/269/2292714812/326906/fbd8356a/5a151bdcN2c5e2ea8.jpg
http:////img14.360buyimg.com/n7/jfs/t11626/269/2292714812/326906/fbd8356a/5a151bdcN2c5e2ea8.jpg
http:////img11.360buyimg.com/n7/jfs/t11626/269/2292714812/326906/fbd8356a/5a151bdcN2c5e2ea8.jpg
http:////img12.360buyimg.com/n7/jfs/t16429/181/490029572/352982/391c7253/5a333f34Nf0f3bf97.jpg
http:////img13.360buyimg.com/n7/jfs/t11626/269/2292714812/326906/fbd8356a/5a151bdcN2c5e2ea8.jpg
http:////img10.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img10.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img12.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img14.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img11.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img14.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img13.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img12.360buyimg.com/n7/jfs/t1/98836/9/19715/224004/5ea0061bE4ad310b8/1e603ef654968be1.jpg
http:////img12.360buyimg.com/n7/jfs/t14080/162/1267506310/199341/19bceac5/5a1d1fa0N2e53734d.jpg
http:////img12.360buyimg.com/n7/jfs/t14080/162/1267506310/199341/19bceac5/5a1d1fa0N2e53734d.jpg
http:////img14.360buyimg.com/n7/jfs/t1/55540/12/12042/125508/5d903fe6E08794100/fd4e096aa606b461.jpg
http:////img14.360buyimg.com/n7/jfs/t1/55540/12/12042/125508/5d903fe6E08794100/fd4e096aa606b461.jpg
http:////img11.360buyimg.com/n7/jfs/t12124/179/1249866081/237322/9c55b6b5/5a1d1fa0N51064458.jpg
http:////img10.360buyimg.com/n7/jfs/t1/55540/12/12042/125508/5d903fe6E08794100/fd4e096aa606b461.jpg
http:////img11.360buyimg.com/n7/jfs/t13558/193/1294258463/296117/7f3fcc5c/5a1d1f74N9adbae68.jpg
http:////img12.360buyimg.com/n7/jfs/t14080/162/1267506310/199341/19bceac5/5a1d1fa0N2e53734d.jpg
http:////img13.360buyimg.com/n7/jfs/t12124/179/1249866081/237322/9c55b6b5/5a1d1fa0N51064458.jpg
http:////img10.360buyimg.com/n7/jfs/t13558/193/1294258463/296117/7f3fcc5c/5a1d1f74N9adbae68.jpg
http:////img13.360buyimg.com/n7/jfs/t12916/295/1387743398/387379/a8cc13f4/5a1f720aN2b94896d.jpg
http:////img13.360buyimg.com/n7/jfs/t12916/295/1387743398/387379/a8cc13f4/5a1f720aN2b94896d.jpg
http:////img14.360buyimg.com/n7/jfs/t12916/295/1387743398/387379/a8cc13f4/5a1f720aN2b94896d.jpg
http:////img14.360buyimg.com/n7/jfs/t14056/248/1400603771/371465/286a4481/5a1f720dN38578c83.jpg
http:////img10.360buyimg.com/n7/jfs/t13066/41/1360393163/352518/6a459f07/5a1f720cN90a988dc.jpg
http:////img12.360buyimg.com/n7/jfs/t13054/338/1360093597/377686/12c7518c/5a1f71efNdcb8a139.jpg
http:////img12.360buyimg.com/n7/jfs/t12916/295/1387743398/387379/a8cc13f4/5a1f720aN2b94896d.jpg
http:////img11.360buyimg.com/n7/jfs/t12187/311/1377172701/393150/f1d05703/5a1f720bN17ac63c3.jpg
http:////img14.360buyimg.com/n7/jfs/t1/130013/12/106/189600/5ec8c6d1Ed49e448d/7ec8cd7e6cc18bb3.jpg
http:////img14.360buyimg.com/n7/jfs/t1/130013/12/106/189600/5ec8c6d1Ed49e448d/7ec8cd7e6cc18bb3.jpg
http:////img11.360buyimg.com/n7/jfs/t1/136457/39/101/114671/5ec8c6d3E10656e09/2de8889dea0d4b84.jpg
http:////img10.360buyimg.com/n7/jfs/t1/137472/1/104/119989/5ec8c6d3Ef01c2e01/0db8225c46dca343.jpg
http:////img12.360buyimg.com/n7/jfs/t1/135047/12/96/116493/5ec8c6d3Eb2f9fde0/ba8e9f6957c00c89.jpg
http:////img14.360buyimg.com/n7/jfs/t1/123556/37/2735/124637/5ec8c6d3E4ab0b0ff/28d553d67421311d.jpg
http:////img11.360buyimg.com/n7/jfs/t1/128947/39/2804/124732/5ec8c6d3E797daa27/6e186bedcee3bdb4.jpg
http:////img14.360buyimg.com/n7/jfs/t1/123957/38/2773/132728/5ec8c6d3E5769e19a/996e76f4b5cb0cdf.jpg
http:////img14.360buyimg.com/n7/jfs/t22777/6/2592901867/373506/b3f7dc07/5b8a3c7bN0d3d9a99.jpg
http:////img14.360buyimg.com/n7/jfs/t22777/6/2592901867/373506/b3f7dc07/5b8a3c7bN0d3d9a99.jpg
http:////img10.360buyimg.com/n7/jfs/t27874/63/229353259/413046/18eb33a1/5b8a3f57Nd10e1728.jpg
http:////img12.360buyimg.com/n7/jfs/t1/138249/35/3409/116868/5f16aa8aE998ffeb8/fa8787d0f9022376.jpg
http:////img12.360buyimg.com/n7/jfs/t1/138249/35/3409/116868/5f16aa8aE998ffeb8/fa8787d0f9022376.jpg
http:////img11.360buyimg.com/n7/jfs/t1/143368/30/3489/116893/5f1799e9E8febf854/bcc3769faafb12e1.jpg
http:////img14.360buyimg.com/n7/jfs/t1/102152/12/17990/240065/5e8eb52dE483fb84b/4ab5d8a31da64166.jpg
http:////img11.360buyimg.com/n7/jfs/t1/138410/7/3520/40378/5f16ac75Edd489607/bb6111967de4b75d.jpg
http:////img10.360buyimg.com/n7/jfs/t1/117655/24/13012/82766/5f16ac75E4db679dd/da99b51cd7ce76df.jpg
http:////img14.360buyimg.com/n7/jfs/t1/127129/1/7767/82777/5f16ac75Ed9beba16/9509cb41e6c8ea3f.jpg
http:////img14.360buyimg.com/n7/jfs/t1/142190/1/3430/70265/5f169c95Ee95512be/ce6ef399e0c99543.jpg
http:////img13.360buyimg.com/n7/jfs/t26707/105/217148222/413046/18eb33a1/5b8a3f78N608504e0.jpg
http:////img13.360buyimg.com/n7/jfs/t26707/105/217148222/413046/18eb33a1/5b8a3f78N608504e0.jpg
http:////img14.360buyimg.com/n7/jfs/t13690/191/402246790/397731/e8647e83/5a0a96e6N0720290d.jpg
http:////img14.360buyimg.com/n7/jfs/t13690/191/402246790/397731/e8647e83/5a0a96e6N0720290d.jpg
http:////img13.360buyimg.com/n7/jfs/t13327/234/403340198/398396/467924ff/5a0a96c5Nd8fb1744.jpg
http:////img14.360buyimg.com/n7/jfs/t13690/191/402246790/397731/e8647e83/5a0a96e6N0720290d.jpg
http:////img12.360buyimg.com/n7/jfs/t13066/254/417747087/86034/bb7c4e8e/5a0b8f51N9239e1a6.jpg
http:////img13.360buyimg.com/n7/jfs/t13690/191/402246790/397731/e8647e83/5a0a96e6N0720290d.jpg
http:////img11.360buyimg.com/n7/jfs/t13690/191/402246790/397731/e8647e83/5a0a96e6N0720290d.jpg
http:////img12.360buyimg.com/n7/jfs/t12589/336/430002450/273810/77af4f4a/5a0b8f3bN15606f3d.jpg
http:////img14.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img14.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img12.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img10.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img13.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img13.360buyimg.com/n7/jfs/t1/52032/18/11607/548823/5d882104Ee25a3fc0/702ad9bdbd4b9e94.jpg
http:////img11.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img11.360buyimg.com/n7/jfs/t12247/188/1776023591/316534/1811a595/5a275188N220a6e05.jpg
http:////img12.360buyimg.com/n7/jfs/t1/23385/1/1574/653162/5c14785bEc8cba9d2/6b6cc1c841c19448.jpg
http:////img12.360buyimg.com/n7/jfs/t1/23385/1/1574/653162/5c14785bEc8cba9d2/6b6cc1c841c19448.jpg
http:////img12.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img12.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img12.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img10.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img14.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img11.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img11.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img13.360buyimg.com/n7/jfs/t1/139702/14/13718/203981/5fa8f930E759acca0/e9d6e7844868ceb3.jpg
http:////img11.360buyimg.com/n7/jfs/t12316/70/1953205849/360829/3d388c13/5a2e23e8Ndb771cac.jpg
http:////img11.360buyimg.com/n7/jfs/t12316/70/1953205849/360829/3d388c13/5a2e23e8Ndb771cac.jpg
http:////img11.360buyimg.com/n7/jfs/t14116/217/1946644347/308008/9e522a52/5a2e23caN2a791b95.jpg
http:////img12.360buyimg.com/n7/jfs/t14116/217/1946644347/308008/9e522a52/5a2e23caN2a791b95.jpg
http:////img10.360buyimg.com/n7/jfs/t12316/70/1953205849/360829/3d388c13/5a2e23e8Ndb771cac.jpg
http:////img12.360buyimg.com/n7/jfs/t14116/217/1946644347/308008/9e522a52/5a2e23caN2a791b95.jpg
http:////img13.360buyimg.com/n7/jfs/t14116/217/1946644347/308008/9e522a52/5a2e23caN2a791b95.jpg
http:////img14.360buyimg.com/n7/jfs/t13561/188/2018121916/229140/4726cfd6/5a2f5345Nc2a1d91a.jpg
http:////img11.360buyimg.com/n7/jfs/t12364/35/1344759540/354927/8190b9ff/5a1f718eNffdc3d51.jpg
http:////img11.360buyimg.com/n7/jfs/t12364/35/1344759540/354927/8190b9ff/5a1f718eNffdc3d51.jpg
http:////img12.360buyimg.com/n7/jfs/t1/159798/25/13419/616031/60500e5aE6cd7b897/4b33593742b9a5d9.jpg
http:////img12.360buyimg.com/n7/jfs/t1/159798/25/13419/616031/60500e5aE6cd7b897/4b33593742b9a5d9.jpg
http:////img13.360buyimg.com/n7/jfs/t1/164755/24/12289/516639/60500e37Ef0f3bde9/39830aca72d5abaa.jpg
http:////img12.360buyimg.com/n7/jfs/t1/164755/24/12289/516639/60500e37Ef0f3bde9/39830aca72d5abaa.jpg
http:////img12.360buyimg.com/n7/jfs/t1/170101/31/12859/776141/60500e40Ee98c2e35/3b14bb484ffd8bdc.png
http:////img12.360buyimg.com/n7/jfs/t1/170101/31/12859/776141/60500e40Ee98c2e35/3b14bb484ffd8bdc.png
http:////img11.360buyimg.com/n7/jfs/t1/167946/26/12799/179961/60500e4aEcc9d23d7/503d84d56e7b1ccf.jpg
http:////img13.360buyimg.com/n7/jfs/t1/167008/10/12855/771412/60500e44E34bedb41/6d7a7f50aaeefcbd.png
http:////img10.360buyimg.com/n7/jfs/t1/124346/33/15024/219260/5f89149cE1e2cd04d/90b3735ff2242701.jpg
http:////img10.360buyimg.com/n7/jfs/t1/124346/33/15024/219260/5f89149cE1e2cd04d/90b3735ff2242701.jpg
http:////img11.360buyimg.com/n7/jfs/t1/124346/33/15024/219260/5f89149cE1e2cd04d/90b3735ff2242701.jpg
http:////img12.360buyimg.com/n7/jfs/t1/122907/4/15199/216579/5f8914c1Effd0e851/acb177c9ee8b2b40.jpg
http:////img13.360buyimg.com/n7/jfs/t1/122907/4/15199/216579/5f8914c1Effd0e851/acb177c9ee8b2b40.jpg
http:////img14.360buyimg.com/n7/jfs/t1/124346/33/15024/219260/5f89149cE1e2cd04d/90b3735ff2242701.jpg
http:////img13.360buyimg.com/n7/jfs/t1/122907/4/15199/216579/5f8914c1Effd0e851/acb177c9ee8b2b40.jpg
http:////img12.360buyimg.com/n7/jfs/t1/124346/33/15024/219260/5f89149cE1e2cd04d/90b3735ff2242701.jpg
http:////img13.360buyimg.com/n7/jfs/t1/118307/25/378/51498/5e8bcf47E7c6bb9d8/d066e843a2259094.jpg
http:////img13.360buyimg.com/n7/jfs/t1/118307/25/378/51498/5e8bcf47E7c6bb9d8/d066e843a2259094.jpg
http:////img11.360buyimg.com/n7/jfs/t1/88511/9/17911/57830/5e8bcfbbE14143f21/ef5b0fec354b5115.jpg
http:////img12.360buyimg.com/n7/jfs/t1/104880/38/17633/60172/5e8bcfafEe094cedd/5a9d7c895fb054da.jpg
http:////img13.360buyimg.com/n7/jfs/t1/116275/3/382/54477/5e8bcf9eEfa7779dd/fcb72ed8badbb4ca.jpg
http:////img14.360buyimg.com/n7/jfs/t1/112969/23/404/63251/5e8bcf8fE4548b0d3/7468268dd2e54115.jpg
http:////img11.360buyimg.com/n7/jfs/t1/93258/19/17704/72858/5e8bcf6dEa569259d/fb53072b662f91a8.jpg
http:////img12.360buyimg.com/n7/jfs/t1/99957/31/17705/60105/5e8bcf54E727f4d2d/58f14a3783a49ae0.jpg
http:////img11.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img11.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img10.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img11.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img12.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img12.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img13.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img14.360buyimg.com/n7/jfs/t1/146028/2/18887/508519/5fdc5b12Ef533180f/d053114dc6e3c9d6.jpg
http:////img11.360buyimg.com/n7/jfs/t1/28743/15/12540/407209/5c988dd1E3bdc942a/95fdde85738d1985.jpg
http:////img11.360buyimg.com/n7/jfs/t1/28743/15/12540/407209/5c988dd1E3bdc942a/95fdde85738d1985.jpg
http:////img13.360buyimg.com/n7/jfs/t1/15209/25/12374/126331/5c98918dE59243599/c7683d3c97560792.jpg
http:////img12.360buyimg.com/n7/jfs/t1/15209/25/12374/126331/5c98918dE59243599/c7683d3c97560792.jpg
http:////img13.360buyimg.com/n7/jfs/t1/23404/11/12514/335834/5c988dcfEfb47aa35/dac940c83ff6370b.jpg
http:////img14.360buyimg.com/n7/jfs/t1/23404/11/12514/335834/5c988dcfEfb47aa35/dac940c83ff6370b.jpg
http:////img14.360buyimg.com/n7/jfs/t1/23404/11/12514/335834/5c988dcfEfb47aa35/dac940c83ff6370b.jpg
http:////img14.360buyimg.com/n7/jfs/t1/30028/28/7625/450776/5c988dceE239ca0fe/d68a609452037682.jpg
Code:
from icrawler.builtin import GoogleImageCrawler,BingImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
# Build a Google image crawler. The three *_threads arguments set the number
# of worker threads used by the feeder, parser and downloader stages.
# NOTE: string literals must use plain ASCII quotes; the typographic quotes
# (U+2018/U+2019) introduced by copy-pasting are a SyntaxError in Python.
google_crawler = GoogleImageCrawler(
    feeder_threads=1,
    parser_threads=4,
    downloader_threads=4,
    # Placeholder output directory -- replace with the real destination.
    storage={'root_dir': 'your_image_dir'},
)

# Search for the keyword and request up to 10000 images; file_idx_offset=0
# presumably starts output file numbering from the beginning -- confirm
# against the icrawler documentation.
google_crawler.crawl(keyword='充气拱门', max_num=10000, file_idx_offset=0)
Image: