#coding=utf-8
import os
import sys
import re
import urllib
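# Usage (assumed invocation; this is a Python 2 script, relying on urllib and print statements):
#   python <this_script>.py <save-dir> <page-url>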
URL_REG = re.compile(r'(http://[^/\\]+)', re.I)               # captures the scheme + host part of the page URL
IMG_REG = re.compile(r'<img[^>]*?src=([\'"])(.*?)\1', re.I)   # captures the quoted src value of each <img> tag
def download(dir, url):
    '''Download every image referenced by a web page.
    @dir local directory to save the images into
    @url URL of the web page
    '''
    global URL_REG, IMG_REG
    m = URL_REG.match(url)
    if not m:
        print '[Error]Invalid URL: ', url
        return
    host = m.group(1)
    if not os.path.isdir(dir):
        os.mkdir(dir)
    # Fetch the page and extract every image URL from the HTML
    html = urllib.urlopen(url).read()
    imgs = [item[1].lower() for item in IMG_REG.findall(html)]
    # Turn relative paths into absolute URLs, then drop duplicates
    f = lambda path: path if path.startswith('http://') else \
        host + path if path.startswith('/') else url + '/' + path
    imgs = list(set(map(f, imgs)))
    print '[Info]Found %d images.' % len(imgs)
    # Download each image, naming it after the last path segment of its URL
    for idx, img in enumerate(imgs):
        name = img.split('/')[-1]
        path = os.path.join(dir, name)
        try:
            print '[Info]Download(%d): %s' % (idx + 1, img)
            urllib.urlretrieve(img, path)
        except IOError:
            print "[Error]Can't download(%d): %s" % (idx + 1, img)
def main():
    if len(sys.argv) != 3:
        print 'Invalid argument count. Usage: %s <save-dir> <page-url>' % sys.argv[0]
        return
    dir, url = sys.argv[1:]
    download(dir, url)
if __name__ == '__main__':
    # download('D:\\Imgs', 'http://')
    main()