#-*- coding: UTF-8 -*-
from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging
def format(filename):
    """Return *filename* with spaces and apostrophes replaced by "_".

    The name shadows the ``format`` builtin; it is kept because callers in
    this file use it.

    :param filename: candidate file name (text).
    :return: the same text with every unwanted token replaced by ``"_"``.
    """
    # NOTE(review): the middle token is reconstructed. The source showed an
    # unterminated ''' here, which is what an HTML-escaped apostrophe
    # ("&#39;") looks like once the entity has been decoded. Confirm against
    # the titles the site actually serves.
    unwanted = (' ', '&#39;', '\'')
    for token in unwanted:
        # str.replace is a no-op when the token is absent, so no separate
        # find() guard is needed.
        filename = filename.replace(token, "_")
    return filename
def download_mp3(mp3_url, filename, dir):
    """Download *mp3_url* to ``dir/filename`` unless that file already exists.

    Progress is reported on the ``"simple"`` logger. Failures are logged and
    swallowed (best effort), so one bad link does not stop a batch.

    :param mp3_url: URL of the file to fetch.
    :param filename: name of the file to create inside *dir*.
    :param dir: existing directory that receives the download.
    :return: None.
    """
    # Local imports: these names are not imported at the top of the file.
    import contextlib
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # NOTE(review): the logger object was not visible in this function;
    # "simple" is the logger name used by the script entry point - confirm.
    log = logging.getLogger("simple")

    target = os.path.join(dir, filename)
    if os.path.exists(target):
        log.info(target + " is existed.")
        return

    # Fetch the whole payload BEFORE creating the file: opening the target
    # first would leave an empty file behind on failure, and later runs
    # would then skip it as "existed".
    try:
        with contextlib.closing(urlopen(mp3_url)) as response:
            payload = response.read()
    except Exception:
        log.info(filename + ' is not downloaded.')
        return

    try:
        with open(target, 'wb') as out:
            out.write(payload)
    except (IOError, OSError):
        # Do not keep a truncated file around; it would block a retry.
        if os.path.exists(target):
            os.remove(target)
        log.info(filename + ' is not downloaded.')
        return

    log.info(filename + ' is downloaded.')
def download_all_mp3(start, end, dir, logger, base_url=""):
    """Scrape pages ``/mp3-d<N>.html`` for N in ``[start, end)`` and download
    the track each one links to.

    For every page the ``.mp3downloadbox`` element is read, the ``<h1>``
    title and the first quoted link after ``downloadboxlist`` are extracted,
    and the link is saved as ``<N>_<title>.mp3`` (sanitised by ``format``)
    through ``download_mp3``. Any error on one page is logged and the loop
    moves on to the next page.

    :param start: first page number (inclusive).
    :param end: last page number (exclusive).
    :param dir: directory handed to ``download_mp3``.
    :param logger: ``logging.Logger``-like object used for progress messages.
    :param base_url: scheme and host prepended to the page path. The host is
        not present in this source, so the default keeps the bare path;
        callers need to supply the real site here.
    :return: None.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    # Compiled once, outside the loop.
    # NOTE(review): the original flag names were lost (only "|" survived).
    # DOTALL is required for ".*" to span the multi-line HTML; IGNORECASE is
    # an assumption - confirm.
    pattern = re.compile(
        r'.*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?"(.*?)"',
        re.IGNORECASE | re.DOTALL,
    )

    for x in range(start, end):
        url = base_url + "/mp3-d" + str(x) + ".html"
        try:
            logger.info(str(x) + ": " + url)
            doc = py(url=url)
            box = doc('.mp3downloadbox')
            # An empty selection is falsy. The previous "is None or == ''"
            # test could never match a PyQuery result, so pages without the
            # box were effectively skipped; keep skipping them rather than
            # aborting the whole range.
            if not box:
                logger.info(url + " is not existed.")
                continue

            found = pattern.search(text_type(box))
            if found is None:
                continue

            title = found.group(1).strip()
            link = found.group(2)
            if link == '' or title == '':
                logger.info(url + " is not useful")
                continue

            logger.info(str(x) + ": " + link)
            download_mp3(link, format(str(x) + "_" + title + ".mp3"), dir)
        except Exception:
            # Best effort per page; the traceback is kept for diagnosis.
            logger.exception(url + " met exception.")
            continue
if __name__ == "__main__":
    # Usage: script.py [start end [dir_root]]
    # All arguments are optional; the defaults below apply when omitted.
    cli_args = sys.argv[1:]

    dir_root = "e:\\song"
    if len(cli_args) >= 3 and cli_args[2] != '':
        dir_root = cli_args[2]

    start, end = 1, 8000
    if len(cli_args) >= 2:
        # Convert before comparing: comparing the raw argument strings with
        # 0 is meaningless (and a TypeError on Python 3).
        wanted_start, wanted_end = int(cli_args[0]), int(cli_args[1])
        if wanted_start >= 0 and wanted_end >= 0:
            start, end = wanted_start, wanted_end
    print("Download from %s to %s.\n" % (start, end))

    # One sub-directory per requested range, e.g. <dir_root>/1-8000.
    dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.exists(dir):
        os.makedirs(dir)
    print("Download to " + dir + ".\n")

    # Messages go both to the console and to a log file in the target dir.
    logger = logging.getLogger("simple")
    # NOTE(review): the original level was not visible in this source;
    # INFO is the lowest level the download functions are known to need.
    logger.setLevel(logging.INFO)
    # NOTE(review): the original log file name was lost (empty string in the
    # source); "download.log" is a placeholder - confirm.
    fh = logging.FileHandler(os.path.join(dir, "download.log"))
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)

    download_all_mp3(start, end, dir, logger)