Use urllib2, it's so powerful!
I tried logging in through a proxy, pulling cookies, and following redirects to grab images...
Documentation: see the Python standard library reference (/library/)
Straight to the demo code.
Covers: direct fetches, using Request (POST/GET), proxies, cookies, and redirect handling. (A Python 3 sketch follows the demo.)
#!/usr/bin/python
# -*- coding:utf-8 -*-
# urllib2_test.py
# author: wklken
# 2012-03-17 wklken@

import urllib, urllib2, cookielib, socket

url = "....."  # change yourself

# The easiest way
def use_urllib2():
    try:
        f = urllib2.urlopen(url, timeout=5).read()
        print len(f)
    except urllib2.URLError, e:
        print e.reason

# Use Request
def get_request():
    # A global timeout can be set
    socket.setdefaulttimeout(5)
    # Parameters to submit [with no data the request is a GET; passed as data, it becomes a POST]
    params = {"wd": "a", "b": "2"}
    # Request headers can be included to identify the client
    i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",
                 "Accept": "text/plain"}
    # use post: some params are posted to the server; if not supported, an exception is thrown
    #req = urllib2.Request(url, data=urllib.urlencode(params), headers=i_headers)
    req = urllib2.Request(url, headers=i_headers)

    # After creating the request, you can still add headers; if a key is duplicated, the latter takes effect.
    #req.add_header('Accept', 'application/json')
    # The submission method can also be overridden
    #req.get_method = lambda: 'PUT'
    try:
        page = urllib2.urlopen(req)
        print len(page.read())
        # like get
        #url_params = urllib.urlencode({"a": "1", "b": "2"})
        #final_url = url + "?" + url_params
        #print final_url
        #data = urllib2.urlopen(final_url).read()
        #print "Method:get ", len(data)
    except urllib2.HTTPError, e:
        print "Error Code:", e.code
    except urllib2.URLError, e:
        print "Error Reason:", e.reason

def use_proxy():
    enable_proxy = False
    # proxy host is a placeholder -- change yourself
    proxy_handler = urllib2.ProxyHandler({"http": "http://proxy.example.com:8080"})
    null_proxy_handler = urllib2.ProxyHandler({})
    if enable_proxy:
        opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
    else:
        opener = urllib2.build_opener(null_proxy_handler, urllib2.HTTPHandler)
    # This sets the global opener for urllib2.
    urllib2.install_opener(opener)
    content = urllib2.urlopen(url).read()
    print "proxy len:", len(content)

# Cookie processor that swallows 403/400/500 responses instead of raising
class NoExceptionCookieProcesser(urllib2.HTTPCookieProcessor):
    def http_error_403(self, req, fp, code, msg, hdrs):
        return fp
    def http_error_400(self, req, fp, code, msg, hdrs):
        return fp
    def http_error_500(self, req, fp, code, msg, hdrs):
        return fp

def hand_cookie():
    cookie = cookielib.CookieJar()
    #cookie_handler = urllib2.HTTPCookieProcessor(cookie)
    # after adding the error-swallowing handler above:
    cookie_handler = NoExceptionCookieProcesser(cookie)
    opener = urllib2.build_opener(cookie_handler, urllib2.HTTPHandler)
    url_login = "/?login"  # change yourself
    params = {"username": "user", "password": "111111"}
    opener.open(url_login, urllib.urlencode(params))
    for item in cookie:
        print item.name, item.value
    #urllib2.install_opener(opener)
    #content = urllib2.urlopen(url).read()
    #print len(content)

# Get the final page URL after N redirects
def get_request_direct():
    import httplib
    httplib.HTTPConnection.debuglevel = 1
    request = urllib2.Request("")  # change yourself
    request.add_header("Accept", "text/html,*/*")
    request.add_header("Connection", "Keep-Alive")
    opener = urllib2.build_opener()
    f = opener.open(request)
    print f.url   # final URL after redirects
    print f.code
    print len(f.read())

if __name__ == "__main__":
    use_urllib2()
    get_request()
    get_request_direct()
    use_proxy()
    hand_cookie()
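A side note: urllib2 exists only on Python 2. On Python 3 the same functionality was merged into urllib.request / urllib.error, and the handler names (build_opener, ProxyHandler, HTTPCookieProcessor, Request) carry over. Below is a minimal sketch of the direct pull, the opener setup, and a POST on Python 3; the URL is a placeholder, not a real endpoint.

#!/usr/bin/python3
# urllib2's functionality lives in urllib.request / urllib.error on Python 3
import urllib.request
import urllib.error
import urllib.parse
import http.cookiejar

url = "http://www.example.com"  # placeholder -- change yourself

# Direct pull, mirroring use_urllib2()
try:
    data = urllib.request.urlopen(url, timeout=5).read()
    print(len(data))
except urllib.error.URLError as e:
    print(e.reason)

# Opener with cookies and (here, empty) proxy settings,
# mirroring use_proxy() / hand_cookie()
cookie = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(
    urllib.request.ProxyHandler({}),             # empty dict = no proxy
    urllib.request.HTTPCookieProcessor(cookie),
)
urllib.request.install_opener(opener)            # set the global opener

# POST with urlencoded params, mirroring get_request();
# on Python 3 the data argument must be bytes
params = urllib.parse.urlencode({"wd": "a", "b": "2"}).encode("utf-8")
req = urllib.request.Request(url, data=params, headers={"Accept": "text/plain"})
print(len(urllib.request.urlopen(req).read()))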