Process: simulate the login → fetch the HTML page → use regular expressions to parse out every eligible row → collect all the columns of each eligible row into the temporary CsvData list → write the result to the CSV file
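The row and column extraction relies on a pair of regular expressions, one matching <tr> blocks and one matching <td> cells (both appear in the full code below). As a minimal sketch of that step in Python 3 syntax, with a made-up two-row table standing in for the real page:

import re

# the same non-greedy patterns the crawler compiles
TrExp = re.compile("(?isu)<tr[^>]*>(.*?)</tr>")
TdExp = re.compile("(?isu)<td[^>]*>(.*?)</td>")

# hypothetical fragment standing in for the fetched account-detail page
html = "<table><tr><td>user1</td><td>¥100</td></tr><tr><td>user2</td><td>¥50</td></tr></table>"

for row in TrExp.findall(html):
    cells = [c.strip() for c in TdExp.findall(row)]
    print(cells)
# prints ['user1', '¥100'] and then ['user2', '¥50']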
Core Code:
#### write to the Csv file
with open(self.CsvFileName, 'wb') as csvfile:
    spamwriter = csv.writer(csvfile, dialect='excel')
    # set the title row
    spamwriter.writerow(["Game account", "User type", "Game name", "Channel",
                         "Recharge type", "Recharge amount", "Rebate amount",
                         "Order number", "Date"])
    # loop the data from CsvData into the CsvFileName file
    for item in self.CsvData:
        spamwriter.writerow(item)
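One caveat: open(self.CsvFileName, 'wb') is the Python 2 way to feed csv.writer; under Python 3 the file must be opened in text mode with newline=''. A minimal Python 3 equivalent of this writing step, with a hypothetical file name and a single hypothetical data row, would be:

import csv

CsvFileName = "pyw.csv"  # hypothetical name; the original leaves it blank
CsvData = [["15880xxxxxx", "normal", "demo", "web", "card",
            "100", "5", "0001", "2016-01-01"]]  # hypothetical row

# utf-8-sig adds a BOM so Excel opens non-ASCII fields correctly
with open(CsvFileName, "w", newline="", encoding="utf-8-sig") as csvfile:
    spamwriter = csv.writer(csvfile, dialect="excel")
    spamwriter.writerow(["Game account", "User type", "Game name", "Channel",
                         "Recharge type", "Recharge amount", "Rebate amount",
                         "Order number", "Date"])
    for item in CsvData:
        spamwriter.writerow(item)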
Full Code:
# coding=utf-8
import urllib
import urllib2
import cookielib
import re
import csv
import sys

class Pyw():
    # initialize data
    def __init__(self):
        # login Url (the host is omitted in the original)
        self.LoginUrl = "/login/check"
        # Url to fetch, with a page-number placeholder
        self.PageUrl = "/Data/accountdetail/%s"
        # POST data: user name, password, whether to remember the user name
        self.PostData = urllib.urlencode({
            "username": "15880xxxxxx",
            "password": "a123456",
            "remember": "1"
        })
        # current record counter
        self.PageIndex = 0
        # start looping from page 1
        self.PageTotal = 1
        # regex that parses out each tr
        self.TrExp = re.compile("(?isu)<tr[^>]*>(.*?)</tr>")
        # regex that parses out each td
        self.TdExp = re.compile("(?isu)<td[^>]*>(.*?)</td>")
        # create the cookie jar
        self.cj = cookielib.CookieJar()
        # build the opener
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        # total number of pages to parse
        self.Total = 4
        ##### set up the csv file
        self.CsvFileName = ""  # left blank in the original; fill in e.g. "pyw.csv"
        ##### stores the Csv data
        self.CsvData = []

    # parse the content of one page
    def GetPageItem(self, PageHtml):
        # loop over all rows of the Table
        for row in self.TrExp.findall(PageHtml):
            # take out all columns of the current row
            coloumn = self.TdExp.findall(row)
            # keep only rows with the expected number of columns
            if len(coloumn) == 9:
                # print "Game account: %s" % coloumn[0].strip()
                # print "User type: %s" % coloumn[1].strip()
                # print "Game name: %s" % coloumn[2].strip()
                # print "Channel: %s" % coloumn[3].strip()
                # print "Recharge type: %s" % coloumn[4].strip()
                # print "Recharge amount: %s" % coloumn[5].strip().replace("¥", "")
                # print "Rebate amount: %s" % coloumn[6].strip().replace("¥", "")
                # print "Order number: %s" % coloumn[7].strip()
                # print "Date: %s" % coloumn[8].strip()
                # assemble one row of data
                d = [coloumn[0].strip(),
                     coloumn[1].strip(),
                     coloumn[2].strip(),
                     coloumn[3].strip(),
                     coloumn[4].strip(),
                     coloumn[5].strip().replace("¥", ""),
                     coloumn[6].strip().replace("¥", ""),
                     coloumn[7].strip(),
                     coloumn[8].strip()]
                self.CsvData.append(d)

    # simulate the login and fetch the page data
    def GetPageHtml(self):
        try:
            # simulate the login
            request = urllib2.Request(url=self.LoginUrl, data=self.PostData)
            ResultHtml = self.opener.open(request)
            # start fetching the page data
            while self.PageTotal <= self.Total:
                # dynamically assemble the Url to parse
                m_PageUrl = self.PageUrl % self.PageTotal
                # advance to the next page
                self.PageTotal = self.PageTotal + 1
                # fetch the full content of the current page
                ResultHtml = self.opener.open(m_PageUrl)
                # parse the content of the page
                self.GetPageItem(ResultHtml.read())

            #### write to the Csv file
            with open(self.CsvFileName, 'wb') as csvfile:
                spamwriter = csv.writer(csvfile, dialect='excel')
                # set the title row
                spamwriter.writerow(["Game account", "User type", "Game name",
                                     "Channel", "Recharge type", "Recharge amount",
                                     "Rebate amount", "Order number", "Date"])
                # loop the data from CsvData into the CsvFileName file
                for item in self.CsvData:
                    spamwriter.writerow(item)

            print "Successfully exported the CSV file!"
        except Exception, e:
            print "404 error! %s" % e

# instantiate the class
p = Pyw()
# run the method
p.GetPageHtml()
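Note that urllib2 and cookielib exist only in Python 2; Python 3 folded them into urllib.request and http.cookiejar. A rough Python 3 sketch of the same login-then-fetch loop, with an assumed example.com host standing in for the omitted one, looks like this:

import urllib.parse
import urllib.request
import http.cookiejar

LoginUrl = "http://example.com/login/check"           # assumed host
PageUrl = "http://example.com/Data/accountdetail/%s"  # assumed host

# POST bodies must be bytes in Python 3
PostData = urllib.parse.urlencode({
    "username": "15880xxxxxx",
    "password": "a123456",
    "remember": "1"
}).encode("utf-8")

# the cookie jar keeps the login session across requests
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

opener.open(urllib.request.Request(url=LoginUrl, data=PostData))
for page in range(1, 5):  # the 4 pages the crawler walks
    PageHtml = opener.open(PageUrl % page).read().decode("utf-8")
    # feed PageHtml to the same GetPageItem-style regex parsing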
Export results
That is everything I have to share about this simple Python crawler example that exports a CSV file. I hope it gives you a useful reference, and I hope you will continue to support me.