
A Simple Python Crawler That Exports a CSV File: Example Explained

Process: simulate login → fetch the HTML page → use regular expressions to parse out every row that matches → push all columns of each matching row into the CsvData temporary list → write CsvData out to the CSV file.
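The login step works because the opener carries a cookie jar: the POST to /login/check stores the session cookie, and every later fetch through the same opener sends it back. The code below uses Python 2's urllib2 and cookielib; on Python 3 the same idea looks roughly like this (a minimal sketch — example.com stands in for the site's domain, which is omitted throughout this article):

import urllib.parse
import urllib.request
from http.cookiejar import CookieJar

# The opener keeps cookies across requests, so the login survives.
cookie = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie))

# Same form fields as the Python 2 code below.
post_data = urllib.parse.urlencode({
    "username": "15880xxxxxx",
    "password": "a123456",
    "remember": "1",
}).encode("utf-8")

# Log in once...
opener.open(urllib.request.Request(url="http://example.com/login/check", data=post_data))
# ...then fetch the data pages with the session cookie attached.
html = opener.open("http://example.com/Data/accountdetail/1").read().decode("utf-8")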

Core Code:

      #### Write to the CSV file
      with open(self.CsvFileName, 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, dialect='excel')
        # Write the title row
        spamwriter.writerow(["Game account", "User type", "Game name", "Channel", "Recharge type", "Recharge amount", "Rebate amount", "Order number", "Date"])
        # Loop over CsvData and write every row to the CsvFileName file
        for item in self.CsvData:
          spamwriter.writerow(item)
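Opening the file with 'wb' is a Python 2 idiom: the csv module there wants a binary file. Under Python 3 the writer expects a text file opened with newline=""; a sketch of the equivalent step (the header labels are the ones used above; the data row is a made-up sample):

import csv

header = ["Game account", "User type", "Game name", "Channel",
          "Recharge type", "Recharge amount", "Rebate amount",
          "Order number", "Date"]
csv_data = [["15880xxxxxx", "normal", "some game", "web",
             "card", "100", "5", "00001", "2015-01-01"]]  # made-up sample row

# Python 3: text mode plus newline="" replaces Python 2's 'wb'.
# utf-8-sig writes a BOM so Excel detects the encoding.
with open("Pyw.csv", "w", newline="", encoding="utf-8-sig") as csvfile:
    writer = csv.writer(csvfile, dialect="excel")
    writer.writerow(header)
    writer.writerows(csv_data)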

Full Code:

# coding=utf-8
import urllib
import urllib2
import cookielib
import re
import csv
import sys
 
class Pyw():
  # Initialize the data
  def __init__(self):
    # Login URL (the site's domain is omitted throughout this article)
    self.LoginUrl = "/login/check"
    # URL of the pages to fetch; %s is filled with the page number
    self.PageUrl = "/Data/accountdetail/%s"
    # POST data: user name, password, whether to remember the user name
    self.PostData = urllib.urlencode({
      "username": "15880xxxxxx",
      "password": "a123456",
      "remember": "1"
    })
    # Index of the current record
    self.PageIndex = 0
    # Page counter; fetching starts at page 1
    self.PageTotal = 1
    # Regex that parses out each tr
    self.TrExp = re.compile("(?isu)<tr[^>]*>(.*?)</tr>")
    # Regex that parses out each td
    self.TdExp = re.compile("(?isu)<td[^>]*>(.*?)</td>")
    # Create the cookie jar
    self.cookie = cookielib.CookieJar()
    # Build the opener that carries the cookies
    self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
    # Total number of pages to parse (4 pages in all)
    self.Total = 4
    #### CSV output file
    self.CsvFileName = "Pyw.csv"
    #### Buffer that stores the CSV rows
    self.CsvData = []
 
  # Parse the contents of one fetched page
  def GetPageItem(self, PageHtml):
    # Loop over every row of the table
    for row in self.TrExp.findall(PageHtml):
      # Pull out all columns of the current row
      column = self.TdExp.findall(row)
      # A record of interest has exactly 9 columns
      if len(column) == 9:
        # print "Game account: %s" % column[0].strip()
        # print "User type: %s" % column[1].strip()
        # print "Game name: %s" % column[2].strip()
        # print "Channel: %s" % column[3].strip()
        # print "Recharge type: %s" % column[4].strip()
        # print "Recharge amount: %s" % column[5].strip().replace("¥", "")
        # print "Rebate amount: %s" % column[6].strip().replace("¥", "")
        # print "Order number: %s" % column[7].strip()
        # print "Date: %s" % column[8].strip()
        # Assemble the row of data
        d = [column[0].strip(),
          column[1].strip(),
          column[2].strip(),
          column[3].strip(),
          column[4].strip(),
          column[5].strip().replace("¥", ""),
          column[6].strip().replace("¥", ""),
          column[7].strip(),
          column[8].strip()]
        self.CsvData.append(d)
 
  # Simulate the login and fetch the page data
  def GetPageHtml(self):
    try:
      # Simulate the login
      request = urllib2.Request(url=self.LoginUrl, data=self.PostData)
      ResultHtml = self.opener.open(request)
      # Fetch the data pages one by one
      while self.PageTotal <= self.Total:
        # Build the URL of the page to parse
        m_PageUrl = self.PageUrl % self.PageTotal
        # Advance the page counter
        self.PageTotal = self.PageTotal + 1
        # Fetch the full contents of the current page
        ResultHtml = self.opener.open(m_PageUrl)
        # Parse the page contents
        self.GetPageItem(ResultHtml.read())

      #### Write to the CSV file
      with open(self.CsvFileName, 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, dialect='excel')
        # Write the title row
        spamwriter.writerow(["Game account", "User type", "Game name", "Channel", "Recharge type", "Recharge amount", "Rebate amount", "Order number", "Date"])
        # Loop over CsvData and write every row to the CsvFileName file
        for item in self.CsvData:
          spamwriter.writerow(item)

      print "Successfully exported the CSV file!"
    except Exception, e:
      print "Error: %s" % e

# Instantiate the class
p = Pyw()
# Run the crawler
p.GetPageHtml()
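The heavy lifting in GetPageItem is done by the two regular expressions: TrExp captures the inner HTML of each <tr>, and TdExp then splits that into cells. Run over a tiny hand-written table (Python 3 shown; the patterns are copied verbatim from the class), they behave like this:

import re

tr_exp = re.compile("(?isu)<tr[^>]*>(.*?)</tr>")
td_exp = re.compile("(?isu)<td[^>]*>(.*?)</td>")

sample = """
<table>
  <tr><td> a1 </td><td>a2</td></tr>
  <tr><td>b1</td><td>b2</td></tr>
</table>
"""

for row in tr_exp.findall(sample):   # one match per <tr>...</tr>
    print([cell.strip() for cell in td_exp.findall(row)])
# ['a1', 'a2']
# ['b1', 'b2']

The (?isu) prefix makes the patterns case-insensitive, lets . match across newlines, and enables Unicode matching, which is why rows that span several lines of HTML are still captured.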

Export results: (screenshot of the generated CSV file not reproduced here)

That is everything in this walkthrough of a simple Python crawler that exports a CSV file. I hope it gives you a useful reference, and I hope you will continue to support me.