SoFunction
Updated on 2024-11-17

Python crawler introductory case: crawling "Where to Go" travel guides for tourist attractions, with visual analysis

Key concepts

  • requests: send network requests
  • parsel: parse the returned data
  • csv: save the data

Third-party libraries

  • requests >>> pip install requests
  • parsel >>> pip install parsel

Development Environment:

  • Version: python 3.8
  • Editor: pycharm 2021.2

[Paid VIP Full Version] Just watch and learn the tutorials, 80 episodes of Python basic introductory video teaching

Click here to watch online for free

crawler

import module

# Modules for sending web requests
import requests
# Modules for parsing data
import parsel
import csv
import time
import random

Send Request

# Listing page of travel notes: first page, ordered by popularity.
# NOTE(review): the URL has no scheme or host, so requests will reject it as
# written; prepend the site's base URL before running.
url = '/travelbook/?page=1&order=hot_heat'
# Printing the result as <Response [200]> means the request succeeded.
response = requests.get(url)

Access to data (web page source code)

# The response body decoded to text, i.e. the page's HTML source.
html_data = response.text

Parse the web page (options: re regular expressions, CSS selectors, XPath, bs4 (which has not been updated in six years), and json)

# html_data is a plain string; wrap it in a parsel Selector so it can be
# queried with CSS selectors.
selector = parsel.Selector(html_data)
# ::attr(href) extracts each link's href attribute; getall() returns a list.
url_list = selector.css('.b_strategy_list li h2 a::attr(href)').getall()
for detail_url in url_list:
    # Drop the '/youji/' prefix so only the note id remains.
    detail_id = detail_url.replace('/youji/', '')
    url_1 = '/travelbook/note/' + detail_id
    print(url_1)

Send a request to the detail page website (get, post)

# Example detail-page path: /travelbook/note/7701502
# Fetch the detail page and keep its HTML source as text.
response_1 = requests.get(url_1).text

parse a web page

# Wrap the detail-page HTML in a parsel Selector so the fields can be queried.
selector_1 = parsel.Selector(response_1)
# :nth-child() is a pseudo-class selector
# ::text extracts text content
# * matches every element
# Location
# NOTE(review): .get() returns None when nothing matches, which would make
# the .replace() call fail; confirm the breadcrumb is always present.
title = selector_1.css('.b_crumb_cont *:nth-child(3)::text').get().replace('Travel Tips', '')
# Short review
# NOTE(review): the class name in this selector appears to have been lost
# ('.::text' is not a valid CSS selector); TODO restore it from the page.
comment = selector_1.css('.::text').get()
# Departure date
date = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.when > p > ::text').get()
# Number of days
days = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.howlong > p > ::text').get()
# Per capita consumption
money = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.howmuch > p > ::text').get()
# People travelled with
character = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.who > p > ::text').get()
# Ways of playing: several <span> entries, joined into one space-separated string
play_list = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.how > p >  span::text').getall()
play = ' '.join(play_list)
# View count
count = selector_1.css('.view_count::text').get()
print(title, comment, date, days, money, character, play, count)

Save data

# Save as csv.
# Opened in append mode; newline='' lets the csv module control line endings.
# NOTE(review): the file is never closed in this snippet, and only the header
# row is written here; presumably data rows are written later. Close csv_qne
# once all rows are written.
csv_qne = open('Where to go.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.writer(csv_qne)
# Write the header row
csv_writer.writerow(['Location', 'Short Review', 'Departure time', 'Days', 'Per capita consumption', 'Characters', 'Play', 'Views'])

data visualization

import module

import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import *
from pyecharts.commons.utils import JsCode

Import data

# Load the CSV file into a pandas DataFrame.
data = pd.read_csv('Go_Count_Score.csv')
# Bare expression: presumably here so a notebook cell displays the DataFrame.
data

Top 10 tourist destinations and their corresponding costs

# Bar chart titled 'Destination Top 10'.
# m2 (x-axis categories) and n2 (bar values) must be defined before this runs.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(m2)
    .add_yaxis(
        'Destination Top 10',
        n2,
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Gradient fill, evaluated as JavaScript by ECharts in the browser.
            color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
            )
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='Destination Top 10'),
        xaxis_opts=opts.AxisOpts(
            name='Name of attraction',
            type_='category',
            # Rotate the category labels so long names do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        yaxis_opts=opts.AxisOpts(
            name='Number',
            min_=0,
            max_=120.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    .set_series_opts(
        # Horizontal marker lines at the series mean, maximum and minimum.
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='Mean value'),
                opts.MarkLineItem(type_='max', name='Maximum'),
                opts.MarkLineItem(type_='min', name='Minimum'),
            ]
        )
    )
)
bar.render_notebook()

# Bar chart titled 'Cost per capita for each attraction'.
# loc (x-axis categories) and price_mean2 (bar values) must be defined before
# this runs.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(loc)
    .add_yaxis(
        'Cost per capita',
        price_mean2,
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Gradient fill, evaluated as JavaScript by ECharts in the browser.
            color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
            )
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='Cost per capita for each attraction'),
        xaxis_opts=opts.AxisOpts(
            name='Name of attraction',
            type_='category',
            # Rotate the category labels so long names do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        yaxis_opts=opts.AxisOpts(
            name='Number',
            min_=0,
            max_=2000.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    .set_series_opts(
        # Horizontal marker lines at the series mean, maximum and minimum.
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='Mean value'),
                opts.MarkLineItem(type_='max', name='Maximum'),
                opts.MarkLineItem(type_='min', name='Minimum'),
            ]
        )
    )
)
bar.render_notebook()

Analysis of travel modes

# Ring-shaped pie chart with the title centred in the hole.
# m1 (slice labels) and n1 (slice values) must be defined before this runs.
pie = (
    Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='800px'))
    # Pair each label with its value; inner/outer radius make the ring shape.
    .add("", list(zip(m1, n1)), radius=["40%", "65%"])
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="Where to go \n\n the way to travel companionship",
            pos_left='center',
            pos_top='center',
            title_textstyle_opts=opts.TextStyleOpts(
                color='#FF6A6A', font_size=30, font_weight='bold'),
        ),
        # Hidden visual map: only used to colour slices by value.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            min_=38,
            max_=641,
            is_piecewise=False,
            dimension=0,
            range_color=['#9400D3', '#008afb', '#ffec4a', '#FFA500', '#ce5777'],
        ),
        legend_opts=opts.LegendOpts(is_show=False, pos_top='5%'),
    )
    .set_series_opts(
        # {b} is the slice name and {c} its value in ECharts formatter syntax.
        label_opts=opts.LabelOpts(formatter="{b}: {c}", font_size=12),
        tooltip_opts=opts.TooltipOpts(trigger="item", formatter="{b}: {c}"),
        itemstyle_opts={"normal": {
            "barBorderRadius": [30, 30, 30, 30],
            'shadowBlur': 10,
            'shadowColor': 'rgba(0,191,255,0.5)',
            'shadowOffsetY': 1,
            'opacity': 0.8
        }},
    )
)
pie.render_notebook()

Travel time analysis

# Line chart for the travel-time analysis.
# NOTE(review): both axes receive empty tuples, so nothing is plotted as
# written; the original data arguments appear to be missing. TODO: supply the
# x-axis labels and y-axis values.
line = (
    Line()
    .add_xaxis(())
    .add_yaxis('',())
)
line.render_notebook()

The 2021 travel-time curve shows its largest rise and fall around May 1. The reason is surely that the May Day holiday was extended to four days through adjusted working days, and many people took advantage of the break to travel and relax from work and daily life.

Travel play analysis

# Split the first 20 entries of `list` into two parallel lists:
# element [0] of each entry goes to m5 and element [1] to n5.
# NOTE(review): `list` must be a sequence of pairs defined earlier; the name
# shadows the builtin, so consider renaming it where it is defined.
m5 = []
n5 = []
for i in range(20):
    m5.append(list[i][0])
    n5.append(list[i][1])
# NOTE(review): presumably both lists are reversed in place so the order suits
# the horizontal bar chart that plots m6/n6; confirm against the original
# notebook. m6 and n6 are aliases of m5 and n5, not copies.
m5.reverse()
m6 = m5
n5.reverse()
n6 = n5

# Horizontal bar chart titled 'Outbound Play Analysis'.
# m6 (categories) and n6 (values) must be defined before this runs.
bar = (
    Bar(init_opts=opts.InitOpts(theme='dark', width='1000px', height='500px'))
    .add_xaxis(m6)
    .add_yaxis('', n6)
    .set_series_opts(
        label_opts=opts.LabelOpts(
            is_show=True,
            position='insideRight',
            font_style='italic',
        ),
        itemstyle_opts=opts.ItemStyleOpts(
            # Gradient fill, evaluated as JavaScript by ECharts in the browser.
            color=JsCode("""new echarts.graphic.LinearGradient(1, 0, 0, 0, 
                                             [{
                                                 offset: 0,
                                                 color: 'rgb(255,99,71)'
                                             }, {
                                                 offset: 1,
                                                 color: 'rgb(32,178,170)'
                                             }])""")),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Outbound Play Analysis"),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
        legend_opts=opts.LegendOpts(is_show=True),
    )
    # Swap the axes so the bars run horizontally.
    .reversal_axis()
)
bar.render_notebook()

"Photography" and "food" are closely tied to travel. No trip is complete without photography: people shoot the food, the scenery, and their best selves to post to their circle of friends. Once people start working there are no more summer and winter vacations, so using the weekend for a short trip has become the first choice for most people.

That concludes this article on the introductory Python crawler case of crawling "Where to Go" travel guides for tourist attractions and analyzing them visually. For more related Python crawling content, please search my previous articles or continue browsing the related articles below. I hope you will continue to support me in the future!