Key concepts
- requests Send network requests
- parsel Parsing data
- csv save data
Third-party libraries
- requests >>> pip install requests
- parsel >>> pip install parsel
Development Environment:
- Version: python 3.8
- Editor: pycharm 2021.2
[Paid VIP Full Version] Just watch and learn the tutorials, 80 episodes of Python basic introductory video teaching
Click here to watch online for free
crawler
import module
import csv
import random
import time

# Module for parsing data
import parsel
# Module for sending web requests
import requests
Send Request
# Listing page of travel notes (page 1, ordered by popularity).
# NOTE(review): the scheme and host are missing from this URL as shown here;
# requests needs an absolute URL, so prepend the site's base URL before running.
url = f'/travelbook/?page=1&order=hot_heat'
# Printing the response shows <Response [200]> when the request succeeded.
response = requests.get(url)
Access to data (web page source code)
# The page source as a string, taken from the response obtained above.
html_data = response.text
Parsing web pages (re regular expressions, css selectors, xpath, bs4/haven't updated in 6 years, json)
# html_data is a plain string; wrap it in a Selector object so that it can be
# queried with CSS selectors.
selector = parsel.Selector(html_data)
# ::attr(href) extracts the link target of each entry; getall() returns a list.
url_list = selector.css('.b_strategy_list li h2 a::attr(href)').getall()
for detail_url in url_list:
    # Drop the '/youji/' prefix (string replacement) to keep only the note id.
    detail_id = detail_url.replace('/youji/', '')
    url_1 = '/travelbook/note/' + detail_id
    print(url_1)
Send a request to the detail page website (get, post)
# Example detail-page path: /travelbook/note/7701502
# NOTE(review): url_1 is assigned inside the loop over url_list, so this request
# presumably belongs in that loop body — confirm against the full script.
response_1 = requests.get(url_1).text
parse a web page
# Wrap the detail-page source so it can be queried with CSS selectors.
selector_1 = parsel.Selector(response_1)
# :nth-child() is a pseudo-class selector, ::text extracts text content,
# and * matches every element.

# Location. default='' keeps .replace() from failing on None when the
# breadcrumb node is missing.
title = (
    selector_1.css('.b_crumb_cont *:nth-child(3)::text')
    .get(default='')
    .replace('Travel Tips', '')
)
# Short review.
# NOTE(review): the class name in this selector was lost ('.::text' is not a
# valid CSS selector); restore the real class name before running.
comment = selector_1.css('.::text').get()
# Departure date
date = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.when > p > ::text').get()
# Number of days
days = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.howlong > p > ::text').get()
# Per capita consumption
money = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.howmuch > p > ::text').get()
# People (who travelled)
character = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.who > p > ::text').get()
# Ways of playing: several <span> entries joined into one space-separated string.
play_list = selector_1.css('#js_mainleft > div.b_foreword > ul > li.f_item.how > p > span::text').getall()
play = ' '.join(play_list)
# View count
count = selector_1.css('.view_count::text').get()
print(title, comment, date, days, money, character, play, count)
Save data
# Save as csv. The file is opened in append mode; newline='' lets the csv
# module control line endings itself.
# NOTE(review): the handle is never closed in the code shown here; close it
# (or use a `with` block) once all rows have been written.
csv_qne = open('Where to go.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.writer(csv_qne)
# Write the header row.
csv_writer.writerow(['Location', 'Short Review', 'Departure time', 'Days', 'Per capita consumption', 'Characters', 'Play', 'Views'])
data visualization
import module
import pandas as pd
from pyecharts.commons.utils import JsCode
from pyecharts.charts import *
from pyecharts import options as opts
Import data
# Load the scraped records into a DataFrame.
data = pd.read_csv('Go_Count_Score.csv')
# Bare expression: in a notebook cell this displays the DataFrame.
data
Top 10 tourist destinations and their corresponding costs
# Bar chart of the top 10 destinations.
# m2 (x-axis labels) and n2 (bar values) are defined outside the code shown here.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(m2)
    .add_yaxis(
        'Destination Top 10',
        n2,
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Vertical colour gradient, evaluated by ECharts in the browser.
            color=JsCode(
                "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
                "[{offset: 0, color: 'rgb(255,99,71)'}, "
                "{offset: 1, color: 'rgb(32,178,170)'}])"
            )
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='Destination Top 10'),
        xaxis_opts=opts.AxisOpts(
            name='Name of attraction',
            type_='category',
            # Rotate the labels so long names do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        yaxis_opts=opts.AxisOpts(
            name='Number',
            min_=0,
            max_=120.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    .set_series_opts(
        # Reference lines for the mean, maximum and minimum of the series.
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='Mean value'),
                opts.MarkLineItem(type_='max', name='Maximum'),
                opts.MarkLineItem(type_='min', name='Minimum'),
            ]
        )
    )
)
bar.render_notebook()
# Bar chart of the per-capita cost for each attraction.
# loc (x-axis labels) and price_mean2 (bar values) are defined outside the code
# shown here.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(loc)
    .add_yaxis(
        'Cost per capita',
        price_mean2,
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Vertical colour gradient, evaluated by ECharts in the browser.
            color=JsCode(
                "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
                "[{offset: 0, color: 'rgb(255,99,71)'}, "
                "{offset: 1, color: 'rgb(32,178,170)'}])"
            )
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='Cost per capita for each attraction'),
        xaxis_opts=opts.AxisOpts(
            name='Name of attraction',
            type_='category',
            # Rotate the labels so long names do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        yaxis_opts=opts.AxisOpts(
            name='Number',
            min_=0,
            max_=2000.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    .set_series_opts(
        # Reference lines for the mean, maximum and minimum of the series.
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='Mean value'),
                opts.MarkLineItem(type_='max', name='Maximum'),
                opts.MarkLineItem(type_='min', name='Minimum'),
            ]
        )
    )
)
bar.render_notebook()
Analysis of travel modes
# Ring (donut) chart of travel-companion modes.
# m1 (labels) and n1 (values) are defined outside the code shown here.
pie = (
    Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='800px'))
    # Inner and outer radius given as percentages produce the ring shape.
    .add("", [z for z in zip(m1, n1)], radius=["40%", "65%"])
    .set_global_opts(
        # The title is centred so that it sits inside the ring.
        title_opts=opts.TitleOpts(
            title="Where to go \n\n the way to travel companionship",
            pos_left='center',
            pos_top='center',
            title_textstyle_opts=opts.TextStyleOpts(
                color='#FF6A6A',
                font_size=30,
                font_weight='bold',
            ),
        ),
        # Hidden visual map: used only to colour slices by value.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            min_=38,
            max_=641,
            is_piecewise=False,
            dimension=0,
            range_color=['#9400D3', '#008afb', '#ffec4a', '#FFA500', '#ce5777'],
        ),
        legend_opts=opts.LegendOpts(is_show=False, pos_top='5%'),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter="{b}: {c}", font_size=12),
        tooltip_opts=opts.TooltipOpts(trigger="item", formatter="{b}: {c}"),
        itemstyle_opts={
            "normal": {
                "barBorderRadius": [30, 30, 30, 30],
                'shadowBlur': 10,
                'shadowColor': 'rgba(0,191,255,0.5)',
                'shadowOffsetY': 1,
                'opacity': 0.8,
            }
        },
    )
)
pie.render_notebook()
Travel time analysis
# Line chart of travel time.
# NOTE(review): the x-axis and y-axis data arguments were lost from this
# snippet (both are empty tuples), so as written the chart is empty; supply
# the real date and count sequences before running.
line = (
    Line()
    .add_xaxis(())
    .add_yaxis('', ())
)
line.render_notebook()
The 2021 travel-time curve shows its largest rise and fall around May 1st. The most likely reason is that the holiday was extended to four days through shifted working days, and many people took advantage of it to travel and relax as a break from work and daily life.
Travel play analysis
# Horizontal bar chart built from the first 20 (label, value) pairs.
# NOTE(review): `list` here is a name defined outside the code shown (it
# shadows the builtin); presumably a sequence of (label, value) pairs — confirm.
m5 = []
n5 = []
for i in range(20):
    m5.append(list[i][0])
    n5.append(list[i][1])
# Reverse both sequences in place; m6/n6 are aliases of the reversed lists.
m5.reverse()
m6 = m5
n5.reverse()
n6 = n5
bar = (
    Bar(init_opts=opts.InitOpts(theme='dark', width='1000px', height='500px'))
    .add_xaxis(m6)
    .add_yaxis('', n6)
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True, position='insideRight', font_style='italic'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Horizontal colour gradient, evaluated by ECharts in the browser.
            color=JsCode(
                "new echarts.graphic.LinearGradient(1, 0, 0, 0, "
                "[{ offset: 0, color: 'rgb(255,99,71)' }, "
                "{ offset: 1, color: 'rgb(32,178,170)' }])"
            )
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Outbound Play Analysis"),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
        legend_opts=opts.LegendOpts(is_show=True),
    )
    # Swap the axes so the bars run horizontally.
    .reversal_axis()
)
bar.render_notebook()
"Photography" and "food" are closely tied to travel. No trip is complete without photography: people photograph the food, the scenery and their best selves, and post all of it to their circle of friends. Once people start working there are no more summer and winter vacations, so a short weekend trip has become the first choice for most of them.
This concludes the article on an introductory Python crawler case: scraping the "Where to go" travel guides for tourist attractions and analysing them visually. For more on crawling "Where to go" with Python, please search my previous articles or continue browsing the related articles below. I hope you will continue to support me in the future!