import re
import time
import urllib.parse
import urllib.request
def movie(movieTag):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        movieTag: URL of the page to fetch. The script passes the URL of
            one page of a tag listing.

    Returns:
        The response body as a ``str``.

    Raises:
        ValueError: if ``movieTag`` is not a URL ``urlopen`` recognises
            (for example, it has no scheme).
        urllib.error.URLError: if the URL cannot be opened.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    # Fetch the URL that was passed in rather than relying on a
    # module-level ``url`` variable happening to hold the same value.
    # The ``with`` block closes the response even if decoding fails.
    with urllib.request.urlopen(movieTag) as response:
        return response.read().decode('utf-8')
def subject(tagUrl_read):
    """Extract movie entries from a listing page and add them to ``newlist``.

    Three regular expressions are run over the page text:

    * link and title  -> ``(link, title)`` tuples
    * rating          -> rating strings such as ``'8.5'``
    * evaluation count -> the word captured before ``people evaluate``

    The three result lists are zipped positionally, so each entry appended
    to the module-level ``newlist`` has the shape
    ``((link, title), rating, evaluation_count)``.

    Args:
        tagUrl_read: Text of one listing page.

    Returns:
        The module-level ``newlist`` accumulator (the same list object on
        every call), after this page's entries have been appended.

    Known limitations / TODO (carried over from the original notes):
        1. Entries are collected one page at a time; ordering across all
           pages is only done by the caller.
        2. Add movie links in the next update and consider movie posters.
        3. An additional list is required.
        4. Export the results to a local txt or Excel file.
        5. Check whether link, name, rating and review count can be
           matched together in one pass. Because the three lists are
           zipped by position, a movie missing one field would shift
           every following entry.
    """
    # Regular expressions matching movie names (links), ratings and reviews.
    name_links = re.findall(r'(/subject/[0-9.]+)\/"\s+title="(.+)"', tagUrl_read)
    ratings = re.findall(r'<span\s+class="rating_nums">([0-9.]+)<\/span>', tagUrl_read)
    evaluations = re.findall(r'<span\s+class="pl">\((\w+)people evaluate\)<\/span>', tagUrl_read)

    # zip() stops at the shortest list, so incomplete trailing entries
    # are dropped rather than padded.
    newlist.extend(zip(name_links, ratings, evaluations))
    return newlist
# Script entry: ask for a movie type and a page count, scrape every listing
# page for that type, then print all collected entries sorted by rating.

# Percent-encode the movie type so special (e.g. Chinese) characters are
# safe to embed in the request URL.
movie_type = urllib.parse.quote(input('Please enter a movie type (e.g. drama, comedy, suspense):'))
page_end = int(input('Please enter the page number at the end of the search:'))

# The ``start`` query parameter advances by 20 per page, so the last offset
# requested is (page_end - 1) * 20.
num_end = page_end * 20

# Shared accumulator that subject() appends to.
newlist = []

for num in range(0, num_end, 20):
    # NOTE(review): this URL has no scheme or host, so urlopen will reject
    # it as written; the site prefix appears to be missing - confirm and
    # restore the full address.
    url = r'/tag/%s?start=%d' % (movie_type, num)
    subject(movie(url))

# sorted() with reverse=True orders from highest to lowest (descending);
# the default, reverse=False, is ascending. ``key`` selects the value to
# compare for each entry: here the rating at index 1. The rating is
# converted to float so that '10.0' ranks above '9.5', which a plain string
# comparison would get wrong.
movieLIST = sorted(newlist, key=lambda entry: float(entry[1]), reverse=True)
for entry in movieLIST:
    # Loop variable is deliberately not called ``movie`` so the movie()
    # function is not overwritten.
    print(entry)

# NOTE(review): the original line was a bare ``(3)``; presumably a
# three-second pause before finishing - confirm the intended call.
time.sleep(3)
print('End')