Import the required packages.
# Standard library
import base64
import time
from pathlib import Path

# Third-party
import pydash
import requests
from aip import AipFace  # Baidu AI open-platform face-detection SDK
from lxml import etree
Baidu Cloud face-detection application credentials.
# The only information you have to fill in are these three lines.
APP_ID = "xxxxxxxx"
API_KEY = "xxxxxxxxxxxxxxxx"
SECRET_KEY = "xxxxxxxxxxxxxxxx"

# Face-score threshold used to filter images; adjust freely if you have
# plenty of storage space.
BEAUTY_THRESHOLD = 55

# Zhihu API authorization token.  If you get permission errors, open Zhihu
# in a browser (no login needed) and copy a fresh token from the developer
# tools.  It is advisable to change this value: Zhihu's anti-crawler
# strategy is unknown, and too many people sharing the same token may
# break the program.
AUTHORIZATION = "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
Nothing below needs to be changed.
# Number of discussion items fetched per request; not recommended to set
# this too large.
LIMIT = 5

# ID of the Zhihu topic "Beauty", which is the parent topic of
# "Face Value" (topic 20013528).
SOURCE = "19552207"
The crawler masquerades as an ordinary browser request.
# Browser User-Agent string sent with every request so the crawler looks
# like an ordinary browser.
USER_AGENT = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.5 Safari/534.55.3"

# NOTE(review): these URLs appear to have had their scheme/host stripped
# during extraction (likely "https://www.zhihu.com") — confirm before use.
REFERER = "/topic/%s/newest" % SOURCE

# Discussion-list request URL template for a topic (filled with SOURCE).
BASE_URL = "/api/v4/topics/%s/feeds/timeline_activity"

# Query string attached to the initial request URL (already URL-encoded;
# reproduced verbatim from the original source).
URL_QUERY = "?include=data%5B%3F%%3Dtopic_sticky_module%29%%5B%3F%%3Danswer%29%%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%3F%%3Dtopic_sticky_module%29%%5B%3F%%3Danswer%29%.is_normal%2Ccomment_count%2Cvoteup_count%2Ccontent%2Crelevant_info%%5B%3F%28type%3Dbest_answerer%29%%3Bdata%5B%3F%%3Dtopic_sticky_module%29%%5B%3F%%3Darticle%29%%2Cvoteup_count%2Ccomment_count%2Cvoting%%5B%3F%28type%3Dbest_answerer%29%%3Bdata%5B%3F%%3Dtopic_sticky_module%29%%5B%3F%%3Dpeople%29%.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F%28type%3Dbest_answerer%29%%3Bdata%5B%3F%%3Danswer%29%%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%3F%%3Danswer%29%%5B%3F%28type%3Dbest_answerer%29%%3Bdata%5B%3F%%3Darticle%29%%%5B%3F%28type%3Dbest_answerer%29%%3Bdata%5B%3F%%3Dquestion%29%.comment_count&limit=" + str(LIMIT)

# Headers shared by every HTTP request made by this script.
HEADERS = {
    "User-Agent": USER_AGENT,
    "Referer": REFERER,
    "authorization": AUTHORIZATION,
}
Given a URL, fetch the raw content (image bytes).
def fetch_image(url):
    """Download and return the raw bytes at *url* (used for images).

    Network errors from ``requests`` propagate to the caller unchanged;
    the caller decides whether to skip the image or abort.
    """
    response = requests.get(url, headers=HEADERS)
    return response.content
Given a URL, fetch the corresponding JSON response (the topic/discussion list).
def fetch_activities(url):
    """Fetch the discussion list at *url* and return it decoded as JSON.

    Network/decoding errors from ``requests`` propagate to the caller.
    """
    response = requests.get(url, headers=HEADERS)
    return response.json()
Process the returned list of topics.
def parser_activities(datums, face_detective):
    """Walk one page of topic activities, saving images that pass the detector.

    For every activity in ``datums["data"]`` this extracts the answer HTML,
    pulls out all ``<img src>`` URLs, scores each image with
    *face_detective*, and writes accepted images into an ``image/``
    directory next to this script, named
    ``<score>--<author>--<title>--<seq>.jpg``.

    Returns the URL of the next page, or ``None`` when paging says we are
    at the end.
    """
    for data in datums["data"]:
        target = data["target"]
        # Skip activities that are not regular answers (missing fields).
        if "content" not in target or "question" not in target or "author" not in target:
            continue
        html = etree.HTML(target["content"])
        seq = 0  # per-answer counter to keep file names unique
        title = target["question"]["title"]
        author = target["author"]["name"]
        images = html.xpath("//img/@src")
        for image in images:
            # Ignore inline/data URIs and protocol-relative placeholders.
            if not image.startswith("http"):
                continue
            image_data = fetch_image(image)
            score = face_detective(image_data)
            # The detector returns None for rejected images.
            if not score:
                continue
            name = "{}--{}--{}--{}.jpg".format(score, author, title, seq)
            seq = seq + 1
            path = Path(__file__).parent.joinpath("image").joinpath(name)
            try:
                with open(path, "wb") as f:
                    f.write(image_data)
                print(path)
                # Be polite to the image host between downloads.
                time.sleep(2)
            except Exception:
                # Best-effort: a bad file name or disk error should not
                # abort the whole crawl.
                continue
    if not datums["paging"]["is_end"]:
        return datums["paging"]["next"]
    else:
        return None
Initialize the face-detection tool.
def init_detective(app_id, api_key, secret_key):
    """Build and return a face-scoring closure backed by Baidu AipFace.

    The returned ``detective(image_bytes)`` function sends the image to the
    Baidu face-detection API and returns its beauty score, or ``None`` when
    the image is rejected (no face, low face probability, score below
    BEAUTY_THRESHOLD, or not detected as female).
    """
    client = AipFace(app_id, api_key, secret_key)
    options = {"face_field": "age,gender,beauty,qualities"}

    def detective(image):
        # The API expects the image as a base64-encoded string.
        image = str(base64.b64encode(image), "utf-8")
        response = client.detect(str(image), "BASE64", options)
        response = pydash.get(response, "result")
        if not response:
            return
        if (not response) or (response["face_num"] == 0):
            return
        face_list = response["face_list"]
        # NOTE(review): the pydash paths below were reconstructed from the
        # Baidu AipFace response schema — confirm against the API docs.
        if pydash.get(face_list, "0.face_probability") < 0.6:
            return
        if pydash.get(face_list, "0.beauty") < BEAUTY_THRESHOLD:
            return
        if pydash.get(face_list, "0.gender.type") != "female":
            return
        score = pydash.get(face_list, "0.beauty")
        return score

    return detective
Program entry point.
def main():
    """Crawl the topic page by page until paging reports the end."""
    face_detective = init_detective(APP_ID, API_KEY, SECRET_KEY)
    url = BASE_URL % SOURCE + URL_QUERY
    while url is not None:
        datums = fetch_activities(url)
        # parser_activities returns the next page's URL, or None at the end.
        url = parser_activities(datums, face_detective)
        # Throttle page requests to avoid tripping anti-crawler measures.
        time.sleep(5)


if __name__ == '__main__':
    main()
That is the whole content of this article; I hope it helps you learn more.