The code after the optimization (new) and before it (old) is as follows:
"""Collect per-project, per-member language line statistics from Git.

Two implementations are kept side by side so that their results and run
times can be compared: a threaded one (``get_projects_lang_code_lines``)
and the original nested-loop one (``get_projects_lang_code_lines_old``).

Author: zenkilan
"""
import datetime
import functools
from threading import Lock, Thread

from git_tools.git_tool import QQNews_Git, get_collect_projects

base_url = ""
# Result of the threaded implementation: {project key: {username: info}}.
project_members_commits_lang_info = {}
# Guards every write to project_members_commits_lang_info.
lock = Lock()
# Every thread started by the threaded implementation. The caller is
# responsible for joining them (see the __main__ block).
threads = []


def count_time(func):
    """Decorate *func* so that each call prints its wall-clock duration.

    :param func: the callable to time.
    :return: a wrapper that calls *func*, prints the elapsed seconds and
        returns whatever *func* returned.
    """

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def took_up_time(*args, **kwargs):
        start_time = datetime.datetime.now()
        ret = func(*args, **kwargs)
        end_time = datetime.datetime.now()
        elapsed = (end_time - start_time).total_seconds()
        print(f"{func.__name__} execution took up time:{elapsed}")
        return ret

    return took_up_time


def _project_key(git):
    """Return the dictionary key under which a project's results are stored.

    NOTE(review): the key expression was lost from the published listing.
    ``git.project`` is a reconstruction and must be confirmed against the
    real ``QQNews_Git`` class before this code is relied upon.
    """
    return git.project


def get_project_member_lang_code_lines(git, member, begin_date, end_date):
    """Thread target: fetch one member's statistics and store them.

    :param git: the ``QQNews_Git`` client of the member's project.
    :param member: a mapping with at least a ``"username"`` entry.
    :param begin_date: start of the period, passed through to the client.
    :param end_date: end of the period, passed through to the client.
    :return: None. A non-empty result is written into the module-level
        ``project_members_commits_lang_info``.
    """
    member_name = member["username"]
    r = git.get_user_info(member_name)
    if not r["id"]:
        return
    user_commits_lang_info = git.get_commits_user_lang_diff_between(
        r["id"], begin_date, end_date
    )
    if not user_commits_lang_info:
        return
    # "with" releases the lock even if the write raises, which a manual
    # acquire()/release() pair would not.
    with lock:
        per_project = project_members_commits_lang_info.setdefault(
            _project_key(git), {}
        )
        per_project[member_name] = user_commits_lang_info


def get_project_lang_code_lines(project, begin_date, end_date):
    """Thread target: start one worker thread per member of *project*.

    :param project: an indexable record; items 1 and 0 are passed to
        ``QQNews_Git`` as its first and third argument.
    :param begin_date: start of the period, passed through to the workers.
    :param end_date: end of the period, passed through to the workers.
    :return: None. The started threads are appended to ``threads``.
    """
    git = QQNews_Git(project[1], base_url, project[0])
    project_members = git.get_project_members()
    if not project_members:
        return
    for member in project_members:
        thread = Thread(
            target=get_project_member_lang_code_lines,
            args=(git, member, begin_date, end_date),
        )
        # Register before starting so the thread is always reachable for a
        # later join().
        threads.append(thread)
        thread.start()


@count_time
def get_projects_lang_code_lines(begin_date, end_date):
    """Collect the statistics with threads instead of nested for loops.

    One thread is started per project; each of those starts one thread per
    project member. This function only starts the project threads and
    returns immediately, so the time printed by ``count_time`` does not
    include waiting for the workers. Join every entry of ``threads`` before
    reading ``project_members_commits_lang_info``.

    :param begin_date: start of the period.
    :param end_date: end of the period.
    :return: None.
    """
    for project in get_collect_projects():
        thread = Thread(
            target=get_project_lang_code_lines,
            args=(project, begin_date, end_date),
        )
        threads.append(thread)
        thread.start()


@count_time
def get_projects_lang_code_lines_old(begin_date, end_date):
    """Collect the statistics sequentially (the original, slow approach).

    Two nested for loops, each level containing blocking calls to the Git
    client.

    :param begin_date: start of the period.
    :param end_date: end of the period.
    :return: ``{project key: {username: info}}`` for every member with a
        non-empty result.
    """
    result = {}
    for project in get_collect_projects():
        git = QQNews_Git(project[1], base_url, project[0])
        project_members = git.get_project_members()
        if not project_members:
            continue
        user_commits_lang_info_dict = {}
        for member in project_members:
            member_name = member["username"]
            r = git.get_user_info(member_name, debug=False)
            if not r["id"]:
                continue
            try:
                user_commits_lang_info = git.get_commits_user_lang_diff_between(
                    r["id"], begin_date, end_date
                )
            except Exception as exc:
                # Best effort, as in the original: one failing member must
                # not abort the whole run, but the failure is reported
                # instead of being swallowed silently.
                print(f"skipping {member_name}: {exc!r}")
                continue
            if not user_commits_lang_info:
                continue
            user_commits_lang_info_dict[member_name] = user_commits_lang_info
            result[_project_key(git)] = user_commits_lang_info_dict
    return result


def test_results_equal(resultA, resultB):
    """Print both results and check that they look the same.

    This is only a rough check: it compares the lengths of the two string
    representations, not the contents.

    :param resultA: first result dictionary.
    :param resultB: second result dictionary.
    :return: None.
    """
    print(resultA)
    print(resultB)
    assert len(str(resultA)) == len(str(resultB))


if __name__ == '__main__':
    from git_tools.config import begin_date, end_date

    get_projects_lang_code_lines(begin_date, end_date)
    # Member threads are appended to ``threads`` while this loop runs. Every
    # project thread is already in the list at this point, and it appends
    # all of its member threads before it finishes, so the loop reaches
    # them as well.
    for t in threads:
        t.join()
    old_result = get_projects_lang_code_lines_old(begin_date, end_date)
    test_results_equal(old_result, project_members_commits_lang_info)
The old method had time-consuming operations in both the outer for loop and the inner for loop:
1)git.get_project_members()
2)git.get_user_info(member_name, debug=False)
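The optimization is done in two steps, one per loop, in either order (inner loop first or outer loop first). Each for loop is replaced with threads, so the slow calls run concurrently. The threads write their results into a shared module-level dictionary, and a lock around each write avoids write conflicts.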
The test passes and the function runtime decorator displays (in seconds):
get_projects_lang_code_lines execution took up time:1.85294
get_projects_lang_code_lines_old execution took up time:108.604177
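Approximately 58 times faster. Note that the two timings are not measured in the same way: in the code as listed, get_projects_lang_code_lines returns as soon as it has started the project threads, and the threads are joined afterwards in the main block, so the 1.85 s figure does not include the time spent waiting for the worker threads to finish.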
This is the whole content of this article.