SoFunction
Updated on 2024-11-10

How to use multithreading in Python 3 to speed up a program

The code after optimization (new) and before optimization (old) is shown below:

from git_tools.git_tool import get_collect_projects, QQNews_Git
from threading import Thread, Lock
import datetime

# Base URL handed to QQNews_Git as its second constructor argument.
base_url = ""
# Shared result of the threaded implementation; get_project_member_lang_code_lines
# declares it global and the __main__ block compares it with the old result.
# NOTE(review): layout looks like {project key: {member username: stats}} - confirm.
project_members_commits_lang_info = {}
# Declared global in get_project_member_lang_code_lines; presumably guards
# writes to the shared result dict above.
lock = Lock()
# Declared global in the thread-spawning functions and iterated in the
# __main__ block after the threaded run has been kicked off.
threads = []

# Module-level string literal used as an author note (it is not the module
# docstring because it does not come first in the file).
'''
Author:zenkilan
'''


def count_time(func):
  """Decorator that prints how long each call to ``func`` takes.

  The wall-clock time around the call is taken with
  ``datetime.datetime.now()`` and the difference is printed in seconds.
  Nothing is printed if ``func`` raises.

  :param func: callable to time.
  :return: wrapper that forwards all positional and keyword arguments to
           ``func`` and returns its result unchanged.
  """
  # Local import: the module itself does not import functools.
  from functools import wraps

  @wraps(func)  # keep func.__name__ / __doc__ visible on the wrapper
  def timed(*args, **kwargs):
    start_time = datetime.datetime.now()
    ret = func(*args, **kwargs)
    elapsed_seconds = (datetime.datetime.now() - start_time).total_seconds()
    print(f"{func.__name__} execution took up time:{elapsed_seconds}")
    return ret

  return timed


def get_project_member_lang_code_lines(git, member, begin_date, end_date):
  """Fetch one member's commit language statistics and store them in the shared result.

  Intended to run as a thread target. Returns without storing anything when
  the looked-up user has a falsy ``id`` or when the statistics are empty.

  :param git: client object providing ``get_user_info`` and
              ``get_commits_user_lang_diff_between``.
  :param member: mapping with a ``"username"`` entry.
  :param begin_date: start of the period, passed through to the client.
  :param end_date: end of the period, passed through to the client.
  :return: None; the result is written to ``project_members_commits_lang_info``.
  """
  member_name = member["username"]
  r = git.get_user_info(member_name)
  if not r["id"]:
    return
  user_commits_lang_info = git.get_commits_user_lang_diff_between(r["id"], begin_date, end_date)
  if len(user_commits_lang_info) == 0:
    return
  # NOTE(review): the dictionary key was lost in the published listing (the
  # expression inside setdefault(...) and [...] is missing). `git.project` is
  # an assumption - confirm the attribute name against QQNews_Git.
  project_key = git.project
  # `with` releases the lock even if the update raises.
  with lock:
    project_members_commits_lang_info.setdefault(project_key, {})[member_name] = user_commits_lang_info


def get_project_lang_code_lines(project, begin_date, end_date):
  """Start one worker thread per member of ``project``.

  Each thread runs ``get_project_member_lang_code_lines``. Every thread is
  appended to the module-level ``threads`` list before it is started so the
  caller can join it later; this function does not wait for the threads.

  :param project: indexable record; ``project[1]`` and ``project[0]`` are
                  passed to ``QQNews_Git`` as first and third argument.
  :param begin_date: start of the period, forwarded to each worker.
  :param end_date: end of the period, forwarded to each worker.
  :return: None.
  """
  git = QQNews_Git(project[1], base_url, project[0])
  project_members = git.get_project_members()
  if len(project_members) == 0:
    return
  for member in project_members:
    thread = Thread(target=get_project_member_lang_code_lines, args=(git, member, begin_date, end_date))
    # Register before starting so whoever joins `threads` cannot miss it.
    threads.append(thread)
    thread.start()


@count_time
def get_projects_lang_code_lines(begin_date, end_date):
  """
  Collect per-project language statistics using threads (the new approach).

  Starts one thread per project returned by ``get_collect_projects()``; each
  thread runs ``get_project_lang_code_lines``. Every thread is appended to
  the module-level ``threads`` list before it is started.

  NOTE: this function returns as soon as the threads have been started. The
  caller must join the entries of ``threads`` before reading
  ``project_members_commits_lang_info``, and the duration printed by
  ``count_time`` does not include the time the workers need to finish.

  :param begin_date: start of the period, forwarded to each worker.
  :param end_date: end of the period, forwarded to each worker.
  :return: None.
  """
  for project in get_collect_projects():
    thread = Thread(target=get_project_lang_code_lines, args=(project, begin_date, end_date))
    # Register before starting so the join loop in the caller can reach it.
    threads.append(thread)
    thread.start()


@count_time
def get_projects_lang_code_lines_old(begin_date, end_date):
  """
  Collect per-project language statistics sequentially (the old, slow way).

  Two nested for loops: the outer one walks the projects from
  ``get_collect_projects()``, the inner one walks each project's members,
  and both levels call the Git client. A project only appears in the result
  once at least one of its members produced non-empty statistics.

  :param begin_date: start of the period, passed through to the client.
  :param end_date: end of the period, passed through to the client.
  :return: dict mapping a project key to a dict of member username -> statistics.
  """
  project_members_commits_lang_info = {}
  for project in get_collect_projects():
    git = QQNews_Git(project[1], base_url, project[0])
    project_members = git.get_project_members()
    user_commits_lang_info_dict = {}
    if len(project_members) == 0:
      continue
    for member in project_members:
      member_name = member["username"]
      r = git.get_user_info(member_name, debug=False)
      if not r["id"]:
        continue
      try:
        user_commits_lang_info = git.get_commits_user_lang_diff_between(r["id"], begin_date, end_date)
        if len(user_commits_lang_info) == 0:
          continue
        user_commits_lang_info_dict[member_name] = user_commits_lang_info
        # NOTE(review): the dictionary key was lost in the published listing.
        # `git.project` is an assumption - confirm the attribute name against
        # QQNews_Git and keep it identical to the key used by the threaded version.
        project_members_commits_lang_info[git.project] = user_commits_lang_info_dict
      except Exception as exc:
        # Still best-effort (one failing member must not abort the run), but
        # no longer silent, and KeyboardInterrupt/SystemExit now propagate.
        print(f"skipping member {member_name}: {exc!r}")
  return project_members_commits_lang_info


def test_results_equal(resultA, resultB):
  """
  Print both results and check that they are equal.

  The results are compared with ``==`` rather than by the length of their
  string form: two different results can print at the same length, and dict
  equality already ignores insertion order, so results filled in a different
  order still compare equal.

  :param resultA: first result.
  :param resultB: second result.
  :return: None.
  :raises AssertionError: if the two results differ.
  """
  print(resultA)
  print(resultB)
  # Explicit raise instead of `assert` so the check survives `python -O`.
  if resultA != resultB:
    raise AssertionError("results differ")


# The name matches pytest's default `test_*` pattern, but this is a helper
# that takes plain arguments, not fixtures - opt out of test collection.
test_results_equal.__test__ = False


if __name__ == '__main__':
  from git_tools.config import begin_date, end_date

  # Threaded run: this call only starts the workers and returns.
  get_projects_lang_code_lines(begin_date, end_date)
  # Wait for every worker before reading the shared result. Iterate the live
  # list rather than a copy: get_project_lang_code_lines also declares
  # `threads` global and is expected to register the member threads it
  # spawns there while this loop is running.
  for t in threads:
    t.join()
  # Sequential run for comparison, then check both approaches agree.
  old_result = get_projects_lang_code_lines_old(begin_date, end_date)
  test_results_equal(old_result, project_members_commits_lang_info)

The old method had time-consuming operations in both the outer for loop and the inner for loop:

1)git.get_project_members()

2)git.get_user_info(member_name, debug=False)

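Optimize in two steps: either the inner loop first and then the outer loop, or the other way round. Replace each for loop with threads so that the slow calls to the external service run concurrently, and protect writes to the shared result dictionary with a lock to avoid write conflicts.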

The test passes and the function runtime decorator displays (in seconds):

get_projects_lang_code_lines execution took up time:1.85294

get_projects_lang_code_lines_old execution took up time:108.604177

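Approximately 58 times faster (108.604177 s / 1.85294 s ≈ 58.6). Note that the timing decorator wraps only get_projects_lang_code_lines, which starts the threads; the join loop runs afterwards in the main block, so the first figure may not include the time the worker threads need to finish.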

This is the whole content of this article.