SoFunction
Updated on 2024-11-13

Copying a large number of files with Python

This article shares a concrete Python implementation for copying a large number of files, for your reference. The details are as follows.

I originally went to a partner company to copy some project data, only to find that there was 500 GB of it. Relying on the operating system's built-in copy function would probably have taken several hours, so I came back and learned how to do it by hand. Without further ado, here is the code:

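Note: CopyFiles1 copies sourceDir, including all of its subdirectories, to targetDir with the directory structure preserved. CopyFiles2 walks the source directory, optionally filtering for files of a specific format, and puts every file directly into a single target directory; the result is messy because the structure is flattened, but it is fast!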

import os
import time
import shutil
# Example source tree and destination root (Windows paths); they are the
# arguments of the commented-out CopyFiles1 call in the __main__ block.
sourceDir = r"D:\copytest\datatest"
targetDir = r"D:\copytest\result"
# Running total of files CopyFiles1 has visited (copied or skipped); the
# function updates it through a `global` declaration.
copyFileCounts = 0
 
def CopyFiles1(sourceDir, targetDir):
    """Recursively copy ``sourceDir`` into ``targetDir``, keeping its layout.

    Every regular file below ``sourceDir`` is copied to the same relative
    location below ``targetDir``. A file is skipped when a file of the same
    name and the same size already exists at the destination, so an
    interrupted run can be restarted without recopying finished files.
    Destination directories are created on demand, which means directories
    that contain no files are not created.

    The module-level counter ``copyFileCounts`` is incremented once for every
    file visited, whether it was copied or skipped.

    Args:
        sourceDir: Path of the directory to copy from.
        targetDir: Path of the directory to copy into.

    Raises:
        OSError: If a directory cannot be listed or created, or a file
            cannot be read or written.
    """
    global copyFileCounts
    stamp_format = '%Y-%m-%d %H:%M:%S'

    print(sourceDir)
    print("%s Current Processing Folder %s Processed %s Files"
          % (time.strftime(stamp_format, time.localtime()),
             sourceDir, copyFileCounts))

    for f in os.listdir(sourceDir):
        sourceF = os.path.join(sourceDir, f)
        targetF = os.path.join(targetDir, f)

        if os.path.isfile(sourceF):
            # Create the destination lazily, only once there is a file for it.
            os.makedirs(targetDir, exist_ok=True)
            copyFileCounts += 1

            # Same name and same size is treated as "already copied".
            if (not os.path.exists(targetF)
                    or os.path.getsize(targetF) != os.path.getsize(sourceF)):
                # copyfile streams the data in chunks and closes both handles,
                # so large files are never loaded into memory in one piece.
                shutil.copyfile(sourceF, targetF)
                print("%s %s Copy complete"
                      % (time.strftime(stamp_format, time.localtime()),
                         targetF))
            else:
                print("%s %s already exists, not duplicated"
                      % (time.strftime(stamp_format, time.localtime()),
                         targetF))
        elif os.path.isdir(sourceF):
            # Descend into the subdirectory, mirroring it under targetDir.
            CopyFiles1(sourceF, targetF)
 
def CopyFiles2(dir, targetDir="D:\\copytest\\result3", extensions=None):
    """Copy every file found under ``dir`` into one flat target directory.

    The source tree is walked recursively, but the directory structure is
    not reproduced: all files land directly in ``targetDir``. Files that
    share a name in different source subdirectories therefore overwrite one
    another, and the one visited last wins. ``targetDir`` should not be
    located inside ``dir``, otherwise the walk may revisit copied files.

    Args:
        dir: Root directory to search for files.
        targetDir: Directory that receives the copies; it is created if it
            does not exist. Defaults to the path the original script used.
        extensions: Optional file extension or iterable of extensions
            (for example ``".jpg"`` or ``["jpg", ".png"]``). Matching is
            case-insensitive and the leading dot is optional. ``None``
            (the default) copies every file.

    Raises:
        OSError: If the target directory cannot be created or a file
            cannot be read or written.
    """
    if extensions is None:
        wanted = None
    else:
        if isinstance(extensions, str):
            # A lone string would otherwise be iterated character by character.
            extensions = [extensions]
        wanted = {
            (ext if not ext or ext.startswith(".") else "." + ext).lower()
            for ext in extensions
        }

    os.makedirs(targetDir, exist_ok=True)

    for root, _subdirs, filenames in os.walk(dir):
        for name in filenames:
            if (wanted is not None
                    and os.path.splitext(name)[1].lower() not in wanted):
                continue
            # Progress output kept exactly as the original script emitted it.
            print('here')
            old_path = os.path.join(root, name)
            print(old_path)
            new_path = os.path.join(targetDir, name)
            shutil.copyfile(old_path, new_path)
 
#print("Total of ",i, "Layer files copied!")
 
if __name__ == "__main__":
    # Script entry point: run one copy strategy and report the elapsed time.
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments during a long copy.
    time_start = time.perf_counter()
    try:
        # psyco is an optional third-party accelerator; when it is not
        # installed the script simply runs without it.
        import psyco
        psyco.full()
    except ImportError:
        pass
    # Alternative strategy that preserves the directory structure:
    #CopyFiles1(sourceDir,targetDir)
    CopyFiles2("D:/copytest/datatest")
    time_end = time.perf_counter()
    print('totally cost', time_end - time_start)

That is all for this article.