SoFunction
Updated on 2024-11-15

Sample code: reading Word documents with Python and inserting them into a MySQL database

The details are as follows:

python读取word,插入mysql

1. Batch-import Word documents, using the number in each document's title as its record number.

2. Fields that contain check marks (√ or ☑) must have the checked option matched and extracted before being stored; all other content is inserted into MySQL as-is.

# wuyanfeng
# -*- coding:utf-8 -*-
# Read the text in the docx code example
import docx
import pymysql
import re
import os

# Create the database connection.
# NOTE(review): the host is left empty and the password is hard-coded in the
# source; fill in the host and move the credential out of the file before
# using this anywhere but a throw-away local setup.
conn = pymysql.connect(
    host='',
    port=3306,
    user='root',
    passwd='wYf092415*',
    db='pays',
    charset='utf8',
)
# Create the cursor that the import functions below use to run statements
cursor = conn.cursor()

#Slicing Functions
def section(info, key, len11):
    """Return the text snippets that follow each occurrence of *key* in *info*.

    Used to pull the label that follows a check mark (e.g. "√" or "☑") out of
    a line of form text.

    Args:
        info: The text to scan. A sequence of strings is joined into one
            string first.
        key: The marker to look for.
        len11: Maximum number of characters to take after each marker.

    Returns:
        The snippets joined with "," in order of appearance, or "" when
        *info* is empty, *key* is empty, or *key* does not occur. A snippet
        is shorter than *len11* when the text ends early.
    """
    text = info if isinstance(info, str) else "".join(info)
    if not text or not key:
        return ""

    snippets = []
    position = text.find(key)
    while position != -1:
        begin = position + len(key)
        snippets.append(text[begin:begin + len11])
        # Resume just past the start of this hit so every marker is visited once
        position = text.find(key, position + 1)
    return ",".join(snippets)


def _marked_or_raw(text):
    """Return the checked option(s) in *text*, or *text* unchanged if none is checked."""
    # "√" takes precedence over "☑" when both kinds of mark are present
    for mark in ("√", "☑"):
        picked = "".join(section(text, mark, 4))
        if picked.strip() != "":
            return picked
    return text


def insettable(file):
    """Parse one knowledge-base Word document and insert it into `t_knowledge_base`.

    The record number is the run of digits that follows ``repository\\`` in
    the path; if the path has no such segment, the digits at the start of the
    file name are used instead. Field values come from the first table of the
    document and from its paragraphs:

    * table cell (0, 1) -> task_name, (0, 3) -> task_specification,
      (1, 3) -> task_type (checked option if any), (2, 1) -> task_desc
    * paragraph 1 -> task_class (checked option if any)
    * paragraph 2 -> preparer (text after ``filling unit:``)
    * paragraphs 4 .. second-to-last -> key_steps, one per line

    The row is written through the module-level ``cursor`` and committed on
    the module-level ``conn``.

    Args:
        file: Path of the .docx file to import.

    Raises:
        ValueError: If no record number can be derived from the path, or the
            document has fewer than three paragraphs.
    """
    found = re.search(r"repository\\(\d+)", file)
    if found is None:
        # Fall back to the number that starts the file name itself
        found = re.match(r"\s*(\d+)", os.path.basename(file))
    if found is None:
        raise ValueError("no record number found in path: %r" % (file,))
    number = int(found.group(1))

    document = docx.Document(file)

    # Form fields live in the first table of the document
    # NOTE(review): presumably raises IndexError when the table or a cell is missing
    table = document.tables[0]
    task_name = table.cell(0, 1).text
    task_specification = table.cell(0, 3).text
    task_type = _marked_or_raw(table.cell(1, 3).text)
    task_desc = table.cell(2, 1).text

    texts = [para.text for para in document.paragraphs]
    if len(texts) < 3:
        raise ValueError(
            "expected at least 3 paragraphs in %r, found %d" % (file, len(texts))
        )
    task_class = _marked_or_raw(texts[1])
    preparer = ''.join(re.findall('filling unit:(.*?)$', texts[2]))
    # Paragraphs 0-3 are header lines and the last one is excluded as well;
    # everything in between is a key step. Non-breaking spaces used as padding
    # are trimmed from both ends.
    key_steps = "\n".join(
        text.strip('\xa0') for text in texts[4:len(texts) - 1]
    )

    # Bound parameters instead of string formatting: quotes in the document
    # text can no longer break (or inject into) the statement.
    cursor.execute(
        "insert into `t_knowledge_base` (`no`, `preparer`, `task_class`, "
        "`task_name`, `task_specification`, `task_type`, `task_desc`, "
        "`task_basis`, `key_steps`) "
        "values (%s, %s, %s, %s, %s, %s, %s, NULL, %s)",
        (
            number,
            preparer,
            task_class,
            task_name,
            task_specification,
            task_type,
            task_desc,
            key_steps,
        ),
    )
    conn.commit()


def traverse(f):
    """Recursively import every Word document found under directory *f*.

    Sub-directories are descended into; each ``.docx`` file is handed to
    ``insettable``. Other files, and Word's ``~$`` lock files, are skipped
    because they cannot be opened as documents.

    Args:
        f: Path of the directory to walk.
    """
    for entry in os.listdir(f):
        tmp_path = os.path.join(f, entry)
        if os.path.isdir(tmp_path):
            print('Folder: %s' % tmp_path)
            traverse(tmp_path)
        elif entry.lower().endswith('.docx') and not entry.startswith('~$'):
            print('File: %s' % tmp_path)
            insettable(tmp_path)


# Root folder holding the documents to import. A raw string keeps the
# backslashes literal: in a normal string "\1" is an octal escape (chr(1))
# and "\ " is an invalid escape sequence.
# NOTE(review): adjust to the real location of the knowledge-base folder.
path = r'D:\Configuration Library\Public App\1.2 System Specifications\Knowledge Base'

# To debug a single file, call insettable() with the path of one .docx file
# instead of traversing the whole folder.
try:
    traverse(path)
finally:
    # Release the cursor first, then the connection, even if the import
    # stopped part-way through
    cursor.close()
    conn.close()

That is the complete sample code for reading Word documents with Python and inserting them into a MySQL database. I hope it serves as a useful reference, and I appreciate your continued support.