Core Function Overview
This script implements the following core functions:
- Paragraph style cloning: Completely copy font, color, bold, italic and other formats
- Table format migration: Including cell borders, column widths, alignment methods, etc.
- Page break recognition processing: Automatically identify and copy page breaks
- Document structure maintenance: Maintain the hierarchy of the original document
Code parsing
1. Basic style copy
def copy_paragraph_style(run_from, run_to):
    """Copy character formatting from one run to another.

    Args:
        run_from: Source run whose formatting is read.
        run_to: Destination run whose formatting is overwritten in place.
    """
    # Run-level toggles.
    run_to.bold = run_from.bold
    run_to.italic = run_from.italic
    run_to.underline = run_from.underline

    # Font-level properties.
    # NOTE(review): the size / color.rgb / name / strike / shadow attribute
    # names were restored after text extraction dropped them -- confirm
    # against the original listing.
    run_to.font.size = run_from.font.size
    run_to.font.color.rgb = run_from.font.color.rgb
    run_to.font.name = run_from.font.name
    run_to.font.all_caps = run_from.font.all_caps
    run_to.font.strike = run_from.font.strike
    run_to.font.shadow = run_from.font.shadow
This function copies the text style of a run within a paragraph, covering the most commonly used formatting properties.
2. Page break recognition mechanism
def is_page_break(element):
    """Judge whether a body element carries a page break.

    Args:
        element: An lxml element taken from the document body.

    Returns:
        True if ``element`` is a paragraph containing a page break, or a
        table immediately followed by a paragraph containing one;
        False otherwise.
    """
    def contains_page_break(paragraph):
        # <w:br> lives inside a run (<w:r>), not directly under <w:p>,
        # so descendants must be searched rather than direct children.
        # qn() needs the 'w:' prefix to resolve the attribute name.
        return any(
            br.get(qn('w:type')) == 'page'
            for br in paragraph.iter(qn('w:br'))
        )

    if element.tag == qn('w:p'):
        return contains_page_break(element)

    if element.tag == qn('w:tbl'):
        # A table cannot hold the break itself; look at the paragraph
        # that directly follows it.
        next_element = element.getnext()
        return (
            next_element is not None
            and next_element.tag == qn('w:p')
            and contains_page_break(next_element)
        )

    return False
Through XML element parsing, intelligent recognition of page breaks after paragraphs and tables is achieved.
3. Table Depth Cloning
def clone_table(old_table, new_doc):
    """Create a new table in ``new_doc`` from an existing table.

    Args:
        old_table: Source table to copy.
        new_doc: Destination document; the new table is appended to it.

    Returns:
        The newly created table.
    """
    new_table = new_doc.add_table(
        rows=len(old_table.rows), cols=len(old_table.columns)
    )
    if old_table.style:
        new_table.style = old_table.style

    for i, old_row in enumerate(old_table.rows):
        for j, old_cell in enumerate(old_row.cells):
            new_cell = new_table.cell(i, j)

            # Drop the paragraph(s) a fresh cell starts with so that only
            # the copied paragraphs remain.
            for paragraph in new_cell.paragraphs:
                new_cell._element.remove(paragraph._element)

            for old_paragraph in old_cell.paragraphs:
                new_paragraph = new_cell.add_paragraph()
                for old_run in old_paragraph.runs:
                    new_run = new_paragraph.add_run(old_run.text)
                    copy_paragraph_style(old_run, new_run)
                new_paragraph.alignment = old_paragraph.alignment

            copy_cell_borders(old_cell, new_cell)

    # Column widths are copied only where the source defines one.
    for i, col in enumerate(old_table.columns):
        if col.width is not None:
            new_table.columns[i].width = col.width

    return new_table
This function implements:
- Table style inheritance
- Deep copy of cell content
- Border format migration
- Column width accurate copy
4. Main function logic
def clone_document(old_doc_path, new_doc_path):
    """Clone paragraphs, tables and page breaks into a new document.

    Args:
        old_doc_path: Path of the source .docx file.
        new_doc_path: Path the cloned document is saved to.

    Any exception is caught and reported on stdout; nothing is re-raised.
    """
    try:
        old_doc = Document(old_doc_path)
        new_doc = Document()

        # Page break processing logic
        # Fetch the paragraph/table lists once; each property access
        # rebuilds the list, so indexing it inside the loop is wasteful.
        paragraphs = old_doc.paragraphs
        tables = old_doc.tables
        para_index = 0
        table_index = 0

        for element in old_doc.element.body:
            if element.tag == qn('w:p'):
                clone_paragraph(paragraphs[para_index], new_doc)
                para_index += 1
                # Only run text is copied, so a page break inside the
                # paragraph is re-created right after it. Checking the
                # element just cloned (not the next one) means no element
                # is skipped and the counters stay in sync.
                if is_page_break(element):
                    new_doc.add_paragraph().add_run().add_break(WD_BREAK.PAGE)
            elif element.tag == qn('w:tbl'):
                clone_table(tables[table_index], new_doc)
                table_index += 1
                # A break following a table lives in the next paragraph
                # and is emitted when that paragraph is processed.
            # Other body children (e.g. section properties) are not cloned.

        new_doc.save(new_doc_path)
        print(f"The document has been saved to:{new_doc_path}")
    except Exception as e:
        print(f"An error occurred while copying a document:{e}")
The main function walks the document body in order while maintaining separate index counters for paragraphs and tables, which ensures that the original element order is preserved.
Example of usage
if __name__ == "__main__":
    # NOTE(review): the source path was blank in the extracted text;
    # 'example.docx' is a placeholder -- point it at your own file.
    clone_document('example.docx', 'cloned_example.docx')
Operation mode:
- Installation dependencies:
pip install python-docx
- Prepare the source file
- Execute script to generate cloned files
Things to note
- Section break support: The current version does not yet clone section breaks or headers and footers (the relevant part is commented out in the code)
- Compatibility testing: It is recommended to use .docx files; .doc files may not be parsed correctly
- Performance optimization: It is recommended to add memory optimization logic when processing large documents
Summary
This solution implements complete style and format migration by deeply analyzing the XML structure of Word documents. Subsequent expansion directions:
- Supports section breaks and header footer cloning
- Added image and chart copying functions
- Develop a graphical operation interface
The complete code has been tested and can be directly applied to document automation processing scenarios. With appropriate extensions, a complete document template management system can be built.
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_BREAK
from docx.oxml import OxmlElement
from docx.oxml.ns import qn


def copy_paragraph_style(run_from, run_to):
    """Copy character formatting from one run to another.

    Args:
        run_from: Source run whose formatting is read.
        run_to: Destination run whose formatting is overwritten in place.
    """
    # Run-level toggles.
    run_to.bold = run_from.bold
    run_to.italic = run_from.italic
    run_to.underline = run_from.underline

    # Font-level properties.
    run_to.font.size = run_from.font.size
    run_to.font.color.rgb = run_from.font.color.rgb
    run_to.font.name = run_from.font.name
    run_to.font.all_caps = run_from.font.all_caps
    run_to.font.strike = run_from.font.strike
    run_to.font.shadow = run_from.font.shadow


def is_page_break(element):
    """Judge whether a body element carries a page break.

    Args:
        element: An lxml element taken from the document body.

    Returns:
        True if ``element`` is a paragraph containing a page break, or a
        table immediately followed by a paragraph containing one;
        False otherwise.
    """
    def contains_page_break(paragraph):
        # <w:br> lives inside a run (<w:r>), not directly under <w:p>,
        # so descendants must be searched rather than direct children.
        # qn() needs the 'w:' prefix to resolve the attribute name.
        return any(
            br.get(qn('w:type')) == 'page'
            for br in paragraph.iter(qn('w:br'))
        )

    if element.tag == qn('w:p'):
        return contains_page_break(element)

    if element.tag == qn('w:tbl'):
        # There may be a page break after the table (judged by the
        # next element).
        next_element = element.getnext()
        return (
            next_element is not None
            and next_element.tag == qn('w:p')
            and contains_page_break(next_element)
        )

    return False


def clone_paragraph(old_para, new_doc):
    """Create a new paragraph in ``new_doc`` from an existing paragraph.

    Args:
        old_para: Source paragraph to copy.
        new_doc: Destination document; the paragraph is appended to it.

    Returns:
        The newly created paragraph.
    """
    new_para = new_doc.add_paragraph()
    if old_para.style:
        new_para.style = old_para.style
    for old_run in old_para.runs:
        new_run = new_para.add_run(old_run.text)
        copy_paragraph_style(old_run, new_run)
    new_para.alignment = old_para.alignment
    return new_para


def copy_cell_borders(old_cell, new_cell):
    """Copy the border definitions of one table cell onto another.

    Args:
        old_cell: Source cell whose ``w:tcBorders`` element is read.
        new_cell: Destination cell that receives a rebuilt copy.

    Does nothing when the source cell defines no borders.
    """
    old_tc = old_cell._tc
    new_tc = new_cell._tc

    old_borders = old_tc.xpath('.//w:tcBorders')
    if old_borders:
        old_border = old_borders[0]
        new_border = OxmlElement('w:tcBorders')

        border_types = ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']
        for border_type in border_types:
            old_element = old_border.find(f'.//w:{border_type}', namespaces={
                'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
            })
            if old_element is not None:
                # Rebuild the edge element and carry over every attribute.
                new_element = OxmlElement(f'w:{border_type}')
                for attr, value in old_element.attrib.items():
                    new_element.set(attr, value)
                new_border.append(new_element)

        tc_pr = new_tc.get_or_add_tcPr()
        tc_pr.append(new_border)


def clone_table(old_table, new_doc):
    """Create a new table in ``new_doc`` from an existing table.

    Args:
        old_table: Source table to copy.
        new_doc: Destination document; the new table is appended to it.

    Returns:
        The newly created table.
    """
    new_table = new_doc.add_table(
        rows=len(old_table.rows), cols=len(old_table.columns)
    )
    if old_table.style:
        new_table.style = old_table.style

    for i, old_row in enumerate(old_table.rows):
        for j, old_cell in enumerate(old_row.cells):
            new_cell = new_table.cell(i, j)

            # Drop the paragraph(s) a fresh cell starts with so that only
            # the copied paragraphs remain.
            for paragraph in new_cell.paragraphs:
                new_cell._element.remove(paragraph._element)

            for old_paragraph in old_cell.paragraphs:
                new_paragraph = new_cell.add_paragraph()
                for old_run in old_paragraph.runs:
                    new_run = new_paragraph.add_run(old_run.text)
                    copy_paragraph_style(old_run, new_run)
                new_paragraph.alignment = old_paragraph.alignment

            copy_cell_borders(old_cell, new_cell)

    # Column widths are copied only where the source defines one.
    for i, col in enumerate(old_table.columns):
        if col.width is not None:
            new_table.columns[i].width = col.width

    return new_table


def clone_document(old_doc_path, new_doc_path):
    """Clone paragraphs, tables and page breaks into a new document.

    Args:
        old_doc_path: Path of the source .docx file.
        new_doc_path: Path the cloned document is saved to.

    Any exception is caught and reported on stdout; nothing is re-raised.
    """
    try:
        old_doc = Document(old_doc_path)
        new_doc = Document()

        # # Copy section breaks and header footer
        # for old_section in old_doc.sections:
        #     new_section = new_doc.add_section(start_type=old_section.start_type)
        #     new_section.left_margin = old_section.left_margin
        #     new_section.right_margin = old_section.right_margin
        #     # Other section breaking attributes...
        #
        #     # header
        #     for para in old_section.header.paragraphs:
        #         new_para = new_section.header.add_paragraph()
        #         for run in para.runs:
        #             new_run = new_para.add_run(run.text)
        #             copy_paragraph_style(run, new_run)
        #         new_para.alignment = para.alignment
        #
        #     # footer
        #     for para in old_section.footer.paragraphs:
        #         new_para = new_section.footer.add_paragraph()
        #         for run in para.runs:
        #             new_run = new_para.add_run(run.text)
        #             copy_paragraph_style(run, new_run)
        #         new_para.alignment = para.alignment

        # Copy the main content
        # Fetch the paragraph/table lists once; each property access
        # rebuilds the list, so indexing it inside the loop is wasteful.
        paragraphs = old_doc.paragraphs
        tables = old_doc.tables
        para_index = 0
        table_index = 0

        for element in old_doc.element.body:
            if element.tag == qn('w:p'):
                clone_paragraph(paragraphs[para_index], new_doc)
                para_index += 1
                # Only run text is copied, so a page break inside the
                # paragraph is re-created right after it. Checking the
                # element just cloned (not the next one) means no element
                # is skipped and the counters stay in sync.
                if is_page_break(element):
                    new_doc.add_paragraph().add_run().add_break(WD_BREAK.PAGE)
            elif element.tag == qn('w:tbl'):
                clone_table(tables[table_index], new_doc)
                table_index += 1
                # A break following a table lives in the next paragraph
                # and is emitted when that paragraph is processed.
            # Other body children (e.g. section properties) are not cloned.

        new_doc.save(new_doc_path)
        print(f"The document has been saved to:{new_doc_path}")
    except Exception as e:
        print(f"An error occurred while copying a document:{e}")


# Usage example
if __name__ == "__main__":
    # NOTE(review): the source path was blank in the extracted text;
    # 'example.docx' is a placeholder -- point it at your own file.
    clone_document('example.docx', 'cloned_example.docx')
This is the end of this article about the complete code of deep cloning of Word documents using Python. For more related Python Word deep cloning content, please search for my previous articles or continue browsing the related articles below. I hope everyone will support me in the future!