How to Merge and Split Word Documents in C#
This article introduces a simple solution for merging part or all of several Word documents into a single document, and for splitting a Word document into separate documents by section or by page break, in C#.
Background
I needed to merge many small Word documents and send the result to my clients, and afterwards split it back into the same separate documents as before. To do this, I searched on Google and fortunately found some free and useful solutions. First, I found Microsoft.Office.Interop.Word; I tried it, but it required me to write a lot of code and I got stuck. Given my timeline, I had to turn to other solutions, and I finally found a free Word API on CodePlex that solved all my problems and is easy to use. I am happy to share my solution here, and I hope it will help you too.
Using the code
Part 1 - Merge
When merging two Word documents, we can either merge part of one document into another document or merge the whole documents into a single document.
For data confidentiality, I created the two sample documents shown below.
Document 1
Document 2
Merge specific section of one word document into another document
Step 1: Initialize a new object of the Document class and load document 1.
Document doc1 = new Document();
doc1.LoadFromFile("Sales Report.docx", FileFormat.Docx);
Step 2: Load document 2, which has two sections.
Document doc2 = new Document();
doc2.LoadFromFile("Stories.docx", FileFormat.Docx);
Step 3: Merge the first section of document 2 into document 1.
// Get the first section of document 2
Section sec = doc2.Sections[0];
// Clone the section and add it to document 1
doc1.Sections.Add(sec.Clone());
//Save
doc1.SaveToFile("Mergesection.docx", FileFormat.Docx);
Output:
Merge whole word documents into a single document
Step 1: Load document 1.
Document document = new Document();
document.LoadFromFile("Sales Report.docx", FileFormat.Docx);
Step 2: Merge document 1 and document 2 into a single document.
document.InsertTextFromFile("Stories.docx", FileFormat.Docx);
//Save
document.SaveToFile("MergeFiles.docx", FileFormat.Docx);
Output:
Part 2 – Split
Here I will explain two different methods to split a word document.
Split Word document by section
Step 1: Initialize a new object of the Document class and load document 2, which has two sections.
Document document = new Document();
document.LoadFromFile("Stories.docx");
Step 2: Declare another Word document object.
Document newWord;
Step 3: Traverse all sections of document 2, clone each section and add it to a new Word document as a new section, then save each new document to the specified path.
for (int i = 0; i < document.Sections.Count; i++)
{
newWord = new Document();
newWord.Sections.Add(document.Sections[i].Clone());
newWord.SaveToFile(String.Format(@"test\out_{0}.docx", i));
}
Output:
Split Word document by page break
The following original Word document has two page breaks, one at the end of the first page and one at the end of the second page.
Now refer to the following code snippets to split it by page breaks.
Step 1: Load the original Word document.
Document original = new Document();
original.LoadFromFile("New Zealand.docx");
Step 2: Create a new Word document object and add a new section.
Document newWord = new Document();
Section section = newWord.AddSection();
Step 3: Detect page breaks and split the document into multiple documents at each page break.
int index = 0;
// Walk every body object of "original" and copy it into the output document that is
// currently being filled ("newWord"/"section"). Each page break closes the current
// output document, saves it as result/out-<index>.docx and starts a new one.

// Make sure the output folder exists; this is a no-op when it is already there.
System.IO.Directory.CreateDirectory("result");

foreach (Section sec in original.Sections)
{
    foreach (DocumentObject obj in sec.Body.ChildObjects)
    {
        if (obj is Paragraph)
        {
            Paragraph para = obj as Paragraph;

            // Start by copying the whole paragraph into the current output document.
            section.Body.ChildObjects.Add(para.Clone());

            // Index (within "para") of the first child that is still present in the
            // copy at the end of the current output document. It is 0 for the full
            // copy and moves forward after every page break, so that a paragraph
            // holding several page breaks is trimmed at the right position each time.
            int firstKept = 0;

            // Position of "parobj" inside para.ChildObjects.
            int i = -1;
            foreach (DocumentObject parobj in para.ChildObjects)
            {
                i++;
                if (!(parobj is Break && (parobj as Break).BreakType == BreakType.PageBreak))
                {
                    continue;
                }

                // Drop the page break and everything after it from the current copy.
                int cut = i - firstKept;
                while (section.Body.LastParagraph.ChildObjects.Count > cut)
                {
                    section.Body.LastParagraph.ChildObjects.RemoveAt(cut);
                }

                // The current output document is complete: save it.
                newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                index++;

                // Start the next output document with whatever follows the break.
                newWord = new Document();
                section = newWord.AddSection();
                section.Body.ChildObjects.Add(para.Clone());
                for (int removed = 0; removed <= i; removed++)
                {
                    // Removes the children up to and including the page break.
                    section.Paragraphs[0].ChildObjects.RemoveAt(0);
                }
                firstKept = i + 1;

                // Nothing followed the break: do not keep an empty paragraph.
                if (section.Paragraphs[0].ChildObjects.Count == 0)
                {
                    section.Body.ChildObjects.RemoveAt(0);
                }
            }
        }

        // Tables are copied as a whole into the current output document.
        if (obj is Table)
        {
            section.Body.ChildObjects.Add(obj.Clone());
        }
    }
}

// Save whatever was collected after the last page break.
newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
Output:
Full code
Merge
using Spire.Doc;
namespace Merge_Word_Document
{
    /// <summary>
    /// Sample program showing two ways to merge Word documents:
    /// appending a single section, or appending a whole document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the whole-document merge; call
        /// <see cref="MergeSection"/> instead to append only the first
        /// section of the second document.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            MergeWholeDocuments();
        }

        /// <summary>
        /// Appends a clone of the first section of "Stories.docx" to
        /// "Sales Report.docx" and saves the result as "Mergesection.docx".
        /// </summary>
        private static void MergeSection()
        {
            Document doc1 = new Document();
            doc1.LoadFromFile("Sales Report.docx", FileFormat.Docx);

            Document doc2 = new Document();
            doc2.LoadFromFile("Stories.docx", FileFormat.Docx);

            // Clone the section so that doc2 is left untouched.
            Section sec = doc2.Sections[0];
            doc1.Sections.Add(sec.Clone());

            doc1.SaveToFile("Mergesection.docx", FileFormat.Docx);
        }

        /// <summary>
        /// Loads "Sales Report.docx", inserts the content of "Stories.docx"
        /// into it and saves the result as "MergeFiles.docx".
        /// </summary>
        private static void MergeWholeDocuments()
        {
            Document document = new Document();
            document.LoadFromFile("Sales Report.docx", FileFormat.Docx);
            document.InsertTextFromFile("Stories.docx", FileFormat.Docx);
            document.SaveToFile("MergeFiles.docx", FileFormat.Docx);
        }
    }
}
Split
using System;
using Spire.Doc;
using Spire.Doc.Documents;
namespace Split_Word_Document
{
    /// <summary>
    /// Sample program showing two ways to split a Word document:
    /// one output file per section, or one output file per page break.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the page-break split; call
        /// <see cref="SplitBySection"/> instead to split by section.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            SplitByPageBreak();
        }

        /// <summary>
        /// Saves every section of "Stories.docx" as its own document,
        /// test\out_0.docx, test\out_1.docx, and so on.
        /// </summary>
        private static void SplitBySection()
        {
            Document document = new Document();
            // Same sample file name as the article text and the merge sample use.
            document.LoadFromFile("Stories.docx");

            // Make sure the output folder exists; no-op when it is already there.
            System.IO.Directory.CreateDirectory("test");

            for (int i = 0; i < document.Sections.Count; i++)
            {
                Document newWord = new Document();
                newWord.Sections.Add(document.Sections[i].Clone());
                newWord.SaveToFile(String.Format(@"test\out_{0}.docx", i));
            }
        }

        /// <summary>
        /// Splits "New Zealand.docx" at every page break and saves the pieces as
        /// result/out-0.docx, result/out-1.docx, and so on. Paragraphs and tables
        /// are copied; a paragraph containing page breaks is cut at each break.
        /// </summary>
        private static void SplitByPageBreak()
        {
            Document original = new Document();
            original.LoadFromFile("New Zealand.docx");

            // Make sure the output folder exists; no-op when it is already there.
            System.IO.Directory.CreateDirectory("result");

            // "newWord"/"section" always refer to the output document being filled.
            Document newWord = new Document();
            Section section = newWord.AddSection();
            int index = 0;

            foreach (Section sec in original.Sections)
            {
                foreach (DocumentObject obj in sec.Body.ChildObjects)
                {
                    if (obj is Paragraph)
                    {
                        Paragraph para = obj as Paragraph;

                        // Start by copying the whole paragraph into the current output.
                        section.Body.ChildObjects.Add(para.Clone());

                        // Index (within "para") of the first child still present in the
                        // copy at the end of the current output document. It moves
                        // forward after every page break so that a paragraph holding
                        // several page breaks is trimmed at the right position each time.
                        int firstKept = 0;

                        // Position of "parobj" inside para.ChildObjects.
                        int i = -1;
                        foreach (DocumentObject parobj in para.ChildObjects)
                        {
                            i++;
                            if (!IsPageBreak(parobj))
                            {
                                continue;
                            }

                            // Drop the break and everything after it from the current copy.
                            int cut = i - firstKept;
                            while (section.Body.LastParagraph.ChildObjects.Count > cut)
                            {
                                section.Body.LastParagraph.ChildObjects.RemoveAt(cut);
                            }

                            // The current output document is complete: save it.
                            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                            index++;

                            // Start the next output document with what follows the break.
                            newWord = new Document();
                            section = newWord.AddSection();
                            section.Body.ChildObjects.Add(para.Clone());
                            for (int removed = 0; removed <= i; removed++)
                            {
                                // Removes the children up to and including the break.
                                section.Paragraphs[0].ChildObjects.RemoveAt(0);
                            }
                            firstKept = i + 1;

                            // Nothing followed the break: do not keep an empty paragraph.
                            if (section.Paragraphs[0].ChildObjects.Count == 0)
                            {
                                section.Body.ChildObjects.RemoveAt(0);
                            }
                        }
                    }

                    // Tables are copied as a whole into the current output document.
                    if (obj is Table)
                    {
                        section.Body.ChildObjects.Add(obj.Clone());
                    }
                }
            }

            // Save whatever was collected after the last page break.
            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
        }

        /// <summary>
        /// Returns true when <paramref name="obj"/> is a Break whose type is PageBreak.
        /// </summary>
        private static bool IsPageBreak(DocumentObject obj)
        {
            Break candidate = obj as Break;
            return candidate != null && candidate.BreakType == BreakType.PageBreak;
        }
    }
}