Extract images from word document using OpenXML
This resource explains how to extract images from a Word document using OpenXML. The file extension must be .docx. A DOCX file stores its content as XML documents, which makes it easy to read the file and process it programmatically, because the format is lightweight.
Purpose: To extract images from a Word document (.docx) without Microsoft Word installed on the system.
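Requirements: Install the DocumentFormat.OpenXml package from NuGet (see the screenshot below for reference). The code also uses System.Drawing to decode and save the images, so the project must reference it as well.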
Step 1: Create a console application or a class library.
Step 2: Add the code below to a class file.
Namespaces
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System.IO;
Main method
// Console entry point: runs the image extraction once against a sample
// document and output folder. The count returned by ExtractImages is not used.
static void Main(string[] args)
{
    string sourceDocument = @"E:\Nirav\Testing\abcd.docx";
    string targetFolder = @"E:\Nirav\Testing\Output";
    string imageExtension = "jpg";

    ExtractImages(sourceDocument, targetFolder, imageExtension);
}
Function to extract images
/// <summary>
/// Saves every embedded image referenced by a drawing in the body of a Word
/// (.docx) document into <paramref name="OutputPath"/>. Files are named
/// img_NNN followed by the requested extension, using the first number that
/// is not already taken in the output folder.
/// </summary>
/// <param name="InputFilePath">Path of the .docx file to read. It is opened read-only.</param>
/// <param name="OutputPath">Existing folder that receives the image files.</param>
/// <param name="OutputImageFileExtension">
/// Extension for the saved files, with or without the leading dot (for example
/// "jpg" or ".png"). It also selects the encoding used when saving; extensions
/// that are not recognised are written as PNG.
/// </param>
/// <returns>The number of images that were saved.</returns>
/// <remarks>
/// Errors are not propagated to the caller: any exception is reported on the
/// console and the number of images saved up to that point is returned.
/// </remarks>
public static int ExtractImages(string InputFilePath, string OutputPath, string OutputImageFileExtension)
{
    int SavedImageCount = 0;
    try
    {
        // A null extension is treated like a blank one instead of surfacing
        // as a NullReferenceException.
        if (string.IsNullOrEmpty(OutputImageFileExtension) || OutputImageFileExtension.Length <= 1)
        {
            throw new ArgumentException("Output image file extension cannot be blank.");
        }

        if (!OutputImageFileExtension.StartsWith(".", StringComparison.Ordinal))
        {
            OutputImageFileExtension = "." + OutputImageFileExtension;
        }

        if (!File.Exists(InputFilePath))
        {
            throw new FileNotFoundException("Input file doesn't exists on the system.");
        }

        if (!Directory.Exists(OutputPath))
        {
            throw new DirectoryNotFoundException("Output folder doesn't exists on the system.");
        }

        // Encode the output to match the extension rather than always writing PNG data.
        System.Drawing.Imaging.ImageFormat outputFormat = ResolveImageFormat(OutputImageFileExtension);

        // Shared across all images: every lower number is already taken, either
        // by a pre-existing file or by one written earlier in this call, so
        // there is no need to rescan from 1 for each image.
        int fileIndex = 1;

        // Dispose the package so the input file handle is released on every path.
        using (WordprocessingDocument doc = WordprocessingDocument.Open(InputFilePath, false))
        {
            MainDocumentPart mainPart = doc.MainDocumentPart;
            Body body = (mainPart != null && mainPart.Document != null) ? mainPart.Document.Body : null;
            if (body == null)
            {
                // Nothing to scan; falls through to the final return with a count of 0.
                return SavedImageCount;
            }

            IEnumerable<Drawing> drawings = body.Descendants<Drawing>();
            foreach (Drawing draw in drawings)
            {
                foreach (DocumentFormat.OpenXml.Drawing.Blip b in draw.Descendants<DocumentFormat.OpenXml.Drawing.Blip>())
                {
                    // A blip without an embed id has no image part inside the
                    // package (e.g. a linked picture); skip it rather than
                    // letting GetPartById throw and abort the whole run.
                    if (b.Embed == null || string.IsNullOrEmpty(b.Embed.Value))
                    {
                        continue;
                    }

                    ImagePart imgPart = mainPart.GetPartById(b.Embed.Value) as ImagePart;
                    if (imgPart == null)
                    {
                        continue;
                    }

                    // Image.FromStream requires the stream to stay open for the
                    // lifetime of the image, so the stream is the outer using.
                    using (Stream imageStream = imgPart.GetStream())
                    using (System.Drawing.Image saveImage = System.Drawing.Image.FromStream(imageStream))
                    {
                        string targetFile = Path.Combine(OutputPath, "img_" + fileIndex.ToString("D3") + OutputImageFileExtension);
                        while (File.Exists(targetFile))
                        {
                            fileIndex++;
                            targetFile = Path.Combine(OutputPath, "img_" + fileIndex.ToString("D3") + OutputImageFileExtension);
                        }

                        saveImage.Save(targetFile, outputFormat);
                        SavedImageCount++;
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberate best-effort contract: report the failure and return the
        // number of images saved so far.
        Console.WriteLine("An error occurred while extracting images from word file: " + ex.Message);
    }
    return SavedImageCount;
}

// Maps a file extension (including the leading dot, any casing) to the
// matching System.Drawing encoder; unrecognised extensions fall back to PNG.
private static System.Drawing.Imaging.ImageFormat ResolveImageFormat(string extension)
{
    switch (extension.ToLowerInvariant())
    {
        case ".jpg":
        case ".jpeg":
            return System.Drawing.Imaging.ImageFormat.Jpeg;
        case ".bmp":
            return System.Drawing.Imaging.ImageFormat.Bmp;
        case ".gif":
            return System.Drawing.Imaging.ImageFormat.Gif;
        case ".tif":
        case ".tiff":
            return System.Drawing.Imaging.ImageFormat.Tiff;
        default:
            return System.Drawing.Imaging.ImageFormat.Png;
    }
}