How to read content from PDF in ASP.NET?
In this article I explain how to read PDF data in ASP.NET. I read the PDF content using iTextSharp.dll. This technique is useful in a variety of scenarios.
Description :
I read the data in the PDF as text using iTextSharp.dll and write it to the web page. The text is read from the PDF one page at a time.
Client side
I have placed a file upload control on the page to upload the PDF whose content will be read.
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
<%-- Upload page: the user selects a PDF and clicks the button; the code-behind (Default.aspx.cs) writes the result into the labels below. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title>Read content from PDF</title>
</head>
<body>
<form id="form1" runat="server">
<div>
<table width="800" align="center" cellpadding="0" cellspacing="0">
<tr>
<td colspan="2" align="center" height="60">
<b>Read PDF content in ASP.NET</b>
</td>
</tr>
<%-- Status row: the code-behind sets lblmsg when reading the PDF fails. --%>
<tr>
<td colspan="2" align="center" height="30">
<asp:Label ID="lblmsg" runat="server"></asp:Label>
</td>
</tr>
<%-- File picker: Button1_Click takes the uploaded file from FileUpload1. --%>
<tr>
<td height="60">
Select your File
</td>
<td>
<asp:FileUpload ID="FileUpload1" runat="server" />
</td>
</tr>
<%-- Posts back to the Button1_Click handler in the code-behind. --%>
<tr>
<td colspan="2" align="center" height="60">
<asp:Button ID="Button1" runat="server" Text="Click to Read content and display" OnClick="Button1_Click" />
</td>
</tr>
<%-- Output row: the code-behind assigns the text extracted from the PDF to lblContent. --%>
<tr>
<td colspan="2" align="center" height="60">
<asp:Label ID="lblContent" runat="server"></asp:Label>
</td>
</tr>
</table>
</div>
</form>
</body>
</html>
Server side
using System;
using System.IO;
using System.Text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Code-behind for the PDF upload page: saves the uploaded file to a temporary
/// location, extracts its text with iTextSharp and shows it in <c>lblContent</c>.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Handles the upload button: stores the posted file under a unique temporary
    /// name and passes it to <see cref="ReadPDFContent"/>.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        // Label text survives postbacks, so clear the previous result/error first.
        lblmsg.Text = string.Empty;
        lblContent.Text = string.Empty;

        if (!FileUpload1.HasFile)
        {
            return;
        }

        // Never save under the client-supplied name inside the site directory:
        // an upload called "web.config" or "Default.aspx" would overwrite (and then
        // delete) a site file, and two users uploading the same name would collide.
        // A GUID-named file in the temp directory avoids both problems.
        string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".pdf");

        FileUpload1.SaveAs(tempPath);

        // ReadPDFContent deletes the temporary file when it is done.
        ReadPDFContent(tempPath);
    }

    /// <summary>
    /// Extracts the text of every page of the PDF at <paramref name="path"/>, writes it
    /// (HTML-encoded, one <c>&lt;br/&gt;</c> after each page) to <c>lblContent</c>, and
    /// deletes the file afterwards. On failure an error message is shown in <c>lblmsg</c>.
    /// </summary>
    /// <param name="path">Full path of the PDF file to read; the file is deleted on return.</param>
    void ReadPDFContent(string path)
    {
        try
        {
            // Using string builder we can append each page contents
            StringBuilder text = new StringBuilder();

            // Before read check that file is available or not in the specified location
            if (File.Exists(path))
            {
                PdfReader read = new PdfReader(path);
                try
                {
                    for (int page = 1; page <= read.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                        string pageText = PdfTextExtractor.GetTextFromPage(read, page, strategy);

                        // The extracted string is used as-is: round-tripping it through
                        // Encoding.Default would replace every character outside the
                        // ANSI code page with '?'.
                        // PDF content is untrusted input, so encode it before it is
                        // rendered as HTML inside the label.
                        text.Append(Server.HtmlEncode(pageText));

                        // use <br/> tag to write new page content in new line
                        text.Append("<br/>");
                    }
                }
                finally
                {
                    // Close only after ALL pages were read (closing inside the loop
                    // breaks every page after the first), and also when extraction fails.
                    read.Close();
                }
            }

            lblContent.Text = text.ToString();
        }
        catch (Exception ex)
        {
            // Keep the user-facing message generic, but record the cause in the page trace.
            Trace.Warn("ReadPDFContent", "Error during read pdf content", ex);
            lblmsg.Text = "Error during read pdf content";
        }
        finally
        {
            // Delete after read data from server path
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
    }
}
Output :
Content in PDF
Content in web page after Read
Source code:
Client Side: ASP.NET
Code Behind: C#
Conclusion
I hope this code snippet helps you understand how to read PDF content.
this is fine
I have one PDF file that contains both text content and images. How do I read the images and the content?
I also have to change the content dynamically and display that content and the images in a new PDF using iTextSharp.dll.