In this post I will show how to convert a PDF document to a text file using pdftotext. (pdftotext is an open-source command-line utility for converting PDF files to plain text files, i.e. extracting the text data from PDF files. It is freely available and included with many Linux distributions. On Windows it must be installed as part of the Xpdf package.) Click here to download pdftotext.
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="pdf2tex.aspx.cs" Inherits="pdf2tex"
ValidateRequest="False" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title>Untitled Page</title>
</head>
<body>
<form id="form1" runat="server">
<asp:FileUpload ID="FileUpload1" runat="server" />
<br />
<asp:Button ID="btnRead" Text="Convert" runat="Server" OnClick="btnRead_Click" />
<br />
<asp:TextBox ID="txtContent" runat="Server" TextMode="MultiLine" Height="376px" Width="411px"></asp:TextBox>
</form>
</body>
</html>
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;
/// <summary>
/// Code-behind for the PDF-to-text page: takes the uploaded PDF, runs the
/// pdftotext.exe utility located next to the page on it, and shows the
/// converted output in the txtContent text box.
/// </summary>
public partial class pdf2tex : System.Web.UI.Page
{
    /// <summary>Page load handler; no per-request initialisation is needed.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler of the "Convert" button. Saves the uploaded file on the
    /// server, converts it with pdftotext and displays the result.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void btnRead_Click(object sender, EventArgs e)
    {
        // Nothing was uploaded: there is nothing to convert.
        if (!FileUpload1.HasFile)
        {
            txtContent.Text = string.Empty;
            return;
        }

        // Work in a private, randomly named directory so that concurrent
        // requests never overwrite each other's input or output files.
        string workDir = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
        Directory.CreateDirectory(workDir);

        // Server-chosen names only: PostedFile.FileName comes from the client
        // and must never reach the command line.
        string inputPath = Path.Combine(workDir, "input.pdf");
        string outputPath = Path.Combine(workDir, "output.htm");

        try
        {
            // The upload lives only in the request until it is saved; the
            // external tool needs a real file on the server's disk.
            FileUpload1.SaveAs(inputPath);

            using (System.Diagnostics.Process p = new System.Diagnostics.Process())
            {
                p.StartInfo.FileName = Page.MapPath("pdftotext.exe");
                // Quote both paths: the temp directory may contain spaces.
                p.StartInfo.Arguments =
                    "-raw -htmlmeta \"" + inputPath + "\" \"" + outputPath + "\"";
                p.StartInfo.UseShellExecute = false;
                p.StartInfo.CreateNoWindow = false;
                p.StartInfo.RedirectStandardOutput = false;

                p.Start();
                // The output file is complete only after the tool has exited.
                p.WaitForExit();
            }

            // pdftotext writes no output file when the conversion fails.
            txtContent.Text = File.Exists(outputPath) ? ReadFile(outputPath) : string.Empty;
        }
        finally
        {
            // Best-effort cleanup; a leftover temp directory must not fail the request.
            try
            {
                Directory.Delete(workDir, true);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }

    /// <summary>Reads the whole content of a text file.</summary>
    /// <param name="s">Path of the file to read.</param>
    /// <returns>The complete text of the file.</returns>
    public string ReadFile(string s)
    {
        // Dispose the reader so the file handle is released immediately.
        using (StreamReader sr = new StreamReader(s))
        {
            return sr.ReadToEnd();
        }
    }
}