Product Home Page:
https://www.verypdf.com/app/image-to-pdf-ocr-converter-cmd/sdk-for-net.html
VeryPDF Image to PDF OCR SDK for .NET can be called from C# code to quickly convert image files to searchable PDF files. The trial version of VeryPDF Image to PDF OCR SDK for .NET can be downloaded from the following URL:
https://www.verypdf.com/dl2.php/image2pdfocrsdk.zip
The evaluation package contains VB, VC++, C# and VB.NET examples, so you can compile them and test the SDK's functions easily.
The following is a simple C# example that converts a PNG file to a raster PDF file:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.IO;
namespace CSharp_WindowsFormsApplication1
{
    /// <summary>
    /// Sample form: clicking <c>button1</c> hands a fixed PNG path and a fixed
    /// PDF path to the native i2pocrsdk.dll entry point.
    /// </summary>
    public partial class Form1 : Form
    {
        // Hard-coded sample paths; verbatim literals avoid doubling the backslashes.
        private const string InputImagePath = @"D:\temp\13.png";
        private const string OutputPdfPath = @"D:\temp\out.pdf";

        /// <summary>
        /// Native entry point exported by i2pocrsdk.dll. It receives one
        /// command-line style string (options followed by the file paths).
        /// </summary>
        /// <param name="strCommandLine">The full command line to execute.</param>
        /// <returns>An integer status from the native library (meaning not documented here).</returns>
        [DllImport("i2pocrsdk.dll")]
        internal static extern int Image2PDFOCR_PDFCmd(string strCommandLine);

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: builds the command line with both paths wrapped in
        /// double quotes and passes it to the native library.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Evaluates to: "D:\temp\13.png" "D:\temp\out.pdf"
            const string commandLine =
                "\"" + InputImagePath + "\" \"" + OutputPdfPath + "\"";

            int resultCode = Image2PDFOCR_PDFCmd(commandLine);
        }
    }
}
The Image2PDFOCR_PDFCmd() function supports many options; please refer to the complete list below:
Image to PDF Converter v2.1 Web: http://www.verypdf.com/tif2pdf/tif2pdf.htm
Email: support@verypdf.com
-------------------------------------------------------
Description:
Usage: img2pdf [options] <Image-file> [<PDF-file>]
-ocr <int> : create full-text searchable PDF file
-ocrtxt <string> : export OCRed text to a text file
-ocrtxtxy <string> : export OCRed text with X, Y coordinate to a text file
-nopdf : do not create PDF file, it is useful when you use '-ocrtxt' parameter
-ocr2 <string> : this option is only available for internal use
-addtextlayerintopdf <string>: this option is only available for internal use
-xres <int> : set X resolution to image file
-yres <int> : set Y resolution to image file
-ocrrect <string> : set a rectangle region for OCR
-aligntextypos : align OCRed text contents by Y position
-firstpg <int> : PDF to Image: first page to convert
-lastpg <int> : PDF to Image: last page to convert
-iswh : PDF to Image: specify width/height or DPI
-bitcount <int> : PDF to Image: set color depth
-autobitcount : PDF to Image: determine color depth automatically
-pidpi <int> : PDF to Image: set DPI resolution
-compress <int> : PDF to Image: set compression method to TIFF format
-compress 1 : NONE compression
-compress 2 : CCITT modified Huffman RLE
-compress 3 : CCITT Group 3 fax encoding (1d)
-compress 4 : CCITT Group 4 fax encoding
-compress 5 : LZW compression
-compress 6 : OJPEG compression
-compress 7 : JPEG DCT compression
-compress 32773 : PACKBITS compression
-compress 32809 : THUNDERSCAN compression
-multipage : PDF to Image: create multipage TIFF file
-opw <string> : owner password (for encrypted PDF file)
-upw <string> : user password (for encrypted PDF file)
-quality <int> : set quality for JPEG, JPEG2000 compression
-width <int> : set page width to PDF file
-height <int> : set page height to PDF file
-rotate <int> : rotate pages, 90, 180, 270
-view : view PDF file after creation
-skewcorrect : skew correct for black and white image file
-despeckle : despeckle for black and white image file
-chkbwcolor : check and invert color for black and white image file
-dpi <int> : set DPI resolution to output PDF file
-dpi 0 : use the default image width and height information
-dpi -1 : take DPI info from original image file
-dpi <int> : set resolution to output PDF file
-append <int> :
-append 0: overwrite if PDF file exists
-append 1: insert before first page if PDF file exists
-append 2: append to last page if PDF file exists
-append 3: rename filename if PDF file exists
-producer <string> : set 'producer' to PDF file
-creator <string> : set 'creator' to PDF file
-subject <string> : set 'subject' to PDF file
-title <string> : set 'title' to PDF file
-author <string> : set 'author' to PDF file
-keywords <string> : set 'keywords' to PDF file
-credate <string> : set 'CreationDate' to PDF file
e.g. -credate "20070116230629-08'00'"
-moddate <string> : set 'ModDate' to PDF file
e.g. -moddate "20070116230629-08'00'"
-openpwd <string> : set 'open password' to PDF file
-ownerpwd <string> : set 'owner password' to PDF file
-keylen <int> : key length (40 or 128 bit)
-keylen 0: 40 bit RC4 encryption (Acrobat 3 or higher)
-keylen 1: 128 bit RC4 encryption (Acrobat 5 or higher)
-keylen 2: 128 bit RC4 encryption (Acrobat 6 or higher)
-encryption <int> : restrictions
-encryption 0: encrypt the file only
-encryption 3900: deny anything
-encryption 4: deny printing
-encryption 8: deny modification of contents
-encryption 16: deny copying of contents
-encryption 32: no commenting
===128 bit encryption only -> ignored if 40 bit encryption is used
-encryption 256: deny FillInFormFields
-encryption 512: deny ExtractObj
-encryption 1024: deny Assemble
-encryption 2048: disable high res. printing
-encryption 4096: do not encrypt metadata
-bookmark <int> : create bookmark to PDF file
-bookmark -1: don't create bookmarks
-bookmark -2: read bookmarks from bookmark.ini file
-bookmark -3: use filenames as bookmarks
-bookmark -4: use the filenames as bookmarks, one bookmark at the first page of each tif
-bookmark <num>: "num" must >= 0, specify first number in bookmarks
-mergepdf <string> : merge two PDF files into one PDF file
-mergepdf "file1|file2|file3|...": merge several PDF files into one
-mergepdf "C:\filelist.txt": merge PDF files from a .txt file
-mergepdf "C:\*.pdf": merge PDF files into one PDF file
-burstpdf <string> : burst PDF file into single page PDF files
-pdfx : create PDF/X compliance PDF file
-pdfa : create PDF/A compliance PDF file
-log <string> : redirect output message to a log file
-nocenter : don't center the contents in the PDF file
-noempty : delete empty pages from PDF file
-linearize : optimize PDF file for fast web view
-margin <string> : set page margin to PDF file
-margin 10 : set margin to 10pt to left
-margin 10x10 : set margin to 10pt to left,top
-margin 10x10x10 : set margin to 10pt to left,top,right
-margin 10x10x10x10 : set margin to 10pt to left,top,right,bottom
-wtext <string> : watermark on printed document
-wtype <int> : type of watermark
0 : normal watermark
1 : watermark on header
2 : watermark on footer
-wf <string> : font name of watermark
-wh <int> : font size of watermark
-wb : specify bold font
-wi : specify an italic font
-wu : specify an underlined font
-ws : specify a strikeout font
-wa <int> : angle of watermark
-wbox <string> : a rectangle to output formatted text, it is only useful for "-walign" option, eg:
-wbox "0,0,595,842"
-wbox "0,0,612,792"
-walign <int> : set text align
1 : left
2 : center
3 : right
-wc <string> : color of watermark,
FF0000: Red color
00FF00: Green color
0000FF: Blue color
HexNum: Other colors
-wx <int> : X offset of watermark
-wy <int> : Y offset of watermark
-quick : convert image to PDF without processing
-usegs : use Ghostscript to convert PDF file to image file
-getpagecount : retrieve page count from PDF file
-v : print copyright and version info
-h : print usage information
-help : print usage information
--help : print usage information
-? : print usage information
-$ <string> : input registration key
Example:
img2pdf.dll C:\in.jpg C:\out.pdf
img2pdf.dll C:\in.tif C:\out.pdf
img2pdf.dll -pdfa C:\in.tif C:\out.pdf
img2pdf.dll -pdfx C:\in.tif C:\out.pdf
img2pdf.dll -linearize -pdfa C:\in.tif C:\out.pdf
img2pdf.dll C:\*.tif C:\*.pdf
img2pdf.dll -margin 100x100x100x100 C:\in.tif C:\out.pdf
img2pdf.dll -append 2 -width 612 -height 792 C:\*.tif C:\out.pdf
img2pdf.dll -append 2 C:\*.tif C:\out.pdf
img2pdf.dll C:\filelist.txt C:\out.pdf
img2pdf.dll -width 612 -height 792 C:\in.tif C:\out.pdf
img2pdf.dll -append 1 C:\in.tif C:\out.pdf
img2pdf.dll -append 2 C:\in.tif C:\out.pdf
img2pdf.dll -subject "subject" C:\in.tif C:\out.pdf
img2pdf.dll -ownerpwd 123 -keylen 2 -encryption 3900 C:\in.tif C:\out.pdf
img2pdf.dll -bookmark 3 C:\in.tif C:\out.pdf
img2pdf.dll -mergepdf "C:\1.pdf|C:\2.pdf|C:\3.pdf" C:\out.pdf
img2pdf.dll -mergepdf "C:\*.pdf" C:\out.pdf
img2pdf.dll -mergepdf "C:\filelist.txt" C:\out.pdf
Add watermarks into PDF files:
img2pdf.dll -wtext "ImagePDF" "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wc "0000FF" "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wx 100 -wy 100 "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wtype 1 "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wtype 2 "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wtype 0 -wa 45 "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wf "Arial" "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wf "Arial" -wh 20 "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wf "Arial" -wh 20 -wb -wi -wu -ws "C:\in.tif" C:\out.pdf
img2pdf.dll -wtext "ImagePDF" -wf "Arial" -walign 3 -wh 20 -wbox "0,770,612,792" "C:\in.tif" C:\out.pdf
If you want to convert a scanned TIFF file to an OCRed PDF file, you can simply use the following function call:
// "-ocr 1" is the option listed above as "create full-text searchable PDF file";
// it is followed by the quoted input TIFF path and the quoted output PDF path.
int nRet = Image2PDFOCR_PDFCmd("-ocr 1 \"D:\\temp\\13.tif\" \"D:\\temp\\out.pdf\"");
You can also specify the despeckle and deskew options to improve the OCR result, for example:
// Same call as the plain "-ocr 1" conversion, with "-skewcorrect" and "-despeckle"
// added; both are listed above as applying to black and white image files.
int nRet = Image2PDFOCR_PDFCmd("-ocr 1 -skewcorrect -despeckle \"D:\\temp\\13.tif\" \"D:\\temp\\out.pdf\"");
With VeryPDF Image to PDF OCR SDK for .NET, you can also retrieve the position of the recognized text in scanned TIFF and PDF files. The following VB.NET code shows how to read the position of each recognized word:
Imports System
Imports System.Text
Imports System.Text.Encoding
Imports Microsoft.VisualBasic
Imports System.IO
Imports System.Drawing
Imports System.Drawing.Imaging
''' <summary>
''' Sample form that OCRs "test1.pdf" from the application folder through
''' i2pocrsdk.dll, reads every recognized word with its rectangle, writes the
''' word back unchanged, and saves the resulting PDF next to the input file.
''' </summary>
Public Class Form1
    ' ---- P/Invoke declarations for i2pocrsdk.dll --------------------------------
    ' NOTE(review): handles and buffer pointers are declared As Integer (32 bits).
    ' That presumably matches a 32-bit build of the DLL - confirm before running
    ' this sample in a 64-bit process.
    Private Declare Function Image2PDFOCR_SinglePage_GetTextInfo Lib "i2pocrsdk.dll" (ByVal lpszPDFFile As String, ByRef lpChars As Integer, ByVal lpszOptions As String) As Integer
    Private Declare Sub Image2PDFOCR_SinglePage_FreeTextInfo Lib "i2pocrsdk.dll" (ByRef lpChars As Byte)
    Private Declare Function Image2PDFOCR_SinglePage_CreatePDF Lib "i2pocrsdk.dll" (ByVal lpszInPDFFile As String, ByRef lpChars As Byte, ByVal nCharCount As Integer, ByVal lpszOutPDFFile As String, ByVal lpszOptions As String) As Integer
    Private Declare Function Image2PDFOCR_PDFCmd Lib "i2pocrsdk.dll" (ByVal lpszOptions As String) As Integer
    Private Declare Function Image2PDFOCR_CreateSearchablePDF Lib "i2pocrsdk.dll" (ByVal lpTIFOrPDFFile As String, ByVal lpOutputFile As String, ByVal lpOptions As String) As Integer
    Private Declare Function Image2PDFOCR_GetTextHandle Lib "i2pocrsdk.dll" (ByVal lpszTIFOrPDFFile As String, ByVal lpszOptions As String) As Integer
    Private Declare Function Image2PDFOCR_GetOCRedPageCount Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer) As Integer
    Private Declare Function Image2PDFOCR_GetTextInfo Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal nPage As Integer, ByRef lpOutTextInfo As Integer) As Integer
    Private Declare Function Image2PDFOCR_SetTextInfo Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal nPage As Integer, ByRef lpInTextInfo As Byte, ByVal nCharNum As Integer) As Integer
    Private Declare Sub Image2PDFOCR_FreeTextHandle Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer)
    Private Declare Function Image2PDFOCR_CreatePDF Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal lpszOutPDFFile As String, ByVal lpszOptions As String) As Integer
    Private Declare Function Image2PDFOCR_CreatePDFInMemory Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal lpszOptions As String, ByRef lpDataBuf As Integer, ByRef nDataBufLen As Integer) As Integer
    Private Declare Function Image2PDFOCR_GetPageCount Lib "i2pocrsdk.dll" (ByVal lpszPDFFile As String) As Integer
    Private Declare Function Image2PDFOCR_GetWordCountOnPage Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal nPage As Integer) As Integer
    Private Declare Function Image2PDFOCR_GetWordInfoByIndex Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal nPage As Integer, ByVal nWordIndex As Integer, ByRef X As Integer, ByRef Y As Integer, ByRef Width As Integer, ByRef Height As Integer, ByRef lpText As Byte) As Integer
    Private Declare Function Image2PDFOCR_SetWordInfoByIndex Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByVal nPage As Integer, ByVal nWordIndex As Integer, ByVal X As Integer, ByVal Y As Integer, ByVal Width As Integer, ByVal Height As Integer, ByVal lpText As String) As Integer
    Private Declare Function Image2PDFOCR_ReadPDFFromMemory Lib "i2pocrsdk.dll" (ByVal hImage2PDFData As Integer, ByRef lpDataBuf As Byte, ByVal nDataBufLen As Integer) As Integer

    ''' <summary>
    ''' Rectangle and raw text bytes of one recognized word, as filled in by
    ''' Image2PDFOCR_GetWordInfoByIndex.
    ''' </summary>
    Public Structure TEXTPOSITION
        Public x As Integer
        Public y As Integer
        Public width As Integer
        Public height As Integer
        ' Receive buffer for the word text; must be allocated with init() before use.
        Public text() As Byte

        ''' <summary>Allocates the text buffer (indices 0..500, i.e. 501 bytes).</summary>
        Sub init()
            ReDim text(500)
        End Sub
    End Structure

    ''' <summary>
    ''' Decodes a zero-terminated UTF-8 byte buffer into a String, stopping at the
    ''' first zero byte so the unused tail of the buffer does not end up in the
    ''' string as NUL characters.
    ''' </summary>
    ''' <param name="buffer">Buffer filled by the native library; may be Nothing.</param>
    ''' <returns>The decoded text, or an empty string when the buffer is Nothing.</returns>
    Private Shared Function DecodeNullTerminatedUtf8(ByVal buffer() As Byte) As String
        If buffer Is Nothing Then
            Return String.Empty
        End If

        Dim nLength As Integer = System.Array.IndexOf(Of Byte)(buffer, 0)
        If nLength < 0 Then
            ' No terminator found: decode the whole buffer.
            nLength = buffer.Length
        End If

        Dim enc As New System.Text.UTF8Encoding()
        Return enc.GetString(buffer, 0, nLength)
    End Function

    ''' <summary>
    ''' Runs the OCR round trip on "test1.pdf" and writes "test1_vb_net_ocred.pdf",
    ''' both located in the application start-up folder.
    ''' </summary>
    Private Sub OCRPDFButton_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles OCRPDFButton.Click
        Dim strOptions As String
        Dim strInFile As String
        Dim strOutFile As String
        Dim strLogMsg As String

        strInFile = Application.StartupPath() & "\test1.pdf"
        strOutFile = Application.StartupPath() & "\test1_vb_net_ocred.pdf"
        strOptions = "-pidpi 300"

        Dim nPageCount As Integer = Image2PDFOCR_GetPageCount(strInFile)
        strLogMsg = """" & strInFile & """ file contains """ & Str(nPageCount) & """ pages."
        MsgBox(strLogMsg)

        Dim hOCRTextSDK As Integer = Image2PDFOCR_GetTextHandle(strInFile, strOptions)
        If hOCRTextSDK <> 0 Then
            Try
                Dim nOCRedPageCount As Integer = Image2PDFOCR_GetOCRedPageCount(hOCRTextSDK)
                For page = 0 To nOCRedPageCount - 1
                    Dim nWordCount As Integer = Image2PDFOCR_GetWordCountOnPage(hOCRTextSDK, page)
                    For nWordIndex = 0 To nWordCount - 1
                        Dim nWord As TEXTPOSITION
                        nWord.init()

                        ' Passing element 0 ByRef hands the native side the start of the buffer.
                        Image2PDFOCR_GetWordInfoByIndex(hOCRTextSDK, page, nWordIndex, nWord.x, nWord.y, nWord.width, nWord.height, nWord.text(0))

                        ' Decode only up to the terminator instead of all 501 bytes.
                        Dim strWord As String = DecodeNullTerminatedUtf8(nWord.text)

                        ' The word is written back unchanged; a real application would edit
                        ' strWord or the rectangle here.
                        ' NOTE(review): a ByVal String in a Declare is marshalled as ANSI by
                        ' default, so non-ANSI characters may not survive - confirm against
                        ' the SDK documentation.
                        Image2PDFOCR_SetWordInfoByIndex(hOCRTextSDK, page, nWordIndex, nWord.x, nWord.y, nWord.width, nWord.height, strWord)
                    Next nWordIndex
                Next page

                ' First call reports the required size through nDataBufLen.
                Dim nDataBufLen As Integer = 0
                Dim nRet As Integer = Image2PDFOCR_CreatePDFInMemory(hOCRTextSDK, strOptions, 0, nDataBufLen)
                If nRet = 0 AndAlso nDataBufLen > 0 Then
                    ' A VB array bound is the upper index, so (n - 1) yields exactly n bytes.
                    ' The previous ReDim vecFileData(nDataBufLen) allocated one byte too many
                    ' and that extra zero byte was appended to the output file.
                    Dim vecFileData(nDataBufLen - 1) As Byte

                    ' NOTE(review): the return value is not checked, as in the original
                    ' sample; its meaning is not documented here.
                    nRet = Image2PDFOCR_ReadPDFFromMemory(hOCRTextSDK, vecFileData(0), nDataBufLen)

                    ' Using closes the stream even if Write throws.
                    Using oFileStream As New System.IO.FileStream(strOutFile, System.IO.FileMode.Create)
                        oFileStream.Write(vecFileData, 0, nDataBufLen)
                    End Using
                End If
            Finally
                ' Always release the native handle, including on failure paths.
                Image2PDFOCR_FreeTextHandle(hOCRTextSDK)
            End Try
        End If
    End Sub
End Class
The full version of the Image to PDF OCR Converter SDK for .NET can be purchased from the following web page:
https://www.verypdf.com/app/image-to-pdf-ocr-converter/try-and-buy.html#buy-ocr-sdk-for-net