[VeryPDF Release Notes] VeryPDF Releases a new version of PDF to Text OCR SDK for .NET today

VeryPDF has released a new version of PDF to Text OCR SDK for .NET today. The new version can be downloaded from the following web pages:

https://www.verypdf.com/app/pdf-to-text-ocr-converter/sdk-for-net.html
https://www.verypdf.com/dl2.php/pdf2txtocrsdk.zip

image

The following are some examples of how to call the "PDF to Text OCR SDK for .NET" software:

C# Source Code to call PDF to Text OCR SDK for .NET product,

/// <summary>
/// Click handler that runs a sample OCR conversion through the late-bound
/// COM class "pdfcom.pdfclass": it converts "test-color.tif" from the
/// application's startup folder into "_test-color.pdf" in the same folder
/// and shows the integer returned by com_PDFToTextOCRSDKShell.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string strStartupPath = System.Windows.Forms.Application.StartupPath + "\\";

    System.Type pdf2vecName = Type.GetTypeFromProgID("pdfcom.pdfclass");
    if (pdf2vecName == null)
    {
        // GetTypeFromProgID returns null when the ProgID is unknown; tell the
        // user instead of silently doing nothing.
        MessageBox.Show("The COM class \"pdfcom.pdfclass\" is not registered on this computer.");
        return;
    }

    string strInFile = strStartupPath + "test-color.tif";
    string strOutFile = strStartupPath + "_test-color.pdf";

    // The whole command line is passed to the SDK as one string; both paths
    // are quoted so that folders containing spaces survive.
    string strCmd = "-$ XXXXXXXXXXXXXXXXXXXX -ocr -ocrmode 4 -outboxfile \"" + strInFile + "\" \"" + strOutFile + "\"";
    // Alternative command lines:
    //string strCmd = "-$ XXXXXXXXXXXXXXXXXXXX -ocr -ocrmode 4 -res 72 \"" + strInFile + "\" \"" + strOutFile + "\"";
    //string strCmd = "-$ XXXXXXXXXXXXXXXXXXXX -ocr -ocrmode 4 -res 72 -bitcount 24 \"" + strInFile + "\" \"" + strOutFile + "\"";
    //string strCmd = "-$ XXXXXXXXXXXXXXXXXXXX \"" + strInFile + "\" \"" + strOutFile + "\"";

    object pdf2vec = null;
    try
    {
        pdf2vec = Activator.CreateInstance(pdf2vecName);

        MessageBox.Show(strCmd);
        object[] argn = new object[] { strCmd };
        int nRet = (int)pdf2vecName.InvokeMember("com_PDFToTextOCRSDKShell", BindingFlags.InvokeMethod, null, pdf2vec, argn);
        MessageBox.Show("Return Value is: " + nRet);
    }
    finally
    {
        // Release the COM reference deterministically rather than waiting for
        // the garbage collector to finalize the runtime callable wrapper.
        if (pdf2vec != null && System.Runtime.InteropServices.Marshal.IsComObject(pdf2vec))
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(pdf2vec);
        }
    }
}

ASP Source Code to call PDF to Text OCR SDK for .NET product,

<%
    ' Sample: run an OCR conversion of D:\test.pdf into D:\_out.pdf through
    ' the "pdfcom.pdfclass" COM object.
    strInFile = "D:\test.pdf"
    strOutFile = "D:\_out.pdf"

    ' The SDK receives the complete command line as a single string; the
    ' doubled quotes wrap each path in literal double-quote characters.
    strCmd = "-$ XXXXXXXXXXXXXXXXXXXX -ocr -lang eng -ocrmode 4 -outboxfile """ & strInFile & """ """ & strOutFile & """"

    Set VeryPDFCom = Server.CreateObject("pdfcom.pdfclass")
    nReturn = VeryPDFCom.com_PDFToTextOCRSDKShell(strCmd)

    ' NOTE(review): nReturn is not inspected, so this message is written
    ' whatever the SDK call returned.
    Response.Write "Conversion finished."
%>

JavaScript Source Code to call PDF to Text OCR SDK for .NET product,

<html>
<script type="text/javascript">

/**
 * Runs a sample OCR conversion through the "pdfcom.pdfclass" ActiveX object.
 *
 * Builds one command-line string (license key, OCR options, quoted input and
 * output paths), shows it in an alert, passes it to
 * com_PDFToTextOCRSDKShell, and shows a second alert when the call returns.
 * If creating the ActiveX object or invoking the method throws, the error is
 * shown in an alert and the function returns early.
 */
function PDFToTextOCRSDKConverter()
{
    // Declared with var so the names stay local to this function instead of
    // becoming implicit globals (which also throws in strict mode).
    var strInPDF = "D:\\downloads\\Test_1_file.pdf";
    var strOutImage = "D:\\downloads\\_test_out.pdf";
    var strCmd = "-$ XXXXXXXXXXXXXXXXXXXX -ocr -lang eng -ocrmode 4 -outboxfile \"" + strInPDF + "\" \"" + strOutImage + "\"";

    alert("Start converting,\n\n" + strCmd);
    try {
        var VeryPDFCom = new ActiveXObject("pdfcom.pdfclass");
        VeryPDFCom.com_PDFToTextOCRSDKShell(strCmd);
    } catch (error) {
        alert(error + '\n\nThis error occurred while\nattempting to load VeryPDF PDF to Text OCR Converter COM/SDK.');
        return;
    }
    alert("Finished the converting.\n\n" + strCmd);
}

</script>

<body>
    <input type="button" value="Test VeryPDF PDF to Text OCR Converter COM/SDK Function" id="btn" onclick="PDFToTextOCRSDKConverter()">
</body>
</html>

PHP Source Code to call PDF to Text OCR SDK for .NET product,

<?php

// Sample: OCR "test-multi-columns.pdf" from the parent of this script's
// folder into "_out.pdf" next to this script, via the "pdfcom.pdfclass"
// COM object.
$strFolderDir = __DIR__;
$strParentDir = dirname($strFolderDir);

$strInFile  = $strParentDir . '\\test-multi-columns.pdf';
$strOutFile = $strFolderDir . '\\_out.pdf';

$strLicenseKey = '-$ XXXX-XXXX-XXXX-XXXX';

// The SDK takes the complete command line as one string; both paths are
// wrapped in double quotes.
$strCmd = '-ocr -lang eng -ocrmode 4 -outboxfile ' . $strLicenseKey
        . ' "' . $strInFile . '"'
        . ' "' . $strOutFile . '"';

echo $strCmd, "\n";

$VeryPDFComObject = new COM("pdfcom.pdfclass");
$VeryPDFComObject->com_PDFToTextOCRSDKShell($strCmd);

// NOTE(review): the call's return value is discarded, so this line is
// printed regardless of what the SDK returned.
echo "Conversion finished.\n";

?>

image

VB Source Code to call PDF to Text OCR SDK for .NET product,

' Click handler that runs a sample OCR conversion through the
' "pdfcom.pdfclass" COM object: converts "test-multi-columns.pdf" in the
' application folder into "_test_out.pdf" in the same folder.
Private Sub Command1_Click()
    Dim strFolderDir As String
    Dim strInFile As String
    Dim strOutFile As String
    Dim strLicenseKey As String
    Dim strCmd As String
    Dim VeryPDFCom As Object
    Dim nReturn As Long

    ' This guarantees strFolderDir ends with exactly one backslash, so the
    ' file names below are appended WITHOUT a leading "\" (the previous code
    ' added one and produced "folder\\file" paths).
    strFolderDir = App.Path & IIf(Right$(App.Path, 1) <> "\", "\", "")
    strInFile = strFolderDir & "test-multi-columns.pdf"
    strOutFile = strFolderDir & "_test_out.pdf"
    strLicenseKey = "-$ XXXX-XXXX-XXXX-XXXX"

    ' The SDK takes the complete command line as one string; the doubled
    ' quotes wrap each path in literal double-quote characters.
    strCmd = "-ocr -lang eng -ocrmode 4 -outboxfile " & strLicenseKey & " """ & strInFile & """ """ & strOutFile & """"

    Set VeryPDFCom = CreateObject("pdfcom.pdfclass")
    nReturn = VeryPDFCom.com_PDFToTextOCRSDKShell(strCmd)
    ' Drop the COM reference as soon as the call has returned.
    Set VeryPDFCom = Nothing

    ' NOTE(review): nReturn is not inspected, so this message is shown
    ' whatever the SDK call returned.
    MsgBox "Conversion finished."
End Sub

VB Script Source Code to call PDF to Text OCR SDK for .NET product,

' Sample: OCR "test-multi-columns.pdf" from the parent of this script's
' folder into "_out.pdf" next to this script, via the "pdfcom.pdfclass"
' COM object.

' Wraps a value in literal double-quote characters.
Function Quoted(strValue)
    Quoted = """" & strValue & """"
End Function

Set fso = CreateObject("Scripting.FileSystemObject")
strFolderDir = fso.GetParentFolderName(WScript.ScriptFullName)
strParentDir = fso.GetParentFolderName(strFolderDir)

strInFile = strParentDir & "\test-multi-columns.pdf"
strOutFile = strFolderDir & "\_out.pdf"
strLicenseKey = "-$ XXXX-XXXX-XXXX-XXXX"

' The SDK takes the complete command line as one string: options, license
' key, then the quoted input and output paths, separated by single spaces.
strCmd = Join(Array("-ocr -lang eng -ocrmode 4 -outboxfile", strLicenseKey, Quoted(strInFile), Quoted(strOutFile)), " ")

MsgBox strCmd

Set VeryPDFCom = CreateObject("pdfcom.pdfclass")
nReturn = VeryPDFCom.com_PDFToTextOCRSDKShell(strCmd)

' NOTE(review): nReturn is not inspected, so this message is shown whatever
' the SDK call returned.
MsgBox "Conversion finished."

VB.NET Source Code to call PDF to Text OCR SDK for .NET product,

Imports System.Reflection
Imports System.Threading

''' <summary>
''' Sample form whose button runs an OCR conversion through the
''' pdfcom.pdfclass COM object.
''' </summary>
Public Class Form1
    ''' <summary>
    ''' Converts "test-color.tif" in the application's startup folder into
    ''' "_test-color.pdf" in the same folder and shows the value returned by
    ''' com_PDFToTextOCRSDKShell. Any exception is reported in a message box.
    ''' </summary>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        Try
            Dim startupDir As String = Application.StartupPath
            Dim sourcePath As String = startupDir & "\test-color.tif"
            Dim targetPath As String = startupDir & "\_test-color.pdf"

            ' The SDK takes the complete command line as one string; the
            ' doubled quotes wrap each path in literal double-quote characters.
            Dim commandLine As String = "-$ XXXXXXXXXXXXXXXXXXXX -ocr -lang eng -ocrmode 4 -outboxfile """ & sourcePath & """ """ & targetPath & """"
            MsgBox(commandLine)

            ' Local deliberately not called "pdfcom" so it does not shadow the
            ' namespace used in the type name.
            Dim converter As pdfcom.pdfclass = CType(CreateObject("pdfcom.pdfclass"), pdfcom.pdfclass)
            Dim result As Integer = converter.com_PDFToTextOCRSDKShell(commandLine)
            MsgBox("com_PDFToTextOCRSDKShell method does return: " & CStr(result))
        Catch ex As Exception
            MsgBox(ex.Message)
        End Try
    End Sub
End Class

VC++ Source Code to call PDF to Text OCR SDK for .NET product,

#include <windows.h>
#include <stdio.h>
#include <string.h>

// Declaration of the SDK conversion routine called by main(); it is defined
// outside this file. It takes the complete command line as a single string
// and returns an int that main() prints.
// NOTE(review): __declspec(dllexport) on a function this program only calls
// looks unusual -- __declspec(dllimport) is the conventional attribute for a
// consumed DLL function; confirm against the SDK's own header.
__declspec(dllexport)
int WINAPI PDFToTextOCRSDKShell(const char *lpszCmdLine);

int main(int argc, char* argv[])
{
    if(argc == 1)
    {
        printf("Usage: [options] <in-file> [<out-file>]\n");
        return 0;
    }
    char szCmdBuf[2048] = {0};
    for(int i = 1; i < argc; i++)
    {
        strcat(szCmdBuf, "\"");
        strcat(szCmdBuf, argv[i]);
        strcat(szCmdBuf, "\" ");
    }
    int nRet = PDFToTextOCRSDKShell(szCmdBuf);
    printf("nRet = %d\n",nRet);
    return 0;
}
// GUI-subsystem entry point: ignores its own parameters and forwards the
// CRT-parsed argument vector (__argc / __argv) to main().
// The third parameter is LPSTR, matching the Win32 WinMain prototype; the
// previous LPTSTR expands to LPWSTR in UNICODE builds and then conflicts
// with that prototype.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
    LPSTR lpCmdLine, int nCmdShow)
{
    return main(__argc,__argv);
}

image

The following are the command line options which are supported by the PDF to Text OCR SDK for .NET product:

C:\>pdf2txtocr.exe
PDF to Text Converter OCR Command Line v3.0
Web:
https://www.verypdf.com/
Email: support@verypdf.com
Release Date: Nov  6 2012
-------------------------------------------------------
Description:
  Convert text based PDF files to plain text files.
  Convert scanned PDF files and image files to plain text files and searchable PDF files by OCR technology.
  Convert embedded fonts in PDF file to a new searchable PDF file.
  Keep color during PDF, TIFF and image formats to searchable PDF files conversion.
Input formats:
  1. Text based PDF files
  2. Scanned PDF files
  3. Scanned single page and multi-page TIFF files
  4. Scanned JPEG, PNG, BMP, GIF, PCX, TGA, PBM, PNM, PPM files
Output formats:
  1. Plain text files without layout
  2. Plain text files with layout
  3. Plain text based PDF files
  4. Attach OCRed text layer to original PDF file
  5. OCRed BW PDF files with hidden text layer
  6. OCRed Color PDF files with hidden text layer
  7. OCRed Grayscale PDF files with hidden text layer
-------------------------------------------------------
Usage: pdf2txtocr.exe [options] <PDF-file> <Text-file>
  -firstpage <int>    : first PDF page to convert
  -lastpage <int>     : last PDF page to convert
  -res <int>          : set resolution, the unit is DPI (default is 300 dpi)
  -ownerpwd <string>  : set owner password for encrypted PDF file
  -userpwd <string>   : set user password for encrypted PDF file
  -layout             : maintain original physical layout
  -noc                : don't insert page breaks 0x0C between pages in text file
  -bitcount <int>     : set color depth when render PDF page to image data, it can be set 1, 8, 24, default is 8bit
  -rotate <int>       : rotate pages before OCR
  -threshold <int>    : lightness threshold that used to convert image to B&W
  -ocr                : enable OCR function for scanned PDF file
  -lang <string>      : choose the language for OCR engine
  -ocrmode <int>      : set OCR mode
    -ocrmode 0: output to text file
    -ocrmode 1: OCR PDF pages and insert new text layer under original PDF pages
    -ocrmode 2: output to plain text based PDF file
    -ocrmode 3: output to OCRed PDF file (BW) with hidden text layer
    -ocrmode 4: output to OCRed PDF file (Color) with hidden text layer
  -text <string>      : add additional text at end of each text page, this parameter supports the following variables:
    %PageNumber%: current page number
    %PageCount% : total page count of PDF file
  -outboxfile         : output [X, Y, Width, Height] information for each word when OCR
  -producer <string>  : Set 'producer' to PDF file
  -creator <string>   : Set 'creator' to PDF file
  -subject <string>   : Set 'subject' to PDF file
  -title <string>     : Set 'title' to PDF file
  -author <string>    : Set 'author' to PDF file
  -keywords <string>  : Set 'keywords' to PDF file
  -openpwd <string>   : Set 'open password' to PDF file
  -ownerpwd <string>  : Set 'owner password' to PDF file
  -keylen <int>       : Key length (40 or 128 bit)
        -keylen 0:  40 bit RC4 encryption (Acrobat 3 or higher)
        -keylen 1: 128 bit RC4 encryption (Acrobat 5 or higher)
        -keylen 2: 128 bit RC4 encryption (Acrobat 6 or higher)
  -encryption <int>   : Restrictions
        -encryption    0: Encrypt the file only
        -encryption 3900: Deny anything
        -encryption    4: Deny printing
        -encryption    8: Deny modification of contents
        -encryption   16: Deny copying of contents
        -encryption   32: No commenting
        ===128 bit encryption only -> ignored if 40 bit encryption is used
        -encryption  256: Deny FillInFormFields
        -encryption  512: Deny ExtractObj
        -encryption 1024: Deny Assemble
        -encryption 2048: Disable high res. printing
        -encryption 4096: Do not encrypt metadata
  -$ <string>         : input your License Key
Examples:
  pdf2txtocr.exe C:\in.pdf C:\out.txt
  pdf2txtocr.exe -firstpage 1 -lastpage 1 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ocr -res 300 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ownerpwd 123 -userpwd 456 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -layout C:\in.pdf C:\out.txt
  pdf2txtocr.exe -noc C:\in.pdf C:\out.txt
  pdf2txtocr.exe C:\in.tif C:\out.txt
  pdf2txtocr.exe C:\in.jpg C:\out.txt
  pdf2txtocr.exe C:\in.bmp C:\out.txt
  pdf2txtocr.exe C:\in.png C:\out.txt
  pdf2txtocr.exe -ocr -lang eng C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ocr -bitcount 1 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ocr -bitcount 8 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ocr -bitcount 24 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ocr -lang deu C:\in.pdf C:\out.txt
  pdf2txtocr.exe -lang deu C:\in.tif C:\out.txt
  pdf2txtocr.exe -text "PageText %PageNumber% of %PageCount%" C:\in.pdf C:\out.txt
  pdf2txtocr.exe -subject "subject" C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ownerpwd 123 -keylen 2 -encryption 3900 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -subject "subject" -title "title" C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang eng -ocrmode 0 C:\in.pdf C:\out.txt
  pdf2txtocr.exe -ocr -lang deu -ocrmode 1 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang eng -ocrmode 2 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang eng -ocrmode 3 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang eng -ocrmode 2 -outboxfile C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang fra -ocrmode 1 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang ita -ocrmode 1 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang nld -ocrmode 1 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocr -lang spa -ocrmode 1 C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -bitcount 24 -ocrmode 4 -ocr C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -bitcount 8 -ocrmode 4 -ocr C:\in.pdf C:\out.pdf
  pdf2txtocr.exe -ocrmode 4 -ocr C:\in.tif C:\out.pdf
  pdf2txtocr.exe -ocrmode 3 -threshold 200 -ocr C:\in.tif C:\out.pdf
  pdf2txtocr.exe -ocrmode 4 -rotate 90 -ocr C:\in.tif C:\out.pdf

image

You can download the trial version of "PDF to Text OCR SDK for .NET" from the following web pages to try it:

https://www.verypdf.com/app/pdf-to-text-ocr-converter/sdk-for-net.html
https://www.verypdf.com/dl2.php/pdf2txtocrsdk.zip

If you encounter any problems, please feel free to let us know and we will assist you as soon as possible.

VN:F [1.9.20_1166]
Rating: 10.0/10 (2 votes cast)
VN:F [1.9.20_1166]
Rating: 0 (from 0 votes)
[VeryPDF Release Notes] VeryPDF Releases a new version of PDF to Text OCR SDK for .NET today, 10.0 out of 10 based on 2 ratings

Related Posts

Leave a Reply

Your email address will not be published. Required fields are marked *


Verify Code   If you cannot see the CheckCode image, please refresh the page again!