|
//Render the page at index 0 of the loaded PDF document to a bitmap, using 200 DPI on both axes.
const int pageIndex = 0;
const int exportDpi = 200;
Bitmap image = loadedDocument.ExportAsImage(pageIndex, exportDpi, exportDpi);
*The accuracy of the extracted text depends on the quality of the exported image. You can control the image quality by changing the DPI value used when exporting the image from the PDF document.
|
|
//Rectangular area of the exported image that contains the text to extract.
Rectangle region = new Rectangle(120, 122, 1360, 520);
using (OCRProcessor ocrEngine = new OCRProcessor("../../Tesseract binaries"))
{
    //Select English as the language used for recognition.
    ocrEngine.Settings.Language = Languages.English;

    //Crop the exported image down to the search region so that only that area is processed.
    System.Drawing.Imaging.PixelFormat cropFormat = System.Drawing.Imaging.PixelFormat.Format32bppArgb;
    using (Bitmap croppedImage = image.Clone(region, cropFormat))
    {
        //Run the OCR engine on the cropped image, pointing it at the tessdata folder, and keep the recognized text.
        string tessDataPath = @"../../Tessdata/";
        ocrText = ocrEngine.PerformOCR(croppedImage, tessDataPath);
    }
} |