Is there a way to adjust the size of a scanned PDF page based on the size of the original page content? Both pages are A4, but the content on the scanned page does not have the same position and scale as on the original.
The procedure I have tried so far is this:
Read the original document and the scanned document.
Pass the scanned document through an OCR processor.
Search both documents for a common piece of text (for example, a keyword).
Based on the position and size of that keyword in each document, readjust the content of the scanned page. So far this has given poor results.
// Aligns a scanned PDF to a reference ("modello") PDF: the scanned document is OCR'd,
// the keyword "WORD:" is located in both documents, and every scanned page is redrawn
// scaled and shifted so that the keyword box coincides with the one in the reference.
PdfLoadedDocument loadedDocumentModello = new PdfLoadedDocument(helperModello.DocumentStream); // Original pdf
PdfLoadedDocument lDoc = new PdfLoadedDocument(helper.DocumentStream); // Scanned pdf
// Keyword bounding boxes filled in by FindText; entry 0 is read below for each document.
Dictionary<int, List<RectangleF>> pppRectagli = new Dictionary<int, List<RectangleF>>();
Dictionary<int, List<RectangleF>> pppRectagliModello = new Dictionary<int, List<RectangleF>>();
using (OCRProcessor processor = new OCRProcessor(System.Configuration.ConfigurationManager.AppSettings[@"pathmTesseractBinaries"]))
{
    // NOTE(review): Tesseract language data is usually named with three-letter codes
    // (e.g. "eng"); confirm that "en" matches a traineddata file in the tessdata folder.
    processor.Settings.Language = "en";
    processor.Settings.TesseractVersion = TesseractVersion.Version4_0;
    processor.Settings.EnableNativeCall = true;
    processor.Settings.Performance = Performance.Slow;
    processor.Settings.OCREngineMode = OCREngineMode.LSTMOnly;
    string ocr = processor.PerformOCR(lDoc, System.Configuration.ConfigurationManager.AppSettings[@"pathmTesseract"], true);

    // Round-trip the OCR'd document through a stream and reload it before searching it.
    MemoryStream stream3 = new MemoryStream();
    lDoc.Save(stream3);
    lDoc.Close();
    byte[] docBytes3 = stream3.ToArray();
    helper.Load(docBytes3);
    lDoc = new PdfLoadedDocument(helper.DocumentStream);

    // Locate the shared keyword in both documents.
    lDoc.FindText("WORD:", out pppRectagli);
    loadedDocumentModello.FindText("WORD:", out pppRectagliModello);

    // Fail with a clear message instead of a KeyNotFoundException / ArgumentOutOfRangeException
    // when the keyword is missing from entry 0 of either result.
    List<RectangleF> fff;
    List<RectangleF> fffmodello;
    if (pppRectagli == null || !pppRectagli.TryGetValue(0, out fff) || fff == null || fff.Count == 0)
    {
        throw new System.InvalidOperationException("Keyword \"WORD:\" was not found on the first page of the scanned document.");
    }
    if (pppRectagliModello == null || !pppRectagliModello.TryGetValue(0, out fffmodello) || fffmodello == null || fffmodello.Count == 0)
    {
        throw new System.InvalidOperationException("Keyword \"WORD:\" was not found on the first page of the original document.");
    }
    // A zero-sized box would produce an infinite or zero scale factor below.
    if (fff[0].Width <= 0 || fff[0].Height <= 0 || fffmodello[0].Width <= 0 || fffmodello[0].Height <= 0)
    {
        throw new System.InvalidOperationException("Keyword \"WORD:\" has an empty bounding box; cannot compute the scale.");
    }

    // Raw (unscaled) differences between the scanned and the reference keyword boxes.
    float XfinaleDif = fff[0].X - fffmodello[0].X;
    float YfinaleDif = fff[0].Y - fffmodello[0].Y;
    float HfinaleDif = fff[0].Height - fffmodello[0].Height;
    float WfinaleDif = fff[0].Width - fffmodello[0].Width;

    // Per-axis factor that makes the scanned keyword box the same size as the reference one.
    float scaleX = fffmodello[0].Width / fff[0].Width;
    float scaleY = fffmodello[0].Height / fff[0].Height;

    PdfDocument document = new PdfDocument();
    document.PageSettings.Size = lDoc.Pages[0].Size;
    document.PageSettings.Margins.All = 0;
    document.EnableMemoryOptimization = true;
    for (int i = 0; i < lDoc.Pages.Count; i++)
    {
        PdfPage page = document.Pages.Add();
        PdfGraphics g = page.Graphics;
        g.ScaleTransform(scaleX, scaleY);
        PdfPageBase lpage = lDoc.Pages[i];
        PdfTemplate template = lpage.CreateTemplate();
        // The draw location is itself multiplied by the scale transform set above, so a
        // scanned point p ends up at scale * (location + p). Solving
        // scale * (location + scanned) = reference gives location = reference / scale - scanned.
        // (The previous "-(scanned - reference)" offset was only correct when the scale was 1.)
        // NOTE(review): assumes FindText bounds and graphics coordinates share the same
        // origin and units, and that g.Size equals the template size (true when every
        // scanned page has the size of page 0) - confirm.
        PointF poin = new PointF(fffmodello[0].X / scaleX - fff[0].X, fffmodello[0].Y / scaleY - fff[0].Y);
        g.DrawPdfTemplate(template, poin, g.Size);
    }

    // Serialize the aligned document and reload it in place of the scanned one.
    MemoryStream stream = new MemoryStream();
    document.Save(stream);
    document.Close();
    lDoc.Close(true);
    byte[] docBytes = stream.ToArray();
    helper.Load(docBytes);
    lDoc = new PdfLoadedDocument(helper.DocumentStream);