BoldSign: A modern eSignature application with affordable pricing. Sign up today for unlimited document usage!
/// <summary>
/// Decides whether a loaded PDF page is blank, judging only by the images
/// extracted from it. Text or vector content on the page is not inspected here.
/// </summary>
/// <param name="lpage">The loaded PDF page to inspect.</param>
/// <returns>
/// <c>true</c> when the page yields no images, or when OCR recognises no text in
/// any of its images; <c>false</c> as soon as one image contains text or cannot
/// be checked.
/// </returns>
private bool IsBlankPage(PdfLoadedPage lpage)
{
    //Extract images
    Image[] images = lpage.ExtractImages();

    // NOTE(review): a null result is treated like an empty array (no images);
    // confirm that ExtractImages can actually return null.
    if (images == null || images.Length == 0)
    {
        return true;
    }

    foreach (Image img in images)
    {
        // Only bitmaps can be handed to OCR. Anything else cannot be verified as
        // empty, so the page is conservatively kept rather than passing null on.
        Bitmap bitmap = img as Bitmap;
        if (bitmap == null)
        {
            return false;
        }

        // PerformOCR returns true when NO text was recognised in the image.
        if (!PerformOCR(bitmap))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Runs OCR over an image and reports whether no text was recognised.
/// </summary>
/// <param name="img">The bitmap to run OCR on.</param>
/// <returns>
/// <c>true</c> when the OCR result is null, empty, or consists only of whitespace
/// (i.e. the image is "empty" of text); <c>false</c> when text was recognised.
/// </returns>
private bool PerformOCR(Bitmap img)
{
    //Create a new OCR processor
    using (OCRProcessor processor = new OCRProcessor(tesseractBinariesPath))
    {
        //Set language.
        processor.Settings.Language = Languages.English;

        //perform OCR
        string text = processor.PerformOCR(img, tessdataPath);

        // Whitespace-only output (for example a lone line break) carries no text,
        // so it is counted as empty as well.
        return string.IsNullOrWhiteSpace(text);
    }
}
Hi.
Thanks for the answer.
A question about your answer:
If the image doesn't include text (e.g., a picture), PerformOCR will return an empty value and we will remove a good page (not only the empty ones).
/// <summary>
/// Checks whether a bitmap is "empty": made up only of pure white and pure black
/// pixels, with black pixels covering less than 25% of the image.
/// </summary>
/// <param name="image">The bitmap to inspect.</param>
/// <returns>
/// <c>false</c> as soon as a pixel that is neither pure white nor pure black is
/// found, or once black pixels reach 25% of all pixels; otherwise <c>true</c>.
/// </returns>
private bool IsEmptyImage(Bitmap image)
{
    // Read the dimensions once instead of on every loop iteration.
    int width = image.Width;
    int height = image.Height;

    // 64-bit arithmetic so the pixel total cannot overflow on large images.
    long totalPixels = (long)width * height;
    long blackPixelCount = 0;

    for (int x = 0; x < width; x++)
    {
        for (int y = 0; y < height; y++)
        {
            Color color = image.GetPixel(x, y);

            if (color.R == 255 && color.G == 255 && color.B == 255)
            {
                //Skip the white pixels
                continue;
            }

            if (color.R != 0 || color.G != 0 || color.B != 0)
            {
                //Colored pixels
                return false;
            }

            //Get the black pixels count
            blackPixelCount++;

            //Suspect 25% of image have black pixels then it is not an empty image.
            // Compared as an exact ratio, and only after a black pixel was counted,
            // so images smaller than 100 pixels do not trip a truncated threshold of 0.
            if (blackPixelCount * 100 >= totalPixels * 25)
            {
                return false;
            }
        }
    }

    return true;
}
|
Hi. Thanks for your help.
Your solution works well, but it is very slow. It takes 1 minute to process a 42-page PDF file.
/// <summary>
/// Checks whether a bitmap is "empty": made up only of pure white and pure black
/// pixels, with black pixels covering less than 25% of the image. Pixel data is
/// read through LockBits instead of per-pixel GetPixel calls.
/// </summary>
/// <param name="image">The bitmap to inspect.</param>
/// <returns>
/// <c>false</c> as soon as a pixel that is neither pure white nor pure black is
/// found, or once black pixels reach 25% of all pixels; otherwise <c>true</c>.
/// </returns>
private bool IsEmpty(Bitmap image)
{
    const int BytesPerPixel = 4;

    int width = image.Width;
    int height = image.Height;
    Rectangle bounds = new Rectangle(0, 0, width, height);

    // Request 32bpp ARGB so every pixel is exactly four bytes (B, G, R, A)
    // whatever the source format is; the data is only read, never written back.
    BitmapData bmpData = image.LockBits(bounds, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        // One reusable row buffer: stride padding is never inspected and no
        // full-size copy of the image is allocated.
        byte[] row = new byte[width * BytesPerPixel];

        // 64-bit arithmetic so the pixel total cannot overflow on large images.
        long totalPixels = (long)width * height;
        long blackPixelCount = 0;
        long scan0 = bmpData.Scan0.ToInt64();

        for (int y = 0; y < height; y++)
        {
            // Stride is applied with its sign, so bottom-up layouts are walked correctly.
            IntPtr rowStart = new IntPtr(scan0 + (long)y * bmpData.Stride);
            Marshal.Copy(rowStart, row, 0, row.Length);

            for (int offset = 0; offset < row.Length; offset += BytesPerPixel)
            {
                byte blue = row[offset];
                byte green = row[offset + 1];
                byte red = row[offset + 2];
                // The alpha byte (offset + 3) is deliberately ignored; only RGB is compared.

                if (red == 255 && green == 255 && blue == 255)
                {
                    //Skip the white pixels
                    continue;
                }

                if (red != 0 || green != 0 || blue != 0)
                {
                    //Colored pixels
                    return false;
                }

                //Get the black pixels count
                blackPixelCount++;

                //Suspect 25% of image have black pixels then it is not an empty image.
                if (blackPixelCount * 100 >= totalPixels * 25)
                {
                    return false;
                }
            }
        }

        return true;
    }
    finally
    {
        // Unlock the bits, including on early return or exception.
        image.UnlockBits(bmpData);
    }
}
|