' Source Code
Imports System
Imports System.IO
Imports System.Text
Imports iTextSharp.text
Imports System.Drawing.Imaging
Imports SD = System.Drawing
Imports VB = Microsoft.VisualBasic
Imports System.Windows.Resources
Imports System.Collections.Generic
Imports System.Runtime.Serialization
Imports System.Runtime.Serialization.Json
Imports System.Threading
Imports TS = tessnet2
Imports MODI
Public Class frmReadPDF
' sFileExtn holds the lower-case extension (without the dot) of the file picked in
' btnPickFile_Click. sFileName is not assigned anywhere in the visible code.
Dim sFileName, sFileExtn As String
' Folder paths. Only cxBasePath and cxDesktopPath are assigned in the visible code
' (see frmReadPDF_Load); the others are presumably set elsewhere - TODO confirm.
Public cxBasePath, cxTempPath, cxDesktopPath, cxDesktopLog, cxMyDocPath As String
' NOTE(review): not referenced in the visible code. &H132 = 306. Because the field is
' named DateTime it hides System.DateTime inside this class - confirm it is still needed.
Private DateTime = &H132 '306
''' <summary>
''' Caches the user's desktop folder and the process working directory when the form loads.
''' </summary>
''' <param name="sender">The form raising the Load event.</param>
''' <param name="e">Unused event data.</param>
Private Sub frmReadPDF_Load(ByVal sender As Object, ByVal e As System.EventArgs) Handles Me.Load
    cxDesktopPath = System.Environment.GetFolderPath(Environment.SpecialFolder.Desktop)
    ' Captured once here; cxBasePath is later used as the file dialog's initial
    ' directory and as the parent of the "tessdata" folder.
    cxBasePath = System.Environment.CurrentDirectory
End Sub
''' <summary>
''' Lets the user pick a PDF or image file (max. 2,000,000 bytes), stores its path and
''' extension in the form, and enables only the reader buttons that apply to that file type.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnPickFile_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnPickFile.Click
    ' Upper bound on the accepted file size, in bytes.
    Const MaxFileSizeBytes As Long = 2000000
    ' Extensions handled by the image/OCR readers; keep in sync with the dialog filter below.
    Dim imageExtensions() As String = {"bmp", "tif", "gif", "jpg", "png", "jpeg", "tiff"}

    rtbPdfText.Text = ""
    Try
        ' The dialog wraps native resources, so dispose it deterministically.
        Using ofd As New OpenFileDialog()
            ofd.Title = "Please Select a PDF/Image file"
            ofd.InitialDirectory = cxBasePath
            ' No spaces around "|" or between patterns: they would become part of the
            ' pattern text. bmp/tiff are listed because the readers below accept them.
            ofd.Filter = "PDF & Image files (*.pdf, *.jpg, *.jpeg, *.gif, *.tif, *.tiff, *.png, *.bmp)|*.pdf;*.jpg;*.jpeg;*.gif;*.tif;*.tiff;*.png;*.bmp"

            If ofd.ShowDialog() <> Windows.Forms.DialogResult.OK Then Exit Sub

            Dim fi As New FileInfo(ofd.FileName)
            If fi.Length > MaxFileSizeBytes Then
                '--// Limit file size upto 2MB
                MsgBox("Please choose a file of max size upto 2MB !", MsgBoxStyle.Exclamation Or MsgBoxStyle.OkOnly, Me.Text)
                Exit Sub
            End If

            txtFileName.Text = ofd.FileName

            ' Invariant lower-casing keeps the comparison below culture-independent
            ' (e.g. "GIF" under a Turkish locale).
            Dim extension As String = System.IO.Path.GetExtension(ofd.FileName).ToLowerInvariant()
            lblFileExtn.Text = extension
            ' TrimStart never yields Nothing, unlike VB's Replace on an empty string.
            sFileExtn = extension.TrimStart("."c)

            Dim isPdf As Boolean = (sFileExtn = "pdf")
            Dim isImage As Boolean = Array.IndexOf(imageExtensions, sFileExtn) >= 0

            ' Any other extension (typed manually into the dialog) disables every reader
            ' instead of leaving the buttons in the state of the previous selection.
            btnReadPdf.Enabled = isPdf
            btnReadFSM.Enabled = isPdf
            btnReadMDI.Enabled = isImage
            btnReadImg.Enabled = isImage
            btnReadTss.Enabled = isImage
        End Using
    Catch ex As Exception
        MsgBox(ex.Message)
    End Try
End Sub
''' <summary>
''' Runs Microsoft Office Document Imaging (MODI) OCR in English on the selected image
''' file and shows the recognised words, each preceded by a space, in the text box.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnReadMDI_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnReadMDI.Click
    Dim ocrText As New System.Text.StringBuilder()
    Dim md As New MODI.Document()
    Try
        md.Create(txtFileName.Text)
        md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, True, True)

        ' Walk every page image, not just the first, so multi-page files are read completely.
        For pageIndex As Integer = 0 To md.Images.Count - 1
            Dim pageImage As MODI.Image = DirectCast(md.Images(pageIndex), MODI.Image)
            Dim pageLayout As MODI.Layout = pageImage.Layout
            For wordIndex As Integer = 0 To pageLayout.Words.Count - 1
                Dim word As MODI.Word = DirectCast(pageLayout.Words(wordIndex), MODI.Word)
                ocrText.Append(" ").Append(word.Text)
            Next
        Next

        rtbPdfText.Text = ocrText.ToString()
    Catch ex As Exception
        MsgBox(ex.Message)
    Finally
        Try
            ' Close without saving so MODI lets go of the source file.
            md.Close(False)
        Catch closeEx As Exception
            ' Best-effort cleanup: Close can fail when Create never succeeded, and the
            ' original error has already been reported above.
        End Try
        ' Release the COM wrapper now instead of waiting for a garbage collection.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(md)
        md = Nothing
    End Try
End Sub
''' <summary>
''' Extracts the text of the selected PDF via GetTextFromPDF and shows it in the text box.
''' Any error is reported in a message box.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnReadPdf_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnReadPdf.Click
    Try
        rtbPdfText.Text = GetTextFromPDF(txtFileName.Text)
    Catch ex As Exception
        MsgBox(ex.Message)
    End Try
End Sub
''' <summary>
''' Returns the text of every page of a PDF, concatenated in page order with no separator.
''' </summary>
''' <param name="PdfFileName">Path of the PDF file to read.</param>
''' <returns>The extracted text; empty when no page yields text.</returns>
''' <remarks>Exceptions raised by iTextSharp while opening or parsing the file propagate to the caller.</remarks>
Public Function GetTextFromPDF(ByVal PdfFileName As String) As String
    Dim extracted As New System.Text.StringBuilder()
    Dim oReader As New iTextSharp.text.pdf.PdfReader(PdfFileName)
    Try
        ' PDF page numbers are 1-based.
        For pageNumber As Integer = 1 To oReader.NumberOfPages
            ' A fresh strategy per page, as in the original code, so no text carries over between pages.
            Dim strategy As New iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy()
            extracted.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(oReader, pageNumber, strategy))
        Next
    Finally
        ' Always close the reader so the file is released even when extraction fails.
        oReader.Close()
    End Try
    Return extracted.ToString()
End Function
''' <summary>
''' Shows the selected image file, re-encoded as JPEG and Base64, in the text box.
''' Does nothing when the current extension is not a supported image type.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnReadImg_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnReadImg.Click
    Dim validExtns() As String = {"bmp", "tif", "gif", "jpg", "png", "jpeg", "tiff"}

    ' IndexOf returns -1 for an unknown extension, and also when no file has been picked yet.
    If Array.IndexOf(validExtns, sFileExtn) < 0 Then Exit Sub

    Try
        ' Dispose the bitmap so the image file is not kept locked after the conversion.
        Using img As New Bitmap(txtFileName.Text)
            rtbPdfText.Text = ConvertImageToBase64String(img)
        End Using
    Catch ex As Exception
        ' Same reporting style as the other reader buttons (e.g. for a corrupt image file).
        MsgBox(ex.Message)
    End Try
End Sub
''' <summary>
''' Runs tessnet2 OCR over an image and returns the recognised text, one line per
''' detected text line, each terminated by CR/LF.
''' </summary>
''' <param name="bm">Image to recognise.</param>
''' <param name="language">Language code passed to Tesseract.Init.</param>
''' <param name="path">Data path passed to Tesseract.Init.</param>
''' <returns>
''' The recognised text. When an error occurs it is shown in a message box and whatever
''' text was collected up to that point (possibly empty) is returned.
''' </returns>
Public Shared Function OCRImage(ByVal bm As System.Drawing.Image, ByVal language As String, ByVal path As String) As String
    Dim recognised As New System.Text.StringBuilder()
    Dim oOCR As New tessnet2.Tesseract
    Try
        oOCR.Init(path, language, False)
        ' An empty rectangle is passed as the region, as in the original call.
        Dim words As List(Of tessnet2.Word) = oOCR.doOCR(bm, SD.Rectangle.Empty)
        Dim lineCount As Integer = tessnet2.Tesseract.LineCount(words)
        For lineIndex As Integer = 0 To lineCount - 1
            recognised.Append(tessnet2.Tesseract.GetLineText(words, lineIndex)).Append(vbCrLf)
        Next
    Catch ex As Exception
        MsgBox(ex.Message)
    Finally
        oOCR.Dispose()
    End Try
    Return recognised.ToString()
End Function
''' <summary>
''' Runs tessnet2 OCR on the selected image via ReadImageTextUsingTESS and shows the
''' result in the text box.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnReadTss_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnReadTss.Click
    ' Disabled experiment kept for reference: copy the screen area under PictureBox1
    ' into the bitmap and preview it.
    'Dim img As Bitmap = New Bitmap(txtFileName.Text)
    'Dim gfx As Graphics = Graphics.FromImage(img)
    'gfx.CopyFromScreen(New Point(Me.Location.X + PictureBox1.Location.X + 4, Me.Location.Y + PictureBox1.Location.Y + 30), New Point(0, 0), img.Size)
    'PictureBox1.Image = img
    rtbPdfText.Text = ReadImageTextUsingTESS()
End Sub
''' <summary>
''' Runs tessnet2 OCR on the file named in txtFileName, using the "tessdata" folder
''' below cxBasePath, and returns the recognised text.
''' </summary>
''' <returns>
''' The recognised text, one CR/LF-terminated line per detected text line, or an empty
''' string when the image cannot be loaded (the error is shown in a message box).
''' </returns>
''' <remarks>
''' NOTE(review): the active OCR statements of the previous body were lost (only an
''' orphan "Next" and fragments of commented-out experiments remained), so recognition
''' is delegated to OCRImage, which performs the line-by-line variant those comments
''' described. The language code "eng" is an assumption - confirm it matches the data
''' files shipped in the tessdata folder.
''' </remarks>
Public Function ReadImageTextUsingTESS() As String
    Dim tsDataPath As String = cxBasePath & "\tessdata"
    Dim recognised As String = ""
    Try
        ' Dispose the bitmap so the image file is not left locked after OCR.
        Using img As New Bitmap(txtFileName.Text)
            ' OCRImage initialises, uses and disposes its own Tesseract engine.
            recognised = OCRImage(img, "eng", tsDataPath)
        End Using
    Catch ex As Exception
        MsgBox(ex.Message)
    End Try
    Return recognised
End Function
''' <summary>
''' Encodes an image as JPEG and returns the bytes as a Base64 string.
''' </summary>
''' <param name="value">Image to encode; may be Nothing.</param>
''' <returns>The Base64 text of the JPEG data, or an empty string when <paramref name="value"/> is Nothing.</returns>
Public Function ConvertImageToBase64String(ByVal value As System.Drawing.Image) As String
    If value Is Nothing Then Return ""

    Using ms As New MemoryStream()
        value.Save(ms, ImageFormat.Jpeg)
        ' ToArray copies the whole buffer regardless of the stream position, so no
        ' flush or rewind is needed first.
        Return Convert.ToBase64String(ms.ToArray())
    End Using
End Function
''' <summary>
''' Reads the selected file as raw bytes and shows them, decoded as ASCII, in the text box.
''' Any error is reported in a message box.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnReadFSM_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnReadFSM.Click
    Try
        ' ReadAllBytes returns exactly the file's bytes. The previous ReDim used the
        ' length as the upper bound, which allocated one extra byte (a trailing NUL in
        ' the text), and its single Read call was not guaranteed to fill the buffer.
        Dim bteRead() As Byte = File.ReadAllBytes(txtFileName.Text)
        rtbPdfText.Text = Encoding.ASCII.GetString(bteRead)
    Catch ex As Exception
        MsgBox(ex.Message)
    End Try
End Sub
''' <summary>
''' Forces a garbage collection and closes the form.
''' </summary>
''' <param name="sender">The button raising the Click event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnClose_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnClose.Click
    ' NOTE(review): presumably here to release COM/GDI wrappers left behind by the
    ' readers before closing - confirm whether it is still required.
    GC.Collect()
    Me.Close()
End Sub
End Class