|class||OCR||MBS OCR Plugin||21.3||✅ Yes||✅ Yes||✅ Yes||✅ Yes||All|
// Example: recognize the text in a picture file with TessEngineMBS.

// Create the Tesseract engine instance.
Dim OCR As New TessEngineMBS

// Initialize with the tessdata folder and the language to load.
// Initialize returns False on failure, so stop here instead of
// continuing with an unusable engine.
If Not OCR.Initialize("C:\Program Files\Tesseract-OCR\tessdata", "eng") Then
  MsgBox "failed to initialize"
  Return
End If

// Load the picture to recognize and hand it to the engine;
// without SetImage the engine has no image to read text from.
Dim f As FolderItem = SpecialFolder.Desktop.Child("test.jpg")
Dim p As Picture = f.OpenAsPicture
OCR.SetImage(p)

// get the text
Dim OCRText As String = OCR.GetText
This replaces the older TesseractMBS class.
The new class is for Tesseract 4.x and 5.x versions.
- 2 events
- 9 properties
- shared property Version as String
- 31 methods
- method AllWordConfidences as Integer()
- method AnalyseLayout as TessPageIteratorMBS
- method Clear
- method Constructor
- method GetAltoText(PageNumber as Integer) as String
- method GetAvailableLanguages as String()
- method GetBoolVariable(Name as String, byref value as boolean) as Boolean
- method GetBoxText(PageNumber as Integer) as String
- method GetDoubleVariable(Name as String, byref value as Double) as Boolean
- method GetHOCRText(PageNumber as Integer) as String
- method GetIntVariable(Name as String, byref value as Integer) as Boolean
- method GetLoadedLanguages as String()
- method GetLSTMBoxText(PageNumber as Integer) as String
- method GetStringVariable(Name as String) as String
- method GetText as String
- method GetTsvText(PageNumber as Integer) as String
- method GetUNLVText as String
- method GetWordStrBoxText(PageNumber as Integer) as String
- method Initialize(dataPath as String, language as String, Mode as Integer = 3, configs() as String = nil) as Boolean
- method IsValidWord(Word as String) as Boolean
- method PrintVariablesToFile(File as FolderItem) as Boolean
- method PrintVariablesToPath(Path as String) as Boolean
- method Recognize as Boolean
- method ResultIterator as TessResultIteratorMBS
- method SetImage(pic as picture)
- method SetImageData(Data as MemoryBlock)
- method SetImageData(Data as String)
- method SetImageFile(File as FolderItem)
- method SetImageFile(Path as String)
- method SetRectangle(Left as Integer, Top as Integer, Width as Integer, Height as Integer)
- method SetVariable(Name as String, Value as String)
- 7 shared methods
- shared method LibraryLoaded as Boolean
- shared method LibraryLoadError as Integer
- shared method LibraryLoadErrorMessage as String
- shared method LoadLibrary(file as folderitem) as boolean
- shared method LoadLibrary(path as string) as boolean
- shared method SetCurrentWorkingDirectory(path as folderitem) as boolean
- shared method SetCurrentWorkingDirectory(path as String) as boolean
- 18 constants
OCR Engine Running Modes
Specify this mode when calling Initialize (Tesseract's init_*() functions) to indicate that one of the other engine modes should be inferred automatically from the variables in the language-specific config or the command-line configs; if the mode is not specified in any of those, it is set to the default kOcrEngineModeTesseractOnly.
Run just the LSTM line recognizer.
Run the LSTM recognizer, but allow fallback to Tesseract when things get difficult. (Deprecated.)
Run Tesseract only - fastest. (Deprecated.)
Fully automatic page segmentation, but no OSD.
Automatic page segmentation, but no OSD or OCR.
Automatic page segmentation with orientation and script detection (OSD).
Treat the image as a single word in a circle.
Orientation and script detection only.
Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
Assume a single uniform block of text. (Default.)
Assume a single uniform block of vertically aligned text.
Treat the image as a single character.
Assume a single column of text of variable sizes.
Treat the image as a single text line.
Treat the image as a single word.
Find as much text as possible in no particular order.
Sparse text with orientation and script detection (OSD).
This class has no sub classes.
Some examples using this class:
The items on this page are in the following plugins: MBS OCR Plugin.
MBS Xojo PDF Plugins