twain3.0/3rdparty/hgOCR/include/ccmain/cube_reco_context.h

158 lines
5.0 KiB
C++

/**********************************************************************
* File: cube_reco_context.h
* Description: Declaration of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
// (or a thread) would create one CubeRecoContext object per language.
// The CubeRecoContext object also provides methods to get and set the
// different attribues of the Cube OCR Engine.
#ifndef CUBE_RECO_CONTEXT_H
#define CUBE_RECO_CONTEXT_H
#include <string>
#include "neural_net.h"
#include "lang_model.h"
#include "classifier_base.h"
#include "feature_base.h"
#include "char_set.h"
#include "word_size_model.h"
#include "char_bigrams.h"
#include "word_unigrams.h"
namespace tesseract {
class Tesseract;
class TessdataManager;
class CubeRecoContext {
public:
// Reading order enum type
enum ReadOrder {
L2R,
R2L
};
// Instantiate using a Tesseract object
CubeRecoContext(Tesseract *tess_obj);
~CubeRecoContext();
// accessor functions
inline const string & Lang() const { return lang_; }
inline CharSet *CharacterSet() const { return char_set_; }
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
inline CharClassifier *Classifier() const { return char_classifier_; }
inline WordSizeModel *SizeModel() const { return word_size_model_; }
inline CharBigrams *Bigrams() const { return char_bigrams_; }
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
inline TuningParams *Params() const { return params_; }
inline LangModel *LangMod() const { return lang_mod_; }
// the reading order of the language
inline ReadOrder ReadingOrder() const {
return ((lang_ == "ara") ? R2L : L2R);
}
// does the language support case
inline bool HasCase() const {
return (lang_ != "ara" && lang_ != "hin");
}
inline bool Cursive() const {
return (lang_ == "ara");
}
inline bool HasItalics() const {
return (lang_ != "ara" && lang_ != "hin");
}
inline bool Contextual() const {
return (lang_ == "ara");
}
// RecoContext runtime flags accessor functions
inline bool SizeNormalization() const { return size_normalization_; }
inline bool NoisyInput() const { return noisy_input_; }
inline bool OOD() const { return lang_mod_->OOD(); }
inline bool Numeric() const { return lang_mod_->Numeric(); }
inline bool WordList() const { return lang_mod_->WordList(); }
inline bool Punc() const { return lang_mod_->Punc(); }
inline bool CaseSensitive() const {
return char_classifier_->CaseSensitive();
}
inline void SetSizeNormalization(bool size_normalization) {
size_normalization_ = size_normalization;
}
inline void SetNoisyInput(bool noisy_input) {
noisy_input_ = noisy_input;
}
inline void SetOOD(bool ood_enabled) {
lang_mod_->SetOOD(ood_enabled);
}
inline void SetNumeric(bool numeric_enabled) {
lang_mod_->SetNumeric(numeric_enabled);
}
inline void SetWordList(bool word_list_enabled) {
lang_mod_->SetWordList(word_list_enabled);
}
inline void SetPunc(bool punc_enabled) {
lang_mod_->SetPunc(punc_enabled);
}
inline void SetCaseSensitive(bool case_sensitive) {
char_classifier_->SetCaseSensitive(case_sensitive);
}
inline tesseract::Tesseract *TesseractObject() const {
return tess_obj_;
}
// Returns the path of the data files
bool GetDataFilePath(string *path) const;
// Creates a CubeRecoContext object using a tesseract object. Data
// files are loaded via the tessdata_manager, and the tesseract
// unicharset is provided in order to map Cube's unicharset to
// Tesseract's in the case where the two unicharsets differ.
static CubeRecoContext *Create(Tesseract *tess_obj,
TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset);
private:
bool loaded_;
string lang_;
CharSet *char_set_;
UNICHARSET *tess_unicharset_;
WordSizeModel *word_size_model_;
CharClassifier *char_classifier_;
CharBigrams *char_bigrams_;
WordUnigrams *word_unigrams_;
TuningParams *params_;
LangModel *lang_mod_;
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
bool size_normalization_;
bool noisy_input_;
// Loads and initialized all the necessary components of a
// CubeRecoContext. See .cpp for more details.
bool Load(TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset);
};
}
#endif // CUBE_RECO_CONTEXT_H