twain3.0/3rdparty/hgOCR/include/ccmain/cube_reco_context.h

/**********************************************************************
 * File:        cube_reco_context.h
 * Description: Declaration of the Cube Recognition Context Class
 * Author:    Ahmad Abdulkader
 * Created:   2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

 // The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
 // (or a thread) would create one CubeRecoContext object per language.
 // The CubeRecoContext object also provides methods to get and set the
 // different attribues of the Cube OCR Engine.

#ifndef CUBE_RECO_CONTEXT_H
#define CUBE_RECO_CONTEXT_H

#include <string>
#include "neural_net.h"
#include "lang_model.h"
#include "classifier_base.h"
#include "feature_base.h"
#include "char_set.h"
#include "word_size_model.h"
#include "char_bigrams.h"
#include "word_unigrams.h"

namespace tesseract {

	class Tesseract;
	class TessdataManager;

	class CubeRecoContext {
	public:
		// Reading order enum type
		enum ReadOrder {
			L2R,
			R2L
		};

		// Instantiate using a Tesseract object
		CubeRecoContext(Tesseract *tess_obj);

		~CubeRecoContext();

		// accessor functions
		inline const string & Lang() const { return lang_; }
		inline CharSet *CharacterSet() const { return char_set_; }
		const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
		inline CharClassifier *Classifier() const { return char_classifier_; }
		inline WordSizeModel *SizeModel() const { return word_size_model_; }
		inline CharBigrams *Bigrams() const { return char_bigrams_; }
		inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
		inline TuningParams *Params() const { return params_; }
		inline LangModel *LangMod() const { return lang_mod_; }

		// the reading order of the language
		inline ReadOrder ReadingOrder() const {
			return ((lang_ == "ara") ? R2L : L2R);
		}

		// does the language support case
		inline bool HasCase() const {
			return (lang_ != "ara" && lang_ != "hin");
		}

		inline bool Cursive() const {
			return (lang_ == "ara");
		}

		inline bool HasItalics() const {
			return (lang_ != "ara" && lang_ != "hin");
		}

		inline bool Contextual() const {
			return (lang_ == "ara");
		}

		// RecoContext runtime flags accessor functions
		inline bool SizeNormalization() const { return size_normalization_; }
		inline bool NoisyInput() const { return noisy_input_; }
		inline bool OOD() const { return lang_mod_->OOD(); }
		inline bool Numeric() const { return lang_mod_->Numeric(); }
		inline bool WordList() const { return lang_mod_->WordList(); }
		inline bool Punc() const { return lang_mod_->Punc(); }
		inline bool CaseSensitive() const {
			return char_classifier_->CaseSensitive();
		}

		inline void SetSizeNormalization(bool size_normalization) {
			size_normalization_ = size_normalization;
		}
		inline void SetNoisyInput(bool noisy_input) {
			noisy_input_ = noisy_input;
		}
		inline void SetOOD(bool ood_enabled) {
			lang_mod_->SetOOD(ood_enabled);
		}
		inline void SetNumeric(bool numeric_enabled) {
			lang_mod_->SetNumeric(numeric_enabled);
		}
		inline void SetWordList(bool word_list_enabled) {
			lang_mod_->SetWordList(word_list_enabled);
		}
		inline void SetPunc(bool punc_enabled) {
			lang_mod_->SetPunc(punc_enabled);
		}
		inline void SetCaseSensitive(bool case_sensitive) {
			char_classifier_->SetCaseSensitive(case_sensitive);
		}
		inline tesseract::Tesseract *TesseractObject() const {
			return tess_obj_;
		}

		// Returns the path of the data files
		bool GetDataFilePath(string *path) const;
		// Creates a CubeRecoContext object using a tesseract object. Data
		// files are loaded via the tessdata_manager, and the tesseract
		// unicharset is provided in order to map Cube's unicharset to
		// Tesseract's in the case where the two unicharsets differ.
		static CubeRecoContext *Create(Tesseract *tess_obj,
			TessdataManager *tessdata_manager,
			UNICHARSET *tess_unicharset);

	private:
		bool loaded_;
		string lang_;
		CharSet *char_set_;
		UNICHARSET *tess_unicharset_;
		WordSizeModel *word_size_model_;
		CharClassifier *char_classifier_;
		CharBigrams *char_bigrams_;
		WordUnigrams *word_unigrams_;
		TuningParams *params_;
		LangModel *lang_mod_;
		Tesseract *tess_obj_;  // CubeRecoContext does not own this pointer
		bool size_normalization_;
		bool noisy_input_;

		// Loads and initialized all the necessary components of a
		// CubeRecoContext. See .cpp for more details.
		bool Load(TessdataManager *tessdata_manager,
			UNICHARSET *tess_unicharset);
	};
}

#endif  // CUBE_RECO_CONTEXT_H