NERsuite  1.1.1
src/nersuite/nersuite.h
00001 /*
00002  *      NERSuite class header
00003  *
00004  * Copyright (c) 
00005  * All rights reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions are met:
00009  *     * Redistributions of source code must retain the above copyright
00010  *       notice, this list of conditions and the following disclaimer.
00011  *     * Redistributions in binary form must reproduce the above copyright
00012  *       notice, this list of conditions and the following disclaimer in the
00013  *       documentation and/or other materials provided with the distribution.
00014  *     * Neither the names of the authors nor the names of its contributors
00015  *       may be used to endorse or promote products derived from this
00016  *       software without specific prior written permission.
00017  *
00018  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00019  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00020  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00021  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00022  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00023  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00024  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00025  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00026  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00027  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00028  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  */
00030 
00031 
00032 #ifndef    _NERSUITE_H
00033 #define    _NERSUITE_H
00034   
00035 
00036 #include <iostream>
00037 #include <map>
00038 #include <list>
00039 #include <stdio.h>
00040 #include <string.h>
00041 
00042 // BEGIN: utils
00043 #include "../nersuite_common/string_utils.h"
00044 #include "../nersuite_common/text_loader.h"
00045 #include "../nersuite_common/option_parser.h"
00046 // END: utils
00047 
00048 // BEGIN: feature extractor
00049 #include "typedefs.h"
00050 #include "FExtor.h"
00051 // END: feature extractor
00052 
00053 // BEGIN: CRFSuite wrapping
00054 #include <stdlib.h>
00055 #include <string>
00056 #include <time.h>
00057 #include "crfsuite2.h"
00058 
// Shared type aliases for the CRFSuite wrapping section.
// NOTE(review): std::vector is used below, but <vector> is not among the
// standard headers included directly by this file; it presumably arrives
// through one of the project headers included above -- confirm, or include
// <vector> explicitly.

// Floating-point value type, defined as double.
// NOTE(review): presumably mirrors CRFSuite's own floatval_t -- confirm
// against the crfsuite headers.
00059 typedef         double                                          floatval_t;
// One-dimensional list of strings.
00060 typedef         std::vector<std::string>        V1_STR;
// List of V1_STR. Used below as the type of a sentence (`one_sent`) and of
// its features (`sent_feat`); presumably one V1_STR per token -- confirm.
00061 typedef         std::vector< V1_STR >           V2_STR;
// List of V2_STR.
00062 typedef         std::vector< V2_STR >           V3_STR;
00063 
// String constants for the run modes and the default model file name.
// NOTE(review): MODE_LEARN / MODE_TAG are presumably the values passed as the
// `mode` argument of Suite::set_column_info() and Suite::pad_answer() --
// confirm in the implementation.
00064 #define MODE_LEARN      "learn"
00065 #define MODE_TAG        "tag"
// Model file name; presumably used when no model file is given on the
// command line -- confirm in the implementation.
00066 #define DEFAULT_MODEL_FILE      "model.m"
00067 
00068 
00069 namespace NER
00070 {
              // Front-end class of NERsuite. Judging by its declarations it
              // combines option parsing (OPTION_PARSER), feature extraction
              // (FeatureExtractor) and CRFSuite training/tagging
              // (CRFSuite::Trainer, CRFSuite::Tagger). All behaviour lives in
              // the implementation file, which is not visible from this header.
00092         class Suite
00093         {
00094         private:
                      // Option parser instance.
                      // NOTE(review): presumably fed from the constructor
                      // arguments -- confirm.
00095                 OPTION_PARSER opt_parser;
                      // Column layout information.
                      // NOTE(review): presumably filled in by
                      // set_column_info() -- confirm.
00096                 COLUMN_INFO     COL_INFO;
00097 
00098         public:
                      // Constructs a Suite from an argument count and an
                      // argument array.
                      // NOTE(review): presumably main()'s argc/argv -- confirm.
00101                 Suite(int nargs, char** args);
00102 
                      // Entry point for learning (cf. MODE_LEARN).
                      // Returns an int whose meaning is defined by the
                      // implementation (not visible here).
00106                 int learn();
00107 
                      // Entry point for tagging (cf. MODE_TAG).
                      // Returns an int whose meaning is defined by the
                      // implementation (not visible here).
00111                 int tag();
00112 
00113         private:
                      // Learning helper that takes its input from the stream `is`.
                      // NOTE(review): presumably builds a CRFSuite::Trainer and
                      // uses read_data() -- confirm.
00114                 int learn_crfsuite(std::istream &is);
                      // Takes an input stream, the column layout and a pointer
                      // to a CRFSuite::Trainer.
                      // NOTE(review): presumably parses training data from `is`
                      // and appends it to `trainer`; `trainer` looks borrowed
                      // (non-owning) -- confirm.
00115                 void read_data(std::istream &is, const COLUMN_INFO &COL_INFO, CRFSuite::Trainer* trainer);
00116 
                      // Tagging helper taking an input stream, an output stream,
                      // a CRFSuite tagger and a feature extractor.
                      // NOTE(review): presumably reads sentences from `is`, tags
                      // them and writes the results to `os` -- confirm.
00117                 int run_tagging(std::istream &is, std::ostream &os, CRFSuite::Tagger& tagger, FeatureExtractor &FExtor);
                      // Takes one sentence, its features, a tagger, a
                      // string-to-int map `term_idx` and an output stream.
                      // NOTE(review): presumably tags the sentence and emits the
                      // result through one of the output_result_* methods; the
                      // meaning of `term_idx` is not visible here -- confirm.
00118                 int tag_crfsuite(V2_STR &one_sent, V2_STR &sent_feat, CRFSuite::Tagger& tagger, std::map<std::string, int> &term_idx, std::ostream &os);
                      // Output helper taking the label sequence `yseq`, the
                      // sentence and `term_idx`.
                      // NOTE(review): the name suggests standoff-format output
                      // to `os` -- confirm.
                      // The `one_sent` parameter type is spelled out in full; it
                      // is the same type as V2_STR.
00119                 void output_result_standoff(std::ostream &os, CRFSuite::StringList& yseq, std::vector<std::vector<std::string> > &one_sent, std::map<std::string, int> &term_idx);
                      // Output helper taking the label sequence `yseq` and the
                      // sentence.
                      // NOTE(review): the name suggests CoNLL-style output to
                      // `os` -- confirm.
00120                 void output_result_conll(std::ostream &os, CRFSuite::StringList& yseq, std::vector<std::vector<std::string> > &one_sent);
00121 
                      // Takes a mode string.
                      // NOTE(review): presumably MODE_LEARN or MODE_TAG, used to
                      // configure COL_INFO -- confirm.
00122                 void set_column_info(const std::string &mode);
                      // Takes a mode string, a read-only sentence and a mutable
                      // feature table.
                      // NOTE(review): the name suggests it pads an answer
                      // (label) column in `sent_feats`; the meaning of the int
                      // return is not visible here -- confirm.
00123                 int pad_answer(const std::string &mode, const V2_STR &one_sent, V2_STR &sent_feats);
00124         };
00125 }
00126 
00141 #endif
00142 
00143 
00144 
00145 
00146 
00147 
00148 
00149 
00150 
00151 
00152 
00153 
00154 
 All Classes Functions Variables