// NERsuite 1.1.1
00001 /* 00002 * NERSuite class header 00003 * 00004 * Copyright (c) 00005 * All rights reserved. 00006 * 00007 * Redistribution and use in source and binary forms, with or without 00008 * modification, are permitted provided that the following conditions are met: 00009 * * Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * * Redistributions in binary form must reproduce the above copyright 00012 * notice, this list of conditions and the following disclaimer in the 00013 * documentation and/or other materials provided with the distribution. 00014 * * Neither the names of the authors nor the names of its contributors 00015 * may be used to endorse or promote products derived from this 00016 * software without specific prior written permission. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00019 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00020 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00021 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 00022 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00025 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00026 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00027 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00029 */ 00030 00031 00032 #ifndef _NERSUITE_H 00033 #define _NERSUITE_H 00034 00035 00036 #include <iostream> 00037 #include <map> 00038 #include <list> 00039 #include <stdio.h> 00040 #include <string.h> 00041 00042 // BEGIN: utils 00043 #include "../nersuite_common/string_utils.h" 00044 #include "../nersuite_common/text_loader.h" 00045 #include "../nersuite_common/option_parser.h" 00046 // END: utils 00047 00048 // BEGIN: feature extractor 00049 #include "typedefs.h" 00050 #include "FExtor.h" 00051 // END: feature extractor 00052 00053 // BEGIN: CRFSuite wrapping 00054 #include <stdlib.h> 00055 #include <string> 00056 #include <time.h> 00057 #include "crfsuite2.h" 00058 00059 typedef double floatval_t; 00060 typedef std::vector<std::string> V1_STR; 00061 typedef std::vector< V1_STR > V2_STR; 00062 typedef std::vector< V2_STR > V3_STR; 00063 00064 #define MODE_LEARN "learn" 00065 #define MODE_TAG "tag" 00066 #define DEFAULT_MODEL_FILE "model.m" 00067 00068 00069 namespace NER 00070 { 00092 class Suite 00093 { 00094 private: 00095 OPTION_PARSER opt_parser; 00096 COLUMN_INFO COL_INFO; 00097 00098 public: 00101 Suite(int nargs, char** args); 00102 00106 int learn(); 00107 00111 int tag(); 00112 00113 private: 00114 int learn_crfsuite(std::istream &is); 00115 void read_data(std::istream &is, const COLUMN_INFO &COL_INFO, CRFSuite::Trainer* trainer); 00116 00117 int run_tagging(std::istream &is, std::ostream &os, CRFSuite::Tagger& tagger, FeatureExtractor &FExtor); 00118 int tag_crfsuite(V2_STR &one_sent, V2_STR &sent_feat, CRFSuite::Tagger& tagger, std::map<std::string, int> &term_idx, std::ostream &os); 00119 void output_result_standoff(std::ostream &os, CRFSuite::StringList& yseq, std::vector<std::vector<std::string> > &one_sent, std::map<std::string, int> &term_idx); 00120 void output_result_conll(std::ostream &os, CRFSuite::StringList& yseq, std::vector<std::vector<std::string> > &one_sent); 00121 00122 void set_column_info(const std::string &mode); 00123 int 
pad_answer(const std::string &mode, const V2_STR &one_sent, V2_STR &sent_feats); 00124 }; 00125 } 00126 00141 #endif 00142 00143 00144 00145 00146 00147 00148 00149 00150 00151 00152 00153 00154