// NERsuite 1.1.1
00001 #ifndef _SENTENCE_TAGGER_TEST 00002 #define _SENTENCE_TAGGER_TEST 00003 00004 #include <string> 00005 #include <fstream> 00006 #include "../dictionary_tagger/sentence_tagger.h" 00007 00008 using namespace std; 00009 using namespace NER; 00010 00011 static const char* SENTENCE_TAGGER_TEST_TEXT_FILE = "sentence_tagger_test.txt"; 00012 static const char* SENTENCE_TAGGER_TEST_DB_FILE = "sentence_tagger_test.cdbpp"; 00013 00014 00015 Dictionary* PrepareDictionaryTestDB(int normalize_type, const char* filename, const char* dbname) 00016 { 00017 ofstream ofs(filename); 00018 ofs << "SampleEntry\tClass1\tClass2\tClass3" << endl; 00019 ofs << "S0a1m2p3l4e5E6n7t8r9y\tClass1\tClass4\tClass5" << endl; 00020 ofs << "S_a!m\"p#l$e%E&n\'t(r)y[S]a-m=p~l^e|E\\n@t[r]yS+a;m:p<l>e,E.n?t/r y\tClass6\tClass2\tClass7\n" << endl; 00021 ofs << "SampleEntry SampleEntry1 SampleEntry2\tClass8\tClass3\tClass9" << endl; 00022 ofs.close(); 00023 00024 Dictionary* pdict = new Dictionary(dbname); 00025 pdict->build(filename, normalize_type); 00026 return pdict; 00027 } 00028 00029 void TestSentenceTagger_NormalizeNone() 00030 { 00031 Dictionary* pdict = PrepareDictionaryTestDB(NormalizeNone, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE); 00032 pdict->open(); 00033 00034 SentenceTagger::set_normalize_type(NormalizeNone); 00035 vector<NE> v_ne; 00036 vector<int> v_idx; 00037 SentenceTagger one_sent; 00038 00039 stringstream ss; 00040 ss << "0\t11\tSampleEntry\tSampleEntry\tNN\t-" << endl; 00041 ss << "12\t23\tSampleEntry1\tSampleEntry1\tNN\t-" << endl; 00042 ss << "24\t35\tSampleEntry2\tSampleEntry2\tNN\t-" << endl; 00043 ss << endl; 00044 one_sent.read(ss); 00045 one_sent.tag_nes(*pdict); 00046 if (one_sent[0][8] != "B-Class3") 00047 throw new TestException("assert failed", __FILE__, __LINE__); 00048 if (one_sent[0][13] != "B-Class8") 00049 throw new TestException("assert failed", __FILE__, __LINE__); 00050 if (one_sent[0][14] != "B-Class9") 00051 throw new 
TestException("assert failed", __FILE__, __LINE__); 00052 if (one_sent[1][8] != "I-Class3") 00053 throw new TestException("assert failed", __FILE__, __LINE__); 00054 if (one_sent[1][13] != "I-Class8") 00055 throw new TestException("assert failed", __FILE__, __LINE__); 00056 if (one_sent[1][14] != "I-Class9") 00057 throw new TestException("assert failed", __FILE__, __LINE__); 00058 if (one_sent[2][8] != "I-Class3") 00059 throw new TestException("assert failed", __FILE__, __LINE__); 00060 if (one_sent[2][13] != "I-Class8") 00061 throw new TestException("assert failed", __FILE__, __LINE__); 00062 if (one_sent[2][14] != "I-Class9") 00063 throw new TestException("assert failed", __FILE__, __LINE__); 00064 00065 delete pdict; 00066 } 00067 00068 void TestSentenceTagger_NormalizeCase() 00069 { 00070 Dictionary* pdict = PrepareDictionaryTestDB(NormalizeCase, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE); 00071 pdict->open(); 00072 00073 SentenceTagger::set_normalize_type(NormalizeCase); 00074 vector<NE> v_ne; 00075 vector<int> v_idx; 00076 SentenceTagger one_sent; 00077 00078 stringstream ss; 00079 ss << "0\t11\tsampleentry\tsampleentry\tNN\t-" << endl; 00080 ss << endl; 00081 one_sent.read(ss); 00082 one_sent.tag_nes(*pdict); 00083 if (one_sent[0][6] != "B-Class1") 00084 throw new TestException("assert failed", __FILE__, __LINE__); 00085 if (one_sent[0][7] != "B-Class2") 00086 throw new TestException("assert failed", __FILE__, __LINE__); 00087 if (one_sent[0][8] != "B-Class3") 00088 throw new TestException("assert failed", __FILE__, __LINE__); 00089 00090 00091 delete pdict; 00092 } 00093 00094 void TestSentenceTagger_NormalizeSymbol() 00095 { 00096 Dictionary* pdict = PrepareDictionaryTestDB(NormalizeSymbol, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE); 00097 pdict->open(); 00098 00099 SentenceTagger::set_normalize_type(NormalizeSymbol); 00100 vector<NE> v_ne; 00101 vector<int> v_idx; 00102 SentenceTagger one_sent; 00103 00104 
stringstream ss; 00105 ss << "0\t11\tS!a\"m#p$l%e&E\'n(t)r[y]S-a=m~p^l|e\\E@n[t]r+yS;a:m<p>l,e.E?n/t r_y\tsampleentry\tNN\t-" << endl; 00106 ss << endl; 00107 one_sent.read(ss); 00108 one_sent.tag_nes(*pdict); 00109 if (one_sent[0][7] != "B-Class2") 00110 throw new TestException("assert failed", __FILE__, __LINE__); 00111 if (one_sent[0][11] != "B-Class6") 00112 throw new TestException("assert failed", __FILE__, __LINE__); 00113 if (one_sent[0][12] != "B-Class7") 00114 throw new TestException("assert failed", __FILE__, __LINE__); 00115 00116 delete pdict; 00117 } 00118 00119 void TestSentenceTagger_NormalizeNumber() 00120 { 00121 Dictionary* pdict = PrepareDictionaryTestDB(NormalizeNumber, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE); 00122 pdict->open(); 00123 00124 SentenceTagger::set_normalize_type(NormalizeNumber); 00125 vector<NE> v_ne; 00126 vector<int> v_idx; 00127 SentenceTagger one_sent; 00128 00129 stringstream ss; 00130 ss << "0\t11\tSampleEntry\tSampleEntry\tNN\t-" << endl; 00131 ss << "12\t23\tSampleEntry9\tSampleEntry9\tNN\t-" << endl; 00132 ss << "24\t35\tSampleEntry9\tSampleEntry9\tNN\t-" << endl; 00133 ss << endl; 00134 one_sent.read(ss); 00135 one_sent.tag_nes(*pdict); 00136 if (one_sent[0][8] != "B-Class3") 00137 throw new TestException("assert failed", __FILE__, __LINE__); 00138 if (one_sent[0][13] != "B-Class8") 00139 throw new TestException("assert failed", __FILE__, __LINE__); 00140 if (one_sent[0][14] != "B-Class9") 00141 throw new TestException("assert failed", __FILE__, __LINE__); 00142 if (one_sent[1][8] != "I-Class3") 00143 throw new TestException("assert failed", __FILE__, __LINE__); 00144 if (one_sent[1][13] != "I-Class8") 00145 throw new TestException("assert failed", __FILE__, __LINE__); 00146 if (one_sent[1][14] != "I-Class9") 00147 throw new TestException("assert failed", __FILE__, __LINE__); 00148 if (one_sent[2][8] != "I-Class3") 00149 throw new TestException("assert failed", __FILE__, __LINE__); 00150 if 
(one_sent[2][13] != "I-Class8") 00151 throw new TestException("assert failed", __FILE__, __LINE__); 00152 if (one_sent[2][14] != "I-Class9") 00153 throw new TestException("assert failed", __FILE__, __LINE__); 00154 00155 delete pdict; 00156 } 00157 00158 void TestSentenceTagger_TokenBase() 00159 { 00160 Dictionary* pdict = PrepareDictionaryTestDB(NormalizeToken, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE); 00161 pdict->open(); 00162 00163 SentenceTagger::set_normalize_type(NormalizeToken); 00164 vector<NE> v_ne; 00165 vector<int> v_idx; 00166 SentenceTagger one_sent; 00167 00168 stringstream ss; 00169 ss << "0\t11\tSampleEntry\tSampleEntry\tNN\t-" << endl; 00170 ss << endl; 00171 one_sent.read(ss); 00172 one_sent.tag_nes(*pdict); 00173 if (one_sent[0][6] != "B-Class1") 00174 throw new TestException("assert failed", __FILE__, __LINE__); 00175 if (one_sent[0][7] != "B-Class2") 00176 throw new TestException("assert failed", __FILE__, __LINE__); 00177 if (one_sent[0][8] != "B-Class3") 00178 throw new TestException("assert failed", __FILE__, __LINE__); 00179 00180 delete pdict; 00181 } 00182 00183 #endif