NERsuite  1.1.1
src/nersuitetest/SentenceTaggerTest.h
00001 #ifndef _SENTENCE_TAGGER_TEST
00002 #define _SENTENCE_TAGGER_TEST
00003 
00004 #include <string>
00005 #include <fstream>
00006 #include "../dictionary_tagger/sentence_tagger.h"
00007 
00008 using namespace std;
00009 using namespace NER;
00010 
// Scratch files shared by every test in this header: the tab-separated
// dictionary source text and the database name handed to Dictionary.
// The pointers themselves are const so no includer can reseat them.
static const char* const SENTENCE_TAGGER_TEST_TEXT_FILE = "sentence_tagger_test.txt";
static const char* const SENTENCE_TAGGER_TEST_DB_FILE = "sentence_tagger_test.cdbpp";
00013 
00014 
00015 Dictionary* PrepareDictionaryTestDB(int normalize_type, const char* filename, const char* dbname)
00016 {
00017         ofstream ofs(filename);
00018         ofs << "SampleEntry\tClass1\tClass2\tClass3" << endl;
00019         ofs << "S0a1m2p3l4e5E6n7t8r9y\tClass1\tClass4\tClass5" << endl;
00020         ofs << "S_a!m\"p#l$e%E&n\'t(r)y[S]a-m=p~l^e|E\\n@t[r]yS+a;m:p<l>e,E.n?t/r y\tClass6\tClass2\tClass7\n" << endl;
00021         ofs << "SampleEntry SampleEntry1 SampleEntry2\tClass8\tClass3\tClass9" << endl;
00022         ofs.close();
00023 
00024         Dictionary* pdict = new Dictionary(dbname);
00025         pdict->build(filename, normalize_type);
00026         return pdict;
00027 }
00028 
00029 void TestSentenceTagger_NormalizeNone()
00030 {
00031         Dictionary* pdict = PrepareDictionaryTestDB(NormalizeNone, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE);
00032         pdict->open();
00033         
00034         SentenceTagger::set_normalize_type(NormalizeNone);
00035         vector<NE>              v_ne;
00036         vector<int>             v_idx;
00037         SentenceTagger  one_sent;
00038 
00039         stringstream ss;
00040         ss << "0\t11\tSampleEntry\tSampleEntry\tNN\t-" << endl;
00041         ss << "12\t23\tSampleEntry1\tSampleEntry1\tNN\t-" << endl;
00042         ss << "24\t35\tSampleEntry2\tSampleEntry2\tNN\t-" << endl;
00043         ss << endl;
00044         one_sent.read(ss);
00045         one_sent.tag_nes(*pdict);
00046         if (one_sent[0][8] != "B-Class3")
00047                 throw new TestException("assert failed", __FILE__, __LINE__);
00048         if (one_sent[0][13] != "B-Class8")
00049                 throw new TestException("assert failed", __FILE__, __LINE__);
00050         if (one_sent[0][14] != "B-Class9")
00051                 throw new TestException("assert failed", __FILE__, __LINE__);
00052         if (one_sent[1][8] != "I-Class3")
00053                 throw new TestException("assert failed", __FILE__, __LINE__);
00054         if (one_sent[1][13] != "I-Class8")
00055                 throw new TestException("assert failed", __FILE__, __LINE__);
00056         if (one_sent[1][14] != "I-Class9")
00057                 throw new TestException("assert failed", __FILE__, __LINE__);
00058         if (one_sent[2][8] != "I-Class3")
00059                 throw new TestException("assert failed", __FILE__, __LINE__);
00060         if (one_sent[2][13] != "I-Class8")
00061                 throw new TestException("assert failed", __FILE__, __LINE__);
00062         if (one_sent[2][14] != "I-Class9")
00063                 throw new TestException("assert failed", __FILE__, __LINE__);
00064 
00065         delete pdict;
00066 }
00067 
00068 void TestSentenceTagger_NormalizeCase()
00069 {
00070         Dictionary* pdict = PrepareDictionaryTestDB(NormalizeCase, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE);
00071         pdict->open();
00072         
00073         SentenceTagger::set_normalize_type(NormalizeCase);
00074         vector<NE>              v_ne;
00075         vector<int>             v_idx;
00076         SentenceTagger  one_sent;
00077 
00078         stringstream ss;
00079         ss << "0\t11\tsampleentry\tsampleentry\tNN\t-" << endl;
00080         ss << endl;
00081         one_sent.read(ss);
00082         one_sent.tag_nes(*pdict);
00083         if (one_sent[0][6] != "B-Class1")
00084                 throw new TestException("assert failed", __FILE__, __LINE__);
00085         if (one_sent[0][7] != "B-Class2")
00086                 throw new TestException("assert failed", __FILE__, __LINE__);
00087         if (one_sent[0][8] != "B-Class3")
00088                 throw new TestException("assert failed", __FILE__, __LINE__);
00089 
00090 
00091         delete pdict;
00092 }
00093 
00094 void TestSentenceTagger_NormalizeSymbol()
00095 {
00096         Dictionary* pdict = PrepareDictionaryTestDB(NormalizeSymbol, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE);
00097         pdict->open();
00098         
00099         SentenceTagger::set_normalize_type(NormalizeSymbol);
00100         vector<NE>              v_ne;
00101         vector<int>             v_idx;
00102         SentenceTagger  one_sent;
00103 
00104         stringstream ss;
00105         ss << "0\t11\tS!a\"m#p$l%e&E\'n(t)r[y]S-a=m~p^l|e\\E@n[t]r+yS;a:m<p>l,e.E?n/t r_y\tsampleentry\tNN\t-" << endl;
00106         ss << endl;
00107         one_sent.read(ss);
00108         one_sent.tag_nes(*pdict);
00109         if (one_sent[0][7] != "B-Class2")
00110                 throw new TestException("assert failed", __FILE__, __LINE__);
00111         if (one_sent[0][11] != "B-Class6")
00112                 throw new TestException("assert failed", __FILE__, __LINE__);
00113         if (one_sent[0][12] != "B-Class7")
00114                 throw new TestException("assert failed", __FILE__, __LINE__);
00115 
00116         delete pdict;
00117 }
00118 
00119 void TestSentenceTagger_NormalizeNumber()
00120 {
00121         Dictionary* pdict = PrepareDictionaryTestDB(NormalizeNumber, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE);
00122         pdict->open();
00123         
00124         SentenceTagger::set_normalize_type(NormalizeNumber);
00125         vector<NE>              v_ne;
00126         vector<int>             v_idx;
00127         SentenceTagger  one_sent;
00128 
00129         stringstream ss;
00130         ss << "0\t11\tSampleEntry\tSampleEntry\tNN\t-" << endl;
00131         ss << "12\t23\tSampleEntry9\tSampleEntry9\tNN\t-" << endl;
00132         ss << "24\t35\tSampleEntry9\tSampleEntry9\tNN\t-" << endl;
00133         ss << endl;
00134         one_sent.read(ss);
00135         one_sent.tag_nes(*pdict);
00136         if (one_sent[0][8] != "B-Class3")
00137                 throw new TestException("assert failed", __FILE__, __LINE__);
00138         if (one_sent[0][13] != "B-Class8")
00139                 throw new TestException("assert failed", __FILE__, __LINE__);
00140         if (one_sent[0][14] != "B-Class9")
00141                 throw new TestException("assert failed", __FILE__, __LINE__);
00142         if (one_sent[1][8] != "I-Class3")
00143                 throw new TestException("assert failed", __FILE__, __LINE__);
00144         if (one_sent[1][13] != "I-Class8")
00145                 throw new TestException("assert failed", __FILE__, __LINE__);
00146         if (one_sent[1][14] != "I-Class9")
00147                 throw new TestException("assert failed", __FILE__, __LINE__);
00148         if (one_sent[2][8] != "I-Class3")
00149                 throw new TestException("assert failed", __FILE__, __LINE__);
00150         if (one_sent[2][13] != "I-Class8")
00151                 throw new TestException("assert failed", __FILE__, __LINE__);
00152         if (one_sent[2][14] != "I-Class9")
00153                 throw new TestException("assert failed", __FILE__, __LINE__);
00154 
00155         delete pdict;
00156 }
00157 
00158 void TestSentenceTagger_TokenBase()
00159 {
00160         Dictionary* pdict = PrepareDictionaryTestDB(NormalizeToken, SENTENCE_TAGGER_TEST_TEXT_FILE, SENTENCE_TAGGER_TEST_DB_FILE);
00161         pdict->open();
00162         
00163         SentenceTagger::set_normalize_type(NormalizeToken);
00164         vector<NE>              v_ne;
00165         vector<int>             v_idx;
00166         SentenceTagger  one_sent;
00167 
00168         stringstream ss;
00169         ss << "0\t11\tSampleEntry\tSampleEntry\tNN\t-" << endl;
00170         ss << endl;
00171         one_sent.read(ss);
00172         one_sent.tag_nes(*pdict);
00173         if (one_sent[0][6] != "B-Class1")
00174                 throw new TestException("assert failed", __FILE__, __LINE__);
00175         if (one_sent[0][7] != "B-Class2")
00176                 throw new TestException("assert failed", __FILE__, __LINE__);
00177         if (one_sent[0][8] != "B-Class3")
00178                 throw new TestException("assert failed", __FILE__, __LINE__);
00179 
00180         delete pdict;
00181 }
00182 
00183 #endif
 All Classes Functions Variables