NERsuite  1.1.1
src/nersuite_common/string_utils.h
00001 /*
00002 *      NERSuite
00003 *      string utility
00004 *
00005 * Copyright (c) 
00006 * All rights reserved.
00007 *
00008 * Redistribution and use in source and binary forms, with or without
00009 * modification, are permitted provided that the following conditions are met:
00010 *     * Redistributions of source code must retain the above copyright
00011 *       notice, this list of conditions and the following disclaimer.
00012 *     * Redistributions in binary form must reproduce the above copyright
00013 *       notice, this list of conditions and the following disclaimer in the
00014 *       documentation and/or other materials provided with the distribution.
00015 *     * Neither the names of the authors nor the names of its contributors
00016 *       may be used to endorse or promote products derived from this
00017 *       software without specific prior written permission.
00018 *
00019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00020 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00021 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00022 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00023 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00024 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00025 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00026 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00027 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00028 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00029 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030 */
00031 
00032 #ifndef         _STRING_UTILS_H
00033 #define         _STRING_UTILS_H
00034 
00035 
00036 #include <string>
00037 #include <vector>
00038 #include <sstream>
00039 #include <stdlib.h>
00040 
namespace NER
{
        /**
        * Split a line into fields separated by a delimiter string.
        *
        * The output container is cleared first, then each field is appended
        * with push_back(std::string).
        *
        * Field rules:
        *   - Leading and interior empty fields are kept
        *     (",a" -> "", "a"   and   "a,,b" -> "a", "", "b").
        *   - A delimiter at the very end of the line does NOT produce a
        *     trailing empty field ("a,b," -> "a", "b").
        *   - An empty line produces no fields.
        *   - The delimiter is matched as a whole string, so multi-character
        *     delimiters are supported ("a::b" split on "::" -> "a", "b").
        *   - An empty delimiter cannot separate anything; a non-empty line is
        *     returned as one single field.
        *
        * @param[out] V_STR     Container receiving the fields; must provide
        *                       clear() and push_back(std::string).
        * @param[in]  one_line  The text to split.
        * @param[in]  del       The delimiter string.
        * @return     Number of fields appended to V_STR.
        */
        template<typename T1>
        int tokenize(T1 &V_STR, const std::string &one_line, const std::string &del)
        {
                V_STR.clear();

                // find() matches an empty needle at every position, which would
                // never let the scan advance; treat the line as one field instead.
                if (del.empty())
                {
                        if (one_line.empty())
                                return 0;

                        V_STR.push_back(one_line);
                        return 1;
                }

                int total_elem = 0;
                size_t beg = 0;

                while (beg < one_line.length())
                {
                        // No further delimiter: the last field runs to end of line.
                        size_t end = one_line.find(del, beg);
                        if (end == std::string::npos)
                                end = one_line.length();

                        V_STR.push_back(one_line.substr(beg, end - beg));

                        // Step over the whole delimiter, not just its first character,
                        // so multi-character delimiters do not leak into the next field.
                        beg = end + del.length();
                        ++total_elem;
                }

                return total_elem;
        }

        // The helpers below are only declared here; their definitions live in
        // another translation unit. The notes are inferred from the names and
        // signatures alone -- NOTE(review): confirm against the implementation.

        // Takes the string by non-const reference, so it presumably trims
        // whitespace in place.
        extern void             trim_ws(std::string &str);
        // Presumably returns the textual form of an integer.
        extern std::string      int2str(int i);
        // Presumably formats an index value as a string; exact format not visible here.
        extern std::string      int2strIDX(int idx);
        // Presumably reports whether a character is alphanumeric.
        extern bool             check_alphanum(const char ch);

        // Each of the following takes its input by const reference and returns
        // a new string, so the argument itself is not modified.
        // Presumably a lowercased copy.
        extern std::string      make_lowercase(const std::string& str);
        // Presumably collapses digits/numbers; exact rule not visible here.
        extern std::string      squeeze_nums(const std::string& str);
        // Presumably collapses symbol characters; exact rule not visible here.
        extern std::string      squeeze_syms(const std::string& str);
        // Presumably collapses whitespace; exact rule not visible here.
        extern std::string      squeeze_ws(const std::string& str);

}
00084 #endif
00085 
 All Classes Functions Variables