diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..84a1a18 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*.pyc + +# Generated by setup.py +ldig.egg-info + +# Generated by cmake +ldig/ldigcpp/CMakeCache.txt +ldig/ldigcpp/CMakeFiles/ +ldig/ldigcpp/Makefile +ldig/ldigcpp/bin/ +ldig/ldigcpp/cmake_install.cmake + +# Uncompressed models +ldig/models/model.latin/ +ldig/ldigcpp/lang50.x64.model + +# Dependency that is not a submodule +ldig/ldigcpp/cybozulib/ + diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..3af6521 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +recursive-include ldig/maxsubst * +recursive-include ldig/static * +recursive-include ldig/models * diff --git a/readme.md b/README.md similarity index 71% rename from readme.md rename to README.md index 31d4fae..38f7238 100644 --- a/readme.md +++ b/README.md @@ -1,16 +1,34 @@ ldig (Language Detection with Infinity Gram) ====================== - This is a prototype of language detection for short message service (twitter). with 99.1% accuracy for 17 languages +About this fork +--------------- + +In this fork we just add some conveniences to enable the usage of this as +a library in Python programs. + +Changes include an updated `.gitignore` so that it ignores unpacked models, +an `__init__.py` file so that it can be imported as a package, added +a `ldig_standalone.py` file with a convenient class to detect language on text +and not on files, updated model file permissions and a `setup.py` to install it +easily. + +The original `c++` branch is merged with the original `master` for convenience +too, since it takes away nothing from the Python point of view, yet adds an +extra `C++` version. + +All real work was done by the author of the original, Nakatani Shuyo / Cybozu +Labs Inc. under an MIT License (see below or at https://github.com/shuyo/ldig). + Usage ------ 1.
class LdigDetector(object):
    """Standalone detector, based on `server.py:Detector`.

    The model found in `modeldir` is loaded once in the constructor. After
    that, `detect(text)` returns a list of `(label, probability)` pairs
    covering every label, sorted from most to least probable.

    If your text is already normalized, it might be slightly faster to
    initialize with `normalize=False` before using the `detect` method, or
    to call the `_detect_normalize` or `_detect` methods directly.

    Byte strings are accepted as well as unicode text; they are assumed to
    be UTF-8 encoded and are decoded before any processing.
    """

    def __init__(self, modeldir, normalize=True):
        """Load the model stored in `modeldir`.

        :param modeldir: model location, passed straight to `ldig.ldig`.
        :param normalize: when true (the default) `detect` runs
            `ldig.normalize_text` on its input first; when false `detect`
            classifies the text exactly as given.
        """
        self.ldig = ldig.ldig(modeldir)
        self.features = self.ldig.load_features()
        self.trie = self.ldig.load_da()
        self.labels = self.ldig.load_labels()
        # NOTE(review): `self.ldig.param` is presumably the path of the saved
        # parameter matrix (one row per feature id, one column per label).
        self.param = np.load(self.ldig.param)
        # Choose the public entry point once, so `detect` itself carries no
        # per-call branching.
        self.detect = self._detect_normalize if normalize else self._detect

    @staticmethod
    def _to_unicode(text):
        """Return `text` as unicode, decoding byte strings as UTF-8.

        Concatenating a non-ASCII byte string with the unicode sentinel used
        in `_detect` raises `UnicodeDecodeError` on Python 2 (and `TypeError`
        on Python 3), so byte input is decoded explicitly up front.
        """
        if isinstance(text, bytes):
            return text.decode("utf-8")
        return text

    def _detect_normalize(self, text):
        """Normalize `text` with `ldig.normalize_text`, then classify it."""
        # normalize_text returns a 3-tuple; only its middle element (the
        # normalized text) is needed here.
        _, normalized, _ = ldig.normalize_text(self._to_unicode(text))
        return self._detect(normalized)

    def _detect(self, text):
        """Classify `text` without normalizing it.

        :returns: list of `(label, probability)` pairs, highest probability
            first; the probabilities sum to 1.
        """
        text = self._to_unicode(text)
        # The text is wrapped in U+0001 sentinels before feature extraction.
        events = self.trie.extract_features(u"\u0001" + text + u"\u0001")
        scores = np.zeros(len(self.labels))

        # Accumulate in a fixed order (sorted by the first field of each
        # feature entry) so the floating-point sum does not depend on the
        # iteration order of `events`.
        ordered_ids = sorted(events, key=lambda fid: self.features[fid][0])
        for feature_id in ordered_ids:
            scores += self.param[feature_id, ] * events[feature_id]

        # Softmax; subtracting the maximum first avoids overflow in exp()
        # and leaves the normalized result unchanged.
        exp_w = np.exp(scores - scores.max())
        prob = exp_w / exp_w.sum()

        # The builtin zip works on both Python 2 and 3 (itertools.izip is
        # Python 2 only), and the label list is small.
        return sorted(zip(self.labels, prob), key=itemgetter(1), reverse=True)
+ +Copyright (c) 2010 Daisuke Okanohara All Rights Reserved. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. diff --git a/ldig/ldigcpp/esaxx/README b/ldig/ldigcpp/esaxx/README new file mode 100644 index 0000000..c3afa09 --- /dev/null +++ b/ldig/ldigcpp/esaxx/README @@ -0,0 +1,34 @@ +ESAXX +---------------------- + +This library provides the implementation of enhanced suffix array. +For an input text of length N, this library builds an enhanced suffix array in O(N) time +using 20N bytes. + +For a suffix array construction, I use sais.hxx, the induced sorting algorithm +implemented by Yuta Mori. + +It also provides the program to enumerate the statistics of all substrings in the text. + +> enum_substring + Enumerate all substring +> enum_substring -w + Input are words separated by space. 
+ +Example: +------------------ +$ cat abra +abracadabra +$ enum_substring < abra + n:11 +alpha:256 + node:5 +0 2 4 abra +1 5 1 a +2 2 3 bra +3 2 2 ra +4 11 0 + +$ enum_substring -w < wiki.txt > + +Daisuke Okanohara diff --git a/ldig/ldigcpp/esaxx/cmdline.h b/ldig/ldigcpp/esaxx/cmdline.h new file mode 100644 index 0000000..2fc0260 --- /dev/null +++ b/ldig/ldigcpp/esaxx/cmdline.h @@ -0,0 +1,704 @@ +/* +Copyright (c) 2009, Hideyuki Tanaka +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cmdline{ + +namespace detail{ + +template +class lexical_cast_t{ +public: + static Target cast(const Source &arg){ + Target ret; + std::stringstream ss; + if (!(ss<>ret && ss.eof())) + throw std::bad_cast(); + + return ret; + } +}; + +template +class lexical_cast_t{ +public: + static Target cast(const Source &arg){ + return arg; + } +}; + +template +class lexical_cast_t{ +public: + static std::string cast(const Source &arg){ + std::ostringstream ss; + ss< +class lexical_cast_t{ +public: + static Target cast(const std::string &arg){ + Target ret; + std::istringstream ss(arg); + if (!(ss>>ret && ss.eof())) + throw std::bad_cast(); + return ret; + } +}; + +template +struct is_same { + static const bool value = false; +}; + +template +struct is_same{ + static const bool value = true; +}; + +template +Target lexical_cast(const Source &arg) +{ + return lexical_cast_t::value>::cast(arg); +} + +static inline std::string demangle(const std::string &name) +{ + int status=0; + char *p=abi::__cxa_demangle(name.c_str(), 0, 0, &status); + std::string ret(p); + free(p); + return ret; +} + +template +std::string readable_typename() +{ + return demangle(typeid(T).name()); +} + +template <> +std::string readable_typename() +{ + return "string"; +} + +} // detail + +//----- + +class cmdline_error : public std::exception { +public: + cmdline_error(const std::string &msg): msg(msg){} + ~cmdline_error() throw() {} + const char *what() const throw() { return msg.c_str(); } +private: + std::string msg; +}; + +template +struct default_reader{ + T operator()(const std::string &str){ + return detail::lexical_cast(str); + } +}; + +template +struct range_reader{ + range_reader(const T &low, const T &high): low(low), high(high) {} + T operator()(const std::string &s) const { + T ret=default_reader()(s); + if (!(ret>=low && ret<=high)) throw 
cmdline::cmdline_error("range_error"); + return ret; + } +private: + T low, high; +}; + +template +range_reader range(const T &low, const T &high) +{ + return range_reader(low, high); +} + +template +struct oneof_reader{ + T operator()(const std::string &s){ + T ret=default_reader()(s); + if (std::find(alt.begin(), alt.end(), s)==alt.end()) + throw cmdline_error(""); + return ret; + } + void add(const T &v){ alt.push_back(v); } +private: + std::vector alt; +}; + +template +oneof_reader oneof(T a1) +{ + oneof_reader ret; + ret.add(a1); + return ret; +} + +template +oneof_reader oneof(T a1, T a2) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4, T a5) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4, T a5, T a6) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + ret.add(a8); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + 
ret.add(a7); + ret.add(a8); + ret.add(a9); + return ret; +} + +template +oneof_reader oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9, T a10) +{ + oneof_reader ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + ret.add(a8); + ret.add(a9); + ret.add(a10); + return ret; +} + +//----- + +class parser{ +public: + parser(){ + } + ~parser(){ + for (std::map::iterator p=options.begin(); + p!=options.end(); p++) + delete p->second; + } + + void add(const std::string &name, + char short_name=0, + const std::string &desc=""){ + if (options.count(name)) throw cmdline_error("multiple definition: "+name); + options[name]=new option_without_value(name, short_name, desc); + ordered.push_back(options[name]); + } + + template + void add(const std::string &name, + char short_name=0, + const std::string &desc="", + bool need=true, + const T def=T()){ + add(name, short_name, desc, need, def, default_reader()); + } + + template + void add(const std::string &name, + char short_name=0, + const std::string &desc="", + bool need=true, + const T def=T(), + F reader=F()){ + if (options.count(name)) throw cmdline_error("multiple definition: "+name); + options[name]=new option_with_value_with_reader(name, short_name, need, def, desc, reader); + ordered.push_back(options[name]); + } + + void footer(const std::string &f){ + ftr=f; + } + + void set_program_name(const std::string &name){ + prog_name=name; + } + + bool exist(const std::string &name){ + if (options.count(name)==0) throw cmdline_error("there is no flag: --"+name); + return options[name]->has_set(); + } + + template + const T &get(const std::string &name) const { + if (options.count(name)==0) throw cmdline_error("there is no flag: --"+name); + const option_with_value *p=dynamic_cast*>(options.find(name)->second); + if (p==NULL) throw cmdline_error("type mismatch flag '"+name+"'"); + return p->get(); + } + + const std::vector &rest() const { + return others; + } + 
+ bool parse(int argc, char *argv[]){ + errors.clear(); + others.clear(); + + if (argc<1){ + errors.push_back("argument number must be longer than 0"); + return false; + } + if (prog_name=="") + prog_name=argv[0]; + + std::map lookup; + for (std::map::iterator p=options.begin(); + p!=options.end(); p++){ + if (p->first.length()==0) continue; + char initial=p->second->short_name(); + if (initial){ + if (lookup.count(initial)>0){ + lookup[initial]=""; + errors.push_back(std::string("short option '")+initial+"' is ambiguous"); + return false; + } + else lookup[initial]=p->first; + } + } + + for (int i=1; idescription()<set()){ + errors.push_back("option needs value: --"+name); + return; + } + } + + void set_option(const std::string &name, const std::string &value){ + if (options.count(name)==0){ + errors.push_back("undefined option: --"+name); + return; + } + if (!options[name]->set(value)){ + errors.push_back("option value is invalid: --"+name+"="+value); + return; + } + } + + class option_base{ + public: + virtual ~option_base(){} + + virtual bool has_value() const=0; + virtual bool set()=0; + virtual bool set(const std::string &value)=0; + virtual bool has_set() const=0; + virtual bool valid() const=0; + virtual bool must() const=0; + + virtual const std::string &name() const=0; + virtual char short_name() const=0; + virtual const std::string &description() const=0; + virtual std::string short_description() const=0; + }; + + class option_without_value : public option_base { + public: + option_without_value(const std::string &name, + char short_name, + const std::string &desc) + :nam(name), snam(short_name), desc(desc), has(false){ + } + ~option_without_value(){} + + bool has_value() const { return false; } + + bool set(){ + has=true; + return true; + } + + bool set(const std::string &){ + return false; + } + + bool has_set() const { + return has; + } + + bool valid() const{ + return true; + } + + bool must() const{ + return false; + } + + const std::string &name() 
const{ + return nam; + } + + char short_name() const{ + return snam; + } + + const std::string &description() const { + return desc; + } + + std::string short_description() const{ + return "--"+nam; + } + + private: + std::string nam; + char snam; + std::string desc; + bool has; + }; + + template + class option_with_value : public option_base { + public: + option_with_value(const std::string &name, + char short_name, + bool need, + const T &def, + const std::string &desc) + : nam(name), snam(short_name), need(need), has(false) + , def(def), actual(def) { + this->desc=full_description(desc); + } + ~option_with_value(){} + + const T &get() const { + return actual; + } + + bool has_value() const { return true; } + + bool set(){ + return false; + } + + bool set(const std::string &value){ + try{ + actual=read(value); + has=true; + } + catch(const std::exception &e){ + return false; + } + return true; + } + + bool has_set() const{ + return has; + } + + bool valid() const{ + if (need && !has) return false; + return true; + } + + bool must() const{ + return need; + } + + const std::string &name() const{ + return nam; + } + + char short_name() const{ + return snam; + } + + const std::string &description() const { + return desc; + } + + std::string short_description() const{ + return "--"+nam+"="+detail::readable_typename(); + } + + protected: + std::string full_description(const std::string &desc){ + return + desc+" ("+detail::readable_typename()+ + (need?"":" [="+detail::lexical_cast(def)+"]") + +")"; + } + + virtual T read(const std::string &s)=0; + + std::string nam; + char snam; + bool need; + std::string desc; + + bool has; + T def; + T actual; + }; + + template + class option_with_value_with_reader : public option_with_value { + public: + option_with_value_with_reader(const std::string &name, + char short_name, + bool need, + const T def, + const std::string &desc, + F reader) + : option_with_value(name, short_name, need, def, desc), reader(reader){ + } + + private: + 
T read(const std::string &s){ + return reader(s); + } + + F reader; + }; + + std::map options; + std::vector ordered; + std::string ftr; + + std::string prog_name; + std::vector others; + + std::vector errors; +}; + +} // cmdline diff --git a/ldig/ldigcpp/esaxx/configure b/ldig/ldigcpp/esaxx/configure new file mode 100755 index 0000000..9023ecf --- /dev/null +++ b/ldig/ldigcpp/esaxx/configure @@ -0,0 +1,121 @@ +#! /bin/sh + +# waf configure wrapper + +# Fancy colors used to beautify the output a bit. +# +if [ "$NOCOLOR" ] ; then + NORMAL="" + BOLD="" + RED="" + YELLOW="" + GREEN="" +else + NORMAL='\033[0m' + BOLD='\033[01;1m' + RED='\033[01;91m' + YELLOW='\033[00;33m' + GREEN='\033[01;92m' +fi + +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_ERROR=2 +EXIT_BUG=10 + +CUR_DIR=$PWD + +#possible relative path +WORKINGDIR=`dirname $0` +cd $WORKINGDIR +#abs path +WORKINGDIR=`pwd` +cd $CUR_DIR + + +# Checks for WAF. Honours $WAF if set. Stores path to 'waf' in $WAF. +# Requires that $PYTHON is set. +# +checkWAF() +{ + printf "Checking for WAF\t\t\t: " + #installed miniwaf in sourcedir + if [ -z "$WAF" ] ; then + if [ -f "${WORKINGDIR}/waf" ] ; then + WAF="${WORKINGDIR}/waf" + if [ ! -x "$WAF" ] ; then + chmod +x $WAF + fi + fi + fi + if [ -z "$WAF" ] ; then + if [ -f "${WORKINGDIR}/waf-light" ] ; then + ${WORKINGDIR}/waf-light --make-waf + WAF="${WORKINGDIR}/waf" + fi + fi + #global installed waf with waf->waf.py link + if [ -z "$WAF" ] ; then + WAF=`which waf 2>/dev/null` + fi + # neither waf nor miniwaf could be found + if [ ! -x "$WAF" ] ; then + printf "$RED""not found""$NORMAL""\n" + echo "Go to http://code.google.com/p/waf/" + echo "and download a waf version" + exit $EXIT_FAILURE + else + printf "$GREEN""$WAF""$NORMAL""\n" + fi +} + +# Generates a Makefile. Requires that $WAF is set. 
+# +generateMakefile() +{ + cat > Makefile << EOF +#!/usr/bin/make -f +# Waf Makefile wrapper +WAF_HOME=$CUR_DIR + +all: + @$WAF build + +all-debug: + @$WAF -v build + +all-progress: + @$WAF -p build + +install: + $WAF install + +uninstall: + $WAF uninstall + +clean: + @$WAF clean + +distclean: + @$WAF distclean + @-rm -rf build + @-rm -f Makefile + +check: + @$WAF check + +dist: + @$WAF dist + +.PHONY: clean dist distclean check uninstall install all + +EOF +} + +checkWAF +generateMakefile + +"${WAF}" configure $* +exit $? + + diff --git a/ldig/ldigcpp/esaxx/enumSubstring.cpp b/ldig/ldigcpp/esaxx/enumSubstring.cpp new file mode 100644 index 0000000..34ba8f0 --- /dev/null +++ b/ldig/ldigcpp/esaxx/enumSubstring.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include "cmdline.h" +#include "esa.hxx" + +using namespace std; + +int readFile(const char* fn, vector& T){ + FILE* fp = fopen(fn, "rb"); + if (fp == NULL){ + cerr << "cannot open " << fn << endl; + return -1; + } + + if (fseek(fp, 0, SEEK_END) != 0){ + cerr << "cannot fseek " << fn << endl; + fclose(fp); + return -1; + } + int n = ftell(fp); + rewind(fp); + if (n < 0){ + cerr << "cannot ftell " << fn << endl; + fclose(fp); + return -1; + } + T.resize(n); + if (fread(&T[0], sizeof(unsigned char), (size_t)n, fp) != (size_t) n){ + cerr << "fread error " << fn << endl; + fclose(fp); + return -1; + } + + fclose(fp); + return 0; +} + +int getID(const string& str, tr1::unordered_map& word2id){ + tr1::unordered_map::const_iterator it = word2id.find(str); + if (it == word2id.end()){ + int newID = (int)word2id.size(); + word2id[str] = newID; + return newID; + } else { + return it->second; + } +} + +void printSnipet(const vector& T, const int beg, const int len, const vector& id2word){ + for (int i = 0; i < len; ++i){ + int c = T[beg + i]; + if (id2word.size() > 0){ + cout << id2word[c] << " "; + } else { + cout << (isspace((char)c) ? 
'_' : (char)c); + } + } +} + +int main(int argc, char* argv[]){ + cmdline::parser p; + p.add("word", 'w', "word type"); + + if (!p.parse(argc, argv)){ + cerr << p.error() << endl + << p.usage() << endl; + return -1; + } + + if (p.rest().size() > 0){ + cerr << p.usage() << endl; + return -1; + } + + vector T; + + bool isWord = p.exist("word"); + tr1::unordered_map word2id; + istreambuf_iterator isit(cin); + istreambuf_iterator end; + + size_t origLen = 0; + if (isWord){ + string word; + while (isit != end){ + char c = *isit++; + if (!isspace(c)){ + word += c; + } else if (word.size() > 0){ + T.push_back(getID(word, word2id)); + word = ""; + } + ++origLen; + } + if (word.size() > 0){ + T.push_back(getID(word, word2id)); + } + } else { + while (isit != end){ + T.push_back((unsigned char)(*isit++)); + } + origLen = T.size(); + } + + vector id2word(word2id.size()); + for (tr1::unordered_map::const_iterator it = word2id.begin(); + it != word2id.end(); ++it){ + id2word[it->second] = it->first; + } + + vector SA(T.size()); + vector L (T.size()); + vector R (T.size()); + vector D (T.size()); + + int k = (isWord) ? (int)id2word.size() : 0x100; + if (isWord){ + cerr << "origN:" << origLen << endl; + } + cerr << " n:" << T.size() << endl; + cerr << "alpha:" << k << endl; + + int nodeNum = 0; + if (esaxx(T.begin(), SA.begin(), + L.begin(), R.begin(), D.begin(), + (int)T.size(), k, nodeNum) == -1){ + return -1; + } + cerr << " node:" << nodeNum << endl; + + for (int i = 0; i < nodeNum; ++i){ + cout << i << "\t" << R[i] - L[i] << "\t" << D[i] << "\t"; + printSnipet(T, SA[L[i]], D[i], id2word); + cout << endl; + } + + return 0; +} diff --git a/ldig/ldigcpp/esaxx/esa.hxx b/ldig/ldigcpp/esaxx/esa.hxx new file mode 100644 index 0000000..acb5c7a --- /dev/null +++ b/ldig/ldigcpp/esaxx/esa.hxx @@ -0,0 +1,125 @@ +/* + * esa.hxx + * Copyright (c) 2010 Daisuke Okanohara All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _ESA_HXX +#define _ESA_HXX + +#include +#include +#include +#include "sais.hxx" + +namespace esaxx_private { +template +index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){ + if (n == 0){ + return 0; + } + sarray_type Psi = L; + Psi[SA[0]] = SA[n-1]; + for (index_type i = 1; i < n; ++i){ + Psi[SA[i]] = SA[i-1]; + } + + // Compare at most 2n log n charcters. 
Practically fastest + // "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09 + sarray_type PLCP = R; + index_type h = 0; + for (index_type i = 0; i < n; ++i){ + index_type j = Psi[i]; + while (i+h < n && j+h < n && + T[i+h] == T[j+h]){ + ++h; + } + PLCP[i] = h; + if (h > 0) --h; + } + + sarray_type H = L; + for (index_type i = 0; i < n; ++i){ + H[i] = PLCP[SA[i]]; + } + H[0] = -1; + + std::vector > S; + S.push_back(std::make_pair((index_type)-1, (index_type)-1)); + size_t nodeNum = 0; + for (index_type i = 0; ; ++i){ + std::pair cur (i, (i == n) ? -1 : H[i]); + std::pair cand(S.back()); + while (cand.second > cur.second){ + if (i - cand.first > 1){ + L[nodeNum] = cand.first; + R[nodeNum] = i; + D[nodeNum] = cand.second; + ++nodeNum; + } + cur.first = cand.first; + S.pop_back(); + cand = S.back(); + } + if (cand.second < cur.second){ + S.push_back(cur); + } + if (i == n) break; + S.push_back(std::make_pair(i, n - SA[i] + 1)); + } + return nodeNum; +} +} + +/** + * @brief Build an enhanced suffix array of a given string in linear time + * For an input text T, esaxx() builds an enhancd suffix array in linear time. + * i-th internal node is represented as a triple (L[i], R[i], D[i]); + * L[i] and R[i] is the left/right boundary of the suffix array as SA[L[i]....R[i]-1] + * D[i] is the depth of the internal node + * The number of internal node is at most N-1 and return the actual number by + * @param T[0...n-1] The input string. 
(random access iterator) + * @param SA[0...n-1] The output suffix array (random access iterator) + * @param L[0...n-1] The output left boundary of internal node (random access iterator) + * @param R[0...n-1] The output right boundary of internal node (random access iterator) + * @param D[0...n-1] The output depth of internal node (random access iterator) + * @param n The length of the input string + * @param k The alphabet size + * @pram nodeNum The output the number of internal node + * @return 0 if succeded, -1 or -2 otherwise + */ + +template +int esaxx(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, + index_type n, index_type k, index_type& nodeNum) { + if ((n < 0) || (k <= 0)) return -1; + int err = saisxx(T, SA, n, k); + if (err != 0){ + return err; + } + nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n); + return 0; +} + + +#endif // _ESA_HXX diff --git a/maxsubst/sais.hxx b/ldig/ldigcpp/esaxx/sais.hxx similarity index 100% rename from maxsubst/sais.hxx rename to ldig/ldigcpp/esaxx/sais.hxx diff --git a/ldig/ldigcpp/esaxx/waf b/ldig/ldigcpp/esaxx/waf new file mode 100755 index 0000000..d99146f Binary files /dev/null and b/ldig/ldigcpp/esaxx/waf differ diff --git a/ldig/ldigcpp/esaxx/wscript b/ldig/ldigcpp/esaxx/wscript new file mode 100644 index 0000000..559e0d7 --- /dev/null +++ b/ldig/ldigcpp/esaxx/wscript @@ -0,0 +1,24 @@ +VERSION= '0.0.3' +APPNAME= 'esaxx' + +srcdir= '.' 
+blddir= 'bin' + +def set_options(ctx): + ctx.tool_options('compiler_cxx') + +def configure(ctx): + ctx.check_tool('compiler_cxx') + ctx.env.CXXFLAGS += ['-O2', '-Wall', '-g'] + +def build(bld): + task1= bld(features='cxx cprogram', + source = 'enumSubstring.cpp', + name = 'enum_substring', + target = 'enum_substring', + includes = '.') + +def dist_hook(): + import os + os.remove('upload.sh') + os.remove('googlecode_upload.py') diff --git a/models/model.latin.20120209.tar.xz b/ldig/ldigcpp/lang50.x64.model.xz similarity index 65% rename from models/model.latin.20120209.tar.xz rename to ldig/ldigcpp/lang50.x64.model.xz index 766ee64..20200d7 100644 Binary files a/models/model.latin.20120209.tar.xz and b/ldig/ldigcpp/lang50.x64.model.xz differ diff --git a/ldig/ldigcpp/ldig.sln b/ldig/ldigcpp/ldig.sln new file mode 100644 index 0000000..6ac01f1 --- /dev/null +++ b/ldig/ldigcpp/ldig.sln @@ -0,0 +1,38 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ldig", "ldig\ldig.vcxproj", "{006B0427-A9EF-4C07-B38E-F02312A0533C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ldigtest", "ldigtest\ldigtest.vcxproj", "{6A2AD6DB-6312-41C9-BED9-940F6BAC2130}" + ProjectSection(ProjectDependencies) = postProject + {006B0427-A9EF-4C07-B38E-F02312A0533C} = {006B0427-A9EF-4C07-B38E-F02312A0533C} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Debug|Win32.ActiveCfg = Debug|Win32 + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Debug|Win32.Build.0 = Debug|Win32 + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Debug|x64.ActiveCfg = Debug|x64 + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Debug|x64.Build.0 = Debug|x64 + 
{006B0427-A9EF-4C07-B38E-F02312A0533C}.Release|Win32.ActiveCfg = Release|Win32 + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Release|Win32.Build.0 = Release|Win32 + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Release|x64.ActiveCfg = Release|x64 + {006B0427-A9EF-4C07-B38E-F02312A0533C}.Release|x64.Build.0 = Release|x64 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Debug|Win32.ActiveCfg = Debug|Win32 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Debug|Win32.Build.0 = Debug|Win32 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Debug|x64.ActiveCfg = Debug|Win32 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Release|Win32.ActiveCfg = Release|Win32 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Release|Win32.Build.0 = Release|Win32 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Release|x64.ActiveCfg = Release|x64 + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/ldig/ldigcpp/ldig/corpus.hpp b/ldig/ldigcpp/ldig/corpus.hpp new file mode 100644 index 0000000..3c892e0 --- /dev/null +++ b/ldig/ldigcpp/ldig/corpus.hpp @@ -0,0 +1,802 @@ +#pragma once +/** + @file + @brief corpus loader for ldig + + Copyright (C) 2013 Nakatani Shuyo / Cybozu Labs, Inc., all rights reserved. 
+*/ + +#include +#ifdef __linux__ +#include +namespace std { + using boost::regex; + using boost::regex_match; + using boost::regex_search; + using boost::regex_iterator; + using boost::match_results; + using boost::smatch; +} +#else +#include +#endif +#include + +#include "cybozu/string.hpp" +#include "cybozu/mmap.hpp" +#include "cybozu/regex.hpp" +#include "type.hpp" + +namespace cybozu { +namespace ldig { + +namespace { + +const std::regex REXLINE("^([0-9\\.]+\\t)?([-a-z]+)(\\t[^\\t\\n]+)*\\t([^\\t\\n]+)$"); +const cybozu::regex REXHTTP("(@|#|https?:\\/\\/)[A-Za-z0-9][^ ]+"); +const cybozu::regex REXFACE("(^| )[:;x]-?[\\(\\)DOPSop]($| )"); +const cybozu::regex REXLAUGH("([hj])+([aeio])+(\\1+\\2+){1,}"); +const cybozu::regex REXNUMBER("[0-9][-:,\\.0-9]+[0-9]"); + +// for CJK Kanji +const char kanji_table[] = { +8,7,0,7,0,0,0,8,7,8,8,8,6,8,8,4,6,7,0,2,7,6,8,0,7,7,2,2,8,2,7,4,0,1,2,0,8,2,5,2,2,0,8,6,2,8,4,0,6,4,7,4,2,0,2,0,7,7,8,8,4,2,2,2, +0,0,4,7,0,8,0,6,8,2,0,8,2,7,7,7,2,0,6,6,2,0,6,1,6,7,0,0,6,7,7,8,2,2,0,0,0,0,2,0,0,6,0,0,0,0,0,0,2,3,0,7,0,0,0,0,0,0,0,0,0,0,7,4, +1,0,4,4,4,0,8,0,7,3,0,8,8,6,8,2,0,7,7,6,8,7,0,0,3,5,2,8,1,0,4,6,2,7,6,0,8,7,7,2,7,2,0,7,8,7,7,0,0,0,2,6,0,2,4,0,0,4,8,2,0,0,0,2, +8,7,6,6,6,2,6,7,4,6,8,7,0,6,2,1,0,2,0,2,7,7,8,6,7,7,4,0,4,6,6,6,0,6,0,8,7,8,0,0,6,4,2,2,8,0,1,0,7,4,7,6,4,6,8,6,0,0,0,7,0,6,0,6, +4,7,4,0,4,4,0,0,4,6,7,4,0,7,7,7,7,7,4,4,4,4,0,2,6,6,8,2,0,5,2,2,2,0,6,0,2,2,2,2,0,0,2,2,4,4,0,7,6,0,2,4,7,0,7,0,7,0,7,4,7,7,4,4, +0,4,0,7,0,0,8,4,4,4,0,0,4,8,7,7,7,7,4,8,4,8,4,6,6,7,6,6,8,6,6,6,8,4,4,6,6,2,0,6,0,6,4,4,6,0,4,6,6,0,0,7,6,5,6,4,4,4,4,6,7,4,6,8, +4,4,4,7,6,4,8,4,6,6,0,7,0,7,0,6,4,6,4,0,6,4,4,6,4,0,4,7,4,7,4,0,3,1,0,2,0,2,2,2,2,2,2,0,2,1,7,7,0,0,4,4,0,7,5,4,0,4,4,4,0,0,0,7, +4,4,5,7,7,6,0,4,0,4,7,4,0,4,6,6,6,6,0,4,4,0,4,7,6,4,6,4,6,8,6,6,4,8,0,3,0,0,2,0,2,2,2,0,4,2,7,6,0,6,0,7,4,5,4,4,7,0,8,0,0,0,6,0, +4,0,0,0,0,4,4,4,0,5,0,8,6,7,4,6,0,8,7,4,6,4,5,4,6,8,6,4,6,0,4,7,4,6,4,5,1,6,7,4,6,6,6,5,6,7,2,4,4,4,0,4,0,4,1,4,0,1,2,0,6,0,2,0, 
+4,4,0,6,0,4,4,6,6,5,4,4,6,4,6,7,0,4,0,4,0,6,0,4,0,0,8,4,7,4,4,4,4,4,4,4,4,8,0,0,4,4,4,4,2,4,4,4,4,0,5,4,5,5,7,6,0,0,4,2,0,5,2,2, +6,0,4,4,0,6,0,4,2,0,0,4,4,7,4,0,0,5,4,0,4,4,4,0,5,5,4,4,4,4,4,0,0,0,4,6,0,2,0,2,2,2,0,0,7,5,4,4,4,4,6,4,4,5,4,5,4,0,6,6,0,4,5,4, +0,4,4,0,4,5,4,4,4,4,4,4,0,1,4,8,0,5,0,4,4,5,6,4,0,0,7,4,0,4,0,0,4,0,0,4,4,4,6,7,4,4,4,0,6,6,6,4,4,4,0,6,0,6,4,0,4,4,0,7,0,4,4,4, +5,0,4,4,5,4,6,6,4,4,4,6,4,0,0,0,4,4,7,4,4,4,0,4,4,0,4,0,4,0,0,5,4,6,4,0,4,4,4,0,0,4,5,0,0,4,4,0,4,4,5,4,4,4,0,4,4,4,4,4,4,4,0,6, +6,7,0,8,7,7,7,5,8,8,0,7,4,7,1,0,1,2,4,0,6,6,2,4,0,0,7,0,7,0,0,4,0,0,6,0,0,8,0,4,8,4,0,7,8,7,6,0,2,8,0,8,2,7,8,7,7,2,0,2,7,2,0,0, +6,2,2,0,0,8,1,4,2,6,5,0,2,8,0,4,0,4,7,4,4,6,2,7,4,3,0,2,2,0,4,0,7,0,6,0,6,7,0,0,1,0,4,2,7,0,0,2,6,6,2,2,1,2,7,7,0,4,0,2,6,6,4,0, +2,0,0,0,7,4,7,2,4,2,4,7,7,5,4,2,4,2,0,0,4,0,0,4,4,0,0,2,4,7,4,0,6,7,0,0,2,0,1,1,0,0,1,2,0,2,0,2,6,5,0,6,0,6,7,0,7,7,8,2,2,7,0,2, +7,6,2,7,0,0,8,7,7,4,7,0,4,2,6,0,4,7,6,4,0,0,6,7,2,2,2,2,4,8,4,0,2,4,0,0,7,5,0,0,6,8,4,8,0,2,6,0,8,4,4,6,0,4,8,7,7,2,7,7,0,2,0,2, +2,6,2,7,4,0,4,5,0,4,7,4,6,8,4,0,2,2,4,0,6,4,7,0,0,0,4,5,6,4,6,4,0,6,4,1,1,3,0,2,0,6,6,4,4,4,4,7,1,0,7,0,4,5,0,4,4,0,4,4,4,6,0,6, +4,6,6,5,4,0,0,5,6,5,4,4,4,4,0,0,2,4,0,6,0,0,4,4,4,4,0,8,0,2,2,7,8,2,2,7,0,0,4,0,8,7,7,7,6,6,4,0,0,3,2,2,1,0,0,0,0,1,0,4,4,0,7,2, +4,4,4,7,0,1,0,7,0,7,0,2,0,4,0,0,2,0,6,4,0,8,6,4,7,5,0,4,0,5,4,7,0,0,5,4,7,0,4,1,0,4,0,4,0,0,0,4,6,4,1,4,4,4,0,4,4,2,7,4,4,0,7,7, +2,1,1,0,0,8,6,0,6,4,4,4,0,6,4,6,6,4,4,0,0,6,8,8,0,7,6,0,4,7,0,4,7,7,4,6,0,0,2,0,0,0,7,0,0,4,2,4,4,4,0,0,4,0,0,4,0,7,3,3,4,4,6,7, +4,8,0,7,0,6,0,7,7,6,7,0,4,4,2,2,0,7,7,7,5,2,2,8,1,0,7,0,7,0,6,2,7,8,2,6,2,0,7,2,0,2,0,2,4,0,6,7,7,7,4,7,3,7,0,6,7,4,2,4,4,0,0,7, +0,0,6,0,7,2,2,0,0,2,4,2,2,2,4,4,0,0,4,0,4,2,4,4,7,4,7,0,4,6,4,8,0,0,2,2,4,6,2,4,3,3,0,0,4,5,2,0,0,0,4,1,4,0,2,0,0,4,0,8,0,0,0,2, +0,2,3,4,0,0,0,0,8,7,7,8,3,8,1,0,0,8,0,0,7,0,8,8,2,3,0,7,0,0,0,6,2,5,5,8,7,7,6,0,6,7,8,7,7,6,6,8,8,7,7,7,0,6,3,3,8,2,0,6,6,2,0,0, 
+0,6,0,8,7,0,6,4,8,7,7,5,8,8,8,7,7,8,6,2,0,2,2,2,4,4,0,7,4,6,6,7,7,2,0,2,4,4,7,8,6,6,4,7,6,6,6,2,4,6,2,4,2,6,4,4,7,7,0,7,6,4,7,0, +6,4,5,6,0,4,7,4,7,1,8,2,0,0,4,4,2,1,2,2,6,2,2,2,8,2,0,2,2,0,0,0,4,4,8,4,6,4,6,4,8,0,1,4,4,0,0,4,4,6,6,8,4,6,6,6,6,0,4,6,7,7,4,4, +6,4,6,0,6,0,6,4,4,0,0,7,8,4,6,2,6,4,6,0,2,6,6,0,4,2,6,2,0,2,0,0,4,4,4,2,2,4,6,6,6,6,6,6,6,6,4,6,4,6,1,7,2,0,4,4,6,0,4,6,4,7,4,6, +7,8,6,4,6,4,6,6,8,7,0,0,2,2,6,6,2,2,2,2,2,2,4,2,0,2,2,0,2,2,6,2,4,5,4,0,4,8,6,6,7,7,6,4,0,6,6,0,0,4,7,6,0,0,0,4,4,0,6,4,6,6,0,6, +0,6,0,4,5,4,7,7,4,6,4,4,4,0,4,6,7,6,4,0,6,0,1,4,0,0,4,2,0,0,0,0,2,0,2,2,2,0,4,6,0,0,6,0,6,4,6,7,6,7,4,6,4,4,4,6,4,4,0,4,6,0,7,2, +4,6,0,6,7,4,7,0,4,2,8,4,0,4,4,5,6,4,4,1,0,6,6,4,0,0,0,0,6,0,4,4,0,6,4,4,6,6,8,2,0,0,6,0,2,2,2,0,0,0,0,0,0,6,6,6,2,0,0,6,6,4,6,4, +6,6,6,6,7,0,0,6,6,7,6,7,4,4,4,6,0,6,4,4,6,4,0,0,6,6,5,0,8,7,0,6,0,4,4,4,4,4,4,7,4,0,5,5,5,4,4,0,1,6,4,6,0,6,1,2,0,2,0,6,0,2,2,4, +4,0,4,4,6,6,4,4,4,6,4,4,6,6,8,4,4,6,6,6,6,4,6,0,0,4,4,4,6,6,0,6,0,6,4,7,6,6,6,0,6,4,2,2,2,0,0,6,0,0,6,2,0,2,4,6,0,4,4,0,4,6,6,4, +6,6,4,0,4,0,5,0,6,7,0,0,6,4,6,6,4,0,4,4,4,4,4,5,3,0,0,6,4,4,2,6,0,0,0,2,2,0,0,6,0,5,4,0,6,2,4,4,4,3,6,4,6,4,6,0,4,6,4,6,0,4,4,6, +4,4,5,0,0,4,4,0,4,4,4,0,7,2,6,0,0,0,0,4,2,0,0,6,6,6,4,1,2,0,4,0,4,0,6,4,6,4,4,0,7,6,6,6,6,4,4,4,4,6,4,4,5,0,6,4,5,4,1,2,2,0,4,4, +4,4,4,4,4,6,6,5,0,0,0,0,4,4,6,6,4,0,0,6,0,4,0,4,4,4,4,0,4,4,0,0,0,0,1,2,0,4,4,4,4,0,4,4,4,4,4,2,0,0,4,4,4,4,4,6,0,0,0,0,6,4,4,0, +4,4,4,4,0,4,4,0,4,4,6,4,4,4,0,0,0,4,0,4,6,0,0,6,0,0,7,8,0,6,8,6,8,6,2,1,6,4,0,4,0,0,4,6,0,2,4,0,7,2,1,1,2,2,0,4,0,6,7,0,0,8,8,6, +0,4,4,7,6,0,2,4,6,6,6,8,4,4,0,1,0,0,5,4,4,0,4,0,4,0,4,4,6,0,4,7,4,0,4,6,0,0,0,1,8,6,6,0,6,7,6,6,8,0,0,6,4,0,0,0,0,2,8,6,0,0,6,0, +6,4,3,0,0,4,0,7,0,4,7,4,6,6,6,6,7,7,4,0,0,0,0,2,0,0,2,2,2,2,2,2,2,6,4,0,7,0,7,0,6,6,7,6,0,6,0,6,4,4,4,6,4,4,6,6,0,0,0,6,6,4,0,0, +4,0,7,6,2,2,2,0,0,0,0,7,6,0,0,4,0,0,2,6,4,4,0,4,4,4,4,6,0,4,4,4,6,2,7,7,6,4,2,2,0,2,0,2,0,2,6,0,0,0,2,0,2,4,4,0,6,4,4,0,4,4,0,4, 
+0,4,6,6,0,0,4,4,0,0,0,7,4,0,7,6,4,0,6,0,6,6,0,0,2,2,2,0,5,2,0,7,7,4,4,4,6,4,0,4,0,4,0,0,4,6,4,2,4,4,4,4,7,4,4,5,6,7,8,4,5,6,0,0, +7,4,7,0,4,5,7,6,4,4,4,6,4,6,4,0,4,2,0,0,4,3,0,0,0,6,0,4,4,4,6,0,6,6,0,4,7,4,0,4,4,4,7,0,4,4,4,4,7,8,4,4,8,7,4,4,4,4,1,4,0,4,0,4, +1,1,0,0,2,0,0,0,4,4,5,4,6,6,4,4,0,7,4,4,7,4,0,5,7,5,5,4,0,4,7,0,0,0,4,4,4,6,0,0,4,1,0,7,2,4,0,4,0,4,0,0,4,5,4,0,0,4,4,4,4,4,7,4, +6,6,4,7,0,6,4,4,4,6,4,4,0,0,4,4,4,4,2,7,4,0,0,1,4,2,2,0,5,4,6,6,4,4,0,4,0,4,4,0,7,6,0,4,4,0,4,4,0,4,0,5,0,0,0,0,0,0,4,4,6,4,5,4, +0,7,4,0,0,6,4,5,4,4,1,0,1,0,4,4,0,6,4,4,4,7,4,0,4,4,4,4,0,4,4,4,0,0,4,4,6,0,0,4,4,4,0,8,7,0,3,4,8,1,1,2,4,0,2,1,0,6,4,0,4,4,4,4, +0,0,2,4,2,0,4,2,0,1,0,0,4,6,4,7,0,0,4,0,6,7,8,4,0,7,8,0,8,0,0,2,4,0,5,0,6,6,0,8,0,8,8,7,4,6,7,6,0,7,0,0,8,0,0,7,6,2,2,0,6,0,4,0, +4,2,2,0,7,4,0,7,7,7,4,2,0,0,6,7,4,7,0,4,7,6,2,7,6,0,6,0,4,0,0,0,6,4,6,0,0,3,0,4,1,4,5,4,0,4,5,0,4,4,4,8,7,0,6,4,6,8,0,4,4,8,4,4, +4,6,8,7,7,4,2,2,2,0,7,0,0,6,4,4,4,0,6,7,0,0,7,6,4,7,0,0,0,4,6,0,4,4,4,6,6,7,4,4,7,2,2,2,1,0,6,6,0,4,6,4,4,4,4,0,0,7,4,7,4,4,7,0, +4,4,0,4,0,4,6,4,4,1,6,8,4,4,4,4,7,7,6,7,7,0,4,2,6,0,6,4,6,6,4,0,4,4,0,6,4,7,5,0,6,4,5,1,6,4,4,0,0,4,4,4,4,4,5,4,0,2,4,7,4,4,4,7, +4,7,0,7,2,2,2,2,2,6,4,0,6,0,0,4,0,6,0,6,0,4,4,4,7,4,0,4,6,0,4,6,7,0,0,6,0,6,0,0,0,7,0,0,0,4,4,1,0,2,2,4,2,4,6,4,4,4,0,0,7,0,4,0, +6,5,4,4,4,0,7,4,4,6,6,0,4,4,0,0,4,4,4,4,0,6,4,4,4,0,7,4,4,4,4,4,4,0,6,0,4,4,5,6,0,4,6,0,4,4,0,0,4,0,0,0,2,2,2,6,4,0,6,4,4,4,0,7, +0,0,0,4,4,0,0,0,0,0,4,4,4,0,4,4,4,0,7,4,4,4,0,4,0,0,6,7,4,4,4,4,0,0,4,0,0,4,4,4,0,4,2,0,4,0,4,4,4,4,6,6,4,6,4,4,6,4,4,4,4,4,6,4, +4,7,6,0,4,0,4,4,4,7,4,4,7,4,0,0,0,0,2,0,2,4,6,4,6,4,4,4,6,4,4,4,6,7,4,6,0,4,6,0,4,6,4,6,4,4,4,0,0,2,0,4,4,4,4,4,4,4,4,4,0,4,0,4, +0,4,4,4,0,4,0,4,4,7,0,4,4,0,0,4,4,0,0,4,4,0,6,6,0,4,4,4,0,4,4,0,4,4,1,4,4,4,4,4,4,0,4,0,5,4,4,4,5,0,6,0,6,0,0,2,4,0,0,0,4,4,4,4, +6,0,0,4,0,4,0,4,4,0,0,4,4,4,4,0,8,6,0,6,7,6,4,8,7,2,6,6,7,7,0,7,0,0,6,7,7,6,8,0,0,6,2,5,6,0,4,0,6,6,4,6,0,6,0,4,8,0,6,4,0,6,0,4, 
+2,6,0,6,6,7,0,7,7,8,0,7,8,1,4,7,0,0,4,6,0,7,0,7,8,7,8,7,7,3,8,1,2,2,7,7,7,7,6,4,4,0,2,2,4,4,5,0,7,0,0,7,7,7,8,0,6,7,0,0,0,2,2,7, +4,4,7,0,7,7,7,6,0,0,4,4,7,4,4,0,6,4,7,7,4,0,4,0,4,4,0,1,0,3,6,7,4,7,4,4,6,6,4,5,6,5,4,4,4,0,7,4,6,4,4,0,0,5,4,0,7,8,7,2,2,0,1,3, +0,7,1,4,7,0,8,4,4,7,7,5,4,8,5,8,4,8,4,0,2,6,7,0,2,0,7,0,2,2,0,4,0,0,6,0,7,6,0,2,4,0,4,0,6,1,0,0,4,8,0,4,2,0,0,4,6,6,7,7,7,3,7,7, +7,6,2,0,4,7,4,4,7,2,3,7,4,5,6,6,6,7,0,0,4,7,4,0,4,6,0,0,4,4,3,0,7,3,4,6,5,7,2,4,4,4,4,0,4,4,6,7,0,8,0,4,4,0,0,0,0,6,6,4,4,0,4,2, +0,2,2,0,0,0,4,0,6,4,4,4,6,6,0,4,7,6,4,4,6,4,2,2,2,2,2,8,2,4,0,4,4,5,6,6,4,4,4,4,5,7,4,6,7,6,4,4,4,7,0,7,0,6,4,6,7,0,0,0,0,2,0,2, +0,2,0,0,2,0,4,4,4,4,4,6,4,0,4,4,4,0,6,4,4,0,4,4,4,6,4,4,0,0,4,4,1,3,0,0,2,2,2,0,7,0,6,0,4,6,4,1,7,4,0,0,4,0,5,4,4,4,0,7,0,4,0,4, +4,4,2,2,0,0,6,7,0,0,0,4,4,4,7,4,0,4,4,0,6,0,7,4,0,4,4,6,0,4,6,4,4,0,4,4,6,4,6,6,4,7,0,0,0,2,6,0,4,4,4,4,6,4,4,4,4,4,4,0,4,6,2,4, +4,4,4,4,0,4,0,6,0,4,6,6,6,0,4,0,5,4,4,0,0,4,0,0,2,4,0,2,0,2,4,0,0,0,4,4,0,4,0,4,4,7,0,6,6,0,0,7,0,4,4,0,2,0,0,4,0,4,4,0,4,4,4,4, +4,4,6,0,4,0,4,4,4,4,4,1,0,4,0,0,0,0,4,4,4,4,0,4,0,6,4,0,4,6,4,4,4,4,4,0,0,0,0,4,4,4,4,0,4,4,4,4,4,4,4,0,4,4,0,6,4,0,5,0,4,4,0,0, +4,0,4,4,0,2,4,4,0,4,0,4,1,6,0,4,0,4,4,0,4,4,4,0,4,0,0,2,0,7,7,4,4,7,6,1,0,8,7,7,7,2,0,6,0,0,7,2,4,8,8,7,7,0,0,7,0,4,0,1,0,7,7,4, +0,2,8,7,4,2,7,0,2,0,4,0,8,0,4,2,2,6,0,0,6,6,7,4,6,6,6,6,2,7,0,4,4,4,4,4,4,5,2,2,4,4,0,5,0,7,2,1,1,2,0,5,4,0,4,6,8,0,0,2,2,7,4,0, +4,4,2,4,6,7,0,0,0,0,4,4,7,4,4,4,0,0,0,4,6,7,0,4,4,4,0,6,4,4,2,4,4,7,6,5,0,0,4,4,4,4,4,4,4,4,4,4,4,0,7,8,8,4,6,0,7,5,2,7,7,7,5,2, +6,1,4,1,7,0,2,7,4,4,7,6,4,4,0,7,2,2,0,2,2,7,7,8,0,2,7,4,7,0,2,2,6,0,4,4,4,6,8,8,4,0,4,5,4,7,4,0,4,4,4,6,4,7,7,7,7,6,0,0,0,0,6,0, +0,4,4,1,4,4,4,4,4,7,7,4,4,0,4,0,0,6,6,7,4,4,6,4,4,4,4,6,4,4,4,5,5,4,4,4,0,4,4,4,6,4,2,0,4,0,4,4,0,4,4,4,2,0,7,7,0,0,7,1,1,0,6,7, +8,7,6,2,7,4,0,4,6,0,7,6,0,0,0,7,1,2,4,7,5,7,0,7,7,0,4,7,0,4,0,7,2,0,4,4,4,3,7,7,4,6,2,0,0,6,4,2,4,7,0,4,0,8,4,5,4,2,2,0,7,0,1,0, 
+6,0,0,4,4,0,4,0,4,4,5,4,4,0,4,4,2,0,2,8,4,2,6,6,6,4,0,0,0,6,0,0,0,2,7,0,6,4,3,4,0,7,7,5,7,6,0,4,7,8,0,6,4,0,4,6,4,7,0,2,7,4,4,4, +7,7,6,0,3,8,4,6,8,6,6,7,8,0,0,0,7,4,7,1,0,2,4,8,6,6,0,4,6,0,4,4,4,7,0,0,0,4,4,0,6,5,7,4,4,6,7,4,0,0,4,1,1,6,4,6,0,5,0,4,6,7,4,4, +4,4,0,8,2,7,2,0,0,6,0,0,7,7,0,6,6,6,6,0,4,4,6,7,7,7,0,0,1,6,4,0,7,6,0,4,6,4,0,2,4,0,6,8,0,6,6,4,0,6,0,4,4,7,0,4,6,0,4,6,0,7,2,6, +6,2,2,2,2,2,2,0,0,4,6,4,4,6,8,6,4,4,7,4,6,6,7,4,0,6,4,6,7,8,4,0,7,6,4,0,4,7,6,8,7,6,7,6,4,4,4,7,0,0,4,4,4,6,0,4,0,4,0,2,2,0,0,2, +4,6,6,6,4,4,4,4,0,4,0,8,4,6,0,0,7,0,7,4,4,7,0,0,4,6,6,4,0,6,4,4,0,0,7,6,6,5,4,6,7,7,6,6,6,7,4,7,7,0,4,2,0,1,2,0,2,2,2,2,2,2,0,6, +4,4,0,6,6,4,4,4,4,7,4,0,7,6,4,0,4,0,6,0,7,4,6,4,0,0,6,6,4,6,0,7,7,0,4,7,0,0,3,0,6,1,1,2,2,2,0,2,4,6,7,0,6,4,5,4,6,4,4,6,7,4,4,4, +4,4,0,4,4,8,6,5,4,4,6,6,4,4,4,4,0,7,0,4,4,6,0,0,6,4,7,4,7,6,0,7,6,4,4,1,4,0,6,2,3,2,0,2,2,2,2,2,7,4,4,8,6,4,6,4,4,7,6,4,4,0,4,4, +6,7,0,4,4,4,6,0,7,7,4,4,0,6,6,8,4,0,4,4,4,6,4,0,4,0,7,8,4,4,0,8,2,0,0,6,2,0,2,6,4,4,0,6,4,0,4,4,0,0,4,0,4,0,4,4,0,0,0,4,0,0,4,6, +4,4,0,0,4,4,4,4,7,4,6,5,7,4,7,4,0,2,4,4,4,7,4,0,4,0,4,4,0,6,4,4,0,4,7,5,0,4,4,7,7,0,4,4,4,0,5,0,7,4,4,4,4,6,5,6,0,4,4,0,4,0,5,0, +4,0,5,4,0,0,0,0,0,4,4,6,4,4,7,0,5,4,4,4,6,0,4,0,0,0,4,4,0,6,0,4,0,4,4,0,5,0,0,7,6,7,4,4,6,4,4,4,4,4,5,4,4,4,5,2,4,0,4,0,4,0,7,4, +0,4,6,4,0,4,4,5,6,4,6,6,4,4,0,0,1,2,2,0,2,0,4,0,4,0,0,0,0,0,4,4,4,0,0,4,4,4,6,4,4,4,4,4,0,4,4,0,4,4,5,0,0,6,4,4,5,4,4,4,4,4,4,6, +8,4,0,4,4,0,2,4,6,4,7,2,6,6,7,2,8,8,7,0,4,6,7,2,2,4,7,6,0,0,0,7,4,6,6,4,6,6,1,4,0,4,6,4,2,4,6,1,4,0,4,6,7,0,4,2,1,0,4,1,0,6,6,7, +8,6,4,6,0,0,4,7,6,7,4,8,2,8,6,0,4,6,6,8,6,1,0,0,7,4,4,6,4,0,4,0,4,4,4,6,4,4,6,2,0,2,2,2,2,6,7,6,6,5,4,6,4,0,7,4,0,7,4,4,6,4,8,7, +7,4,0,4,7,0,4,4,4,6,8,0,4,0,4,4,0,7,6,6,4,7,6,7,7,0,2,2,1,0,1,2,2,2,2,0,2,8,0,0,6,4,4,7,6,4,4,4,4,7,0,4,4,7,4,0,4,7,0,6,7,7,4,6, +0,0,6,0,6,3,6,6,6,6,6,4,6,7,6,4,7,4,7,7,6,0,6,6,7,7,6,7,6,1,0,2,1,1,2,2,0,2,2,2,2,2,0,4,7,7,6,6,4,6,4,7,6,4,7,7,4,4,4,4,6,6,7,6, 
+4,8,6,4,0,0,0,8,6,7,0,4,4,4,6,4,4,7,0,4,4,4,6,0,0,1,2,2,0,2,2,3,2,2,2,2,2,2,0,0,7,4,6,7,4,4,0,7,0,0,6,4,4,0,4,0,4,6,7,4,4,7,4,1, +4,4,6,6,4,6,6,4,4,7,4,6,7,6,6,6,6,4,0,0,4,7,4,5,4,4,4,0,1,0,2,2,0,2,2,2,0,4,0,7,5,6,0,4,0,6,7,4,4,6,5,0,0,4,6,7,4,0,7,7,4,4,0,0, +6,4,6,5,4,4,0,6,7,6,6,0,7,4,6,6,6,4,7,0,4,0,6,4,7,4,0,5,4,4,4,4,7,5,7,6,4,8,0,7,8,7,7,4,7,6,6,4,6,4,1,2,3,0,0,2,2,0,2,1,2,4,6,0, +4,0,4,5,6,4,6,4,4,6,4,4,4,6,6,7,7,0,6,4,0,4,7,4,4,4,5,5,4,4,2,4,6,7,0,6,4,4,0,4,4,6,6,4,0,6,5,4,4,4,6,4,7,4,6,0,2,4,1,0,0,2,0,2, +2,2,2,0,0,2,4,0,0,4,4,6,6,5,4,6,6,0,4,6,6,4,4,4,4,0,4,6,6,0,6,4,6,6,4,4,4,4,6,4,4,0,6,4,7,7,4,4,4,0,0,4,6,4,4,4,0,4,3,0,0,6,5,6, +4,6,1,4,2,2,2,2,2,0,2,4,0,4,4,0,4,4,6,4,6,0,0,0,7,4,0,4,4,4,6,4,4,4,0,0,0,4,4,6,4,7,0,4,4,6,4,4,4,0,4,4,4,4,4,4,7,6,7,4,0,4,0,4, +0,0,6,1,2,6,0,6,4,4,4,4,4,0,0,4,4,2,7,4,0,6,6,4,4,6,5,0,4,4,7,4,4,0,4,4,7,4,4,0,0,6,0,5,6,7,7,0,7,4,5,4,0,2,0,2,2,1,2,4,6,4,4,4, +2,5,6,4,4,6,0,4,0,4,4,4,0,7,6,4,6,0,6,0,4,0,4,6,6,4,4,4,0,0,2,0,4,0,7,4,6,0,7,0,4,4,0,4,5,4,0,4,4,4,4,4,4,0,0,4,4,0,4,4,4,4,5,4, +6,4,0,4,4,0,4,4,0,6,0,0,4,4,4,4,4,0,2,4,4,4,4,4,6,4,0,4,4,4,0,0,4,4,4,4,4,6,4,0,0,4,4,6,4,4,6,7,0,0,4,4,2,2,8,4,6,7,0,7,0,4,8,8, +0,4,0,4,0,7,4,0,6,6,4,0,2,0,0,7,0,7,0,4,4,6,6,5,4,8,0,2,4,6,6,0,0,0,7,7,4,4,7,4,4,0,4,2,7,0,0,4,8,0,6,4,7,5,4,7,4,4,4,4,4,0,0,4, +4,4,4,4,4,0,0,8,0,1,0,2,7,0,1,0,7,7,4,2,4,4,4,7,0,8,0,6,7,4,4,6,4,7,4,0,7,7,0,7,4,2,4,6,5,3,4,7,8,0,4,4,0,0,4,4,4,8,0,4,7,7,0,4, +0,6,4,6,6,7,6,0,0,0,0,7,6,4,6,7,4,0,6,4,0,0,6,7,0,0,4,4,0,4,4,4,8,4,7,0,0,8,7,3,7,8,0,0,7,7,6,6,6,6,4,4,4,4,8,2,0,0,7,4,4,4,0,0, +6,0,7,6,4,4,7,5,4,4,6,4,7,4,8,7,4,4,4,7,7,6,0,0,0,2,0,0,4,6,0,8,7,4,4,0,4,7,4,7,7,0,0,4,0,7,4,8,0,6,4,4,6,6,6,0,0,4,4,0,3,0,2,0, +0,6,8,7,0,4,0,4,0,4,4,3,6,0,0,6,0,4,7,2,2,2,2,2,0,4,8,4,4,4,4,6,0,6,4,0,6,4,7,0,6,1,4,0,4,0,7,7,6,4,4,0,7,0,7,6,4,4,7,4,4,0,6,0, +4,1,2,0,6,0,4,7,4,4,4,4,6,4,0,0,4,7,0,0,4,4,7,7,4,4,0,0,0,6,0,4,4,4,5,0,0,0,1,2,6,4,4,5,0,0,7,4,4,4,4,0,7,4,0,4,4,6,4,4,0,4,6,0, 
+4,0,0,0,4,0,4,5,4,4,4,4,4,0,0,4,0,0,4,0,0,0,4,0,4,7,4,6,7,7,4,0,4,0,0,4,4,0,6,0,4,6,0,4,4,4,4,0,6,0,7,7,8,0,4,6,5,7,0,0,6,1,7,7, +8,4,0,8,4,4,0,0,8,8,6,8,0,8,0,4,6,0,4,4,7,7,0,7,4,0,0,8,0,8,0,8,4,4,4,4,0,0,6,4,7,0,8,7,8,7,4,2,0,7,0,4,7,6,0,0,4,4,8,4,4,7,4,4, +2,0,2,2,0,4,6,4,6,7,0,4,6,4,8,7,7,7,0,7,0,4,7,4,0,4,4,0,7,4,6,7,6,3,1,0,0,8,0,0,2,2,6,0,4,7,0,7,6,8,6,6,4,7,4,7,4,4,4,4,6,4,7,7, +0,6,0,4,6,4,4,7,0,6,0,6,4,4,4,0,7,4,4,4,4,7,0,7,6,4,7,0,8,7,2,4,1,0,3,2,0,2,0,2,2,0,2,2,0,2,4,7,6,0,4,6,4,6,7,6,6,4,4,4,0,0,0,0, +4,7,4,6,7,4,4,0,4,4,5,4,4,4,4,7,7,7,6,7,7,0,0,0,7,6,7,4,6,6,6,4,2,0,6,4,4,6,4,4,0,6,4,4,6,4,4,6,6,7,4,7,7,5,4,4,4,0,4,1,4,2,1,7, +2,0,1,1,1,2,0,2,2,2,2,2,2,0,2,2,0,2,4,7,4,0,7,7,4,0,4,0,4,6,0,4,4,8,1,0,0,4,4,0,4,6,7,4,0,4,4,4,0,4,6,6,5,4,0,8,7,7,4,4,8,7,2,0, +6,7,7,7,6,6,6,0,7,6,2,4,6,4,6,4,7,7,0,7,7,2,0,0,0,0,0,0,1,1,0,1,2,2,2,2,2,2,2,3,2,2,0,6,0,4,4,4,0,4,4,0,6,4,7,6,4,4,0,4,4,4,4,4, +4,7,4,6,0,7,6,4,0,4,4,4,4,0,0,6,4,4,4,7,4,0,4,7,0,0,0,4,4,4,0,4,4,4,7,4,4,0,2,7,7,4,4,4,4,6,4,7,7,5,4,6,4,6,1,0,0,0,0,0,1,0,0,0, +2,0,2,0,5,0,4,4,4,7,0,7,4,6,4,0,4,4,7,4,4,6,4,4,6,0,7,0,4,4,4,5,6,4,0,6,4,0,4,4,4,4,4,4,4,0,7,4,6,6,5,4,4,6,4,4,4,6,7,4,6,4,0,0, +1,2,0,0,4,7,4,4,4,0,4,7,4,7,7,4,6,4,6,4,4,4,0,4,0,1,0,1,1,0,0,2,2,0,0,0,2,4,0,0,0,0,4,0,0,2,0,4,6,0,4,4,7,4,0,4,4,6,0,4,4,6,0,7, +4,4,6,0,4,4,0,0,4,4,5,4,4,0,4,4,0,4,4,5,6,1,4,6,4,4,7,4,4,6,6,4,7,0,5,6,0,4,6,0,4,4,4,6,4,5,6,5,4,6,0,1,4,5,4,6,6,6,4,4,3,8,0,0, +2,0,7,4,2,0,6,2,2,2,1,0,0,6,5,0,4,4,0,4,7,6,4,4,2,4,4,7,6,0,4,0,4,4,0,4,4,4,4,6,6,4,4,6,4,6,4,4,4,6,0,4,6,4,4,6,0,4,0,6,4,4,4,4, +0,6,4,4,4,0,4,0,0,4,6,5,7,5,6,4,6,0,0,4,6,0,0,0,1,5,0,2,0,0,0,2,2,0,4,0,4,4,4,4,4,0,0,4,4,6,4,0,0,4,6,4,4,0,4,4,4,0,0,5,4,7,4,6, +4,4,4,0,4,4,4,4,4,4,6,1,0,4,0,4,0,4,0,4,4,4,4,7,6,5,0,4,0,4,4,7,4,7,0,8,0,4,4,4,6,1,3,1,0,0,0,2,0,2,4,0,4,7,0,0,4,5,5,4,4,7,6,4, +4,4,0,0,6,0,4,6,4,4,0,5,0,4,4,4,6,4,0,0,4,4,4,0,7,6,4,6,0,4,4,8,4,7,4,0,4,2,4,4,4,4,4,4,0,4,0,4,0,2,0,0,0,0,4,0,0,2,0,0,2,0,4,5, 
+7,4,0,4,6,4,0,4,0,4,0,0,4,4,7,0,6,6,4,4,4,4,4,6,0,0,4,4,4,0,4,4,6,4,4,4,4,4,4,0,4,2,0,2,6,4,4,4,0,0,0,4,4,0,4,4,4,4,4,4,0,4,0,0, +0,0,4,4,0,4,4,4,0,0,0,4,4,4,0,4,4,4,0,5,0,0,0,0,0,4,4,5,4,4,4,4,4,4,0,0,0,4,0,4,5,0,4,4,4,0,4,4,4,4,0,4,0,0,0,0,4,4,4,4,4,0,0,0, +4,0,4,4,5,0,0,0,4,4,4,4,0,0,0,4,4,4,4,4,0,0,4,4,4,4,4,0,0,1,4,0,7,8,2,7,2,4,0,3,4,0,0,0,4,4,0,4,0,4,7,4,4,0,4,6,4,6,7,4,4,5,7,4, +0,4,4,6,0,4,6,6,4,6,4,4,8,4,5,0,4,4,0,1,4,4,4,0,0,6,0,4,4,0,4,4,4,8,7,8,8,6,8,6,0,1,7,0,0,4,0,1,0,0,4,1,1,0,4,4,4,6,0,8,2,0,4,4, +4,2,6,6,6,0,7,2,4,7,7,3,4,6,4,4,0,4,2,2,4,4,7,4,4,4,2,6,0,0,4,4,4,2,4,4,4,4,4,4,0,0,6,4,0,4,4,4,4,0,4,6,3,7,4,6,0,0,5,1,4,4,0,7, +4,2,2,4,4,7,4,4,4,4,4,6,4,7,1,8,4,0,7,6,8,2,6,6,5,2,4,7,0,0,4,0,4,2,4,4,4,0,4,4,4,0,2,6,4,0,0,6,4,0,4,6,0,2,0,4,4,6,0,4,4,6,4,4, +4,4,4,4,4,6,6,2,4,4,0,4,4,6,0,7,6,8,0,6,6,6,6,8,6,6,6,6,0,4,0,6,4,6,2,8,6,4,6,6,6,2,6,4,4,0,6,6,6,0,2,4,8,2,4,1,7,0,0,4,0,2,5,4, +7,7,8,4,0,0,6,2,0,2,6,4,4,4,5,4,7,0,4,0,6,6,0,7,0,4,1,6,6,7,6,7,7,6,0,0,2,4,4,4,6,6,6,4,0,4,0,4,7,4,7,4,6,0,6,0,4,2,5,4,0,7,6,0, +4,6,6,7,4,6,6,4,7,6,4,4,7,4,4,6,6,0,8,7,6,4,5,0,4,7,4,6,4,4,0,2,0,8,1,2,2,2,2,2,0,2,2,7,4,6,6,0,4,6,2,7,4,0,4,4,7,7,4,7,7,6,6,7, +4,5,4,4,6,6,4,4,0,7,7,0,7,4,0,4,6,4,4,6,6,8,6,6,0,4,4,6,4,4,6,0,6,7,7,7,0,7,0,4,7,4,2,6,4,4,6,6,7,6,4,7,0,6,2,2,2,4,2,2,2,2,2,0, +4,6,0,4,6,0,0,6,4,4,4,7,6,4,6,4,4,4,6,0,0,0,4,7,4,6,6,7,0,4,7,4,4,0,4,0,0,7,0,6,4,5,7,6,4,4,6,4,4,6,7,6,4,6,4,4,4,6,4,8,6,6,7,4, +4,8,4,2,1,3,2,2,2,0,2,2,0,2,2,2,0,2,2,2,2,0,0,0,4,6,6,0,3,0,6,4,6,4,4,6,4,4,7,4,4,7,7,0,5,4,7,6,4,0,0,0,7,4,4,8,7,4,4,4,6,4,4,4, +4,0,6,4,4,6,4,4,8,6,4,4,7,4,6,0,4,6,4,6,6,6,0,4,4,1,0,3,1,2,2,2,2,2,0,2,2,0,2,2,2,2,6,6,4,0,6,7,0,0,7,4,4,6,0,4,6,0,4,4,5,4,4,6, +7,0,4,0,6,6,6,6,4,4,4,7,6,4,0,4,4,7,4,4,4,4,6,4,7,6,4,4,4,6,6,4,6,7,4,4,6,4,6,0,4,4,4,7,6,4,6,4,4,7,4,7,4,5,4,7,0,6,4,7,6,4,0,0, +4,0,0,4,0,8,0,1,1,1,2,1,2,2,2,0,2,2,0,1,2,0,2,2,0,4,7,5,4,6,0,4,7,7,4,6,6,7,5,4,4,3,0,6,5,6,4,7,4,4,6,4,6,4,4,0,6,4,6,4,4,4,4,4, 
+4,4,0,6,6,4,4,4,0,4,5,4,0,6,6,0,0,4,4,6,6,4,7,0,7,0,4,7,4,4,4,6,4,4,4,4,4,4,4,5,4,4,0,6,0,0,6,5,0,4,4,4,4,0,0,4,4,4,0,0,0,0,3,3, +1,0,0,2,0,2,2,0,4,6,0,0,1,4,4,6,7,0,4,4,4,0,5,4,6,4,0,4,7,5,4,6,4,4,7,4,4,6,4,6,0,0,6,4,0,0,4,6,4,6,6,4,6,0,7,6,0,4,7,2,4,6,4,4, +4,6,6,4,4,5,4,6,4,4,4,7,4,4,4,6,4,7,4,6,6,6,4,2,4,0,2,0,4,1,3,2,2,2,2,0,2,2,2,0,2,2,0,4,4,4,4,4,0,4,4,0,7,4,4,4,4,6,0,4,4,4,4,4, +4,5,7,4,0,4,7,4,4,7,4,0,0,4,4,7,0,0,4,6,8,7,0,0,4,4,4,0,4,0,4,4,7,4,5,5,2,4,4,4,0,6,6,7,5,6,4,6,4,6,4,6,0,4,6,4,5,4,4,4,4,0,6,4, +4,4,0,4,0,1,2,2,0,0,0,2,0,2,4,4,4,4,4,4,5,4,0,4,6,0,4,4,3,4,6,5,4,4,6,4,5,0,6,4,0,4,4,4,4,6,7,4,5,0,6,4,2,0,4,4,6,0,6,4,6,4,4,4, +4,0,4,0,7,4,4,4,6,6,0,4,6,6,6,0,4,0,4,4,4,4,4,5,0,0,0,0,2,0,4,0,4,6,4,4,4,4,4,6,4,4,4,4,4,4,4,4,4,5,4,6,4,0,6,0,4,6,4,0,4,4,0,4, +7,5,6,5,4,0,4,4,4,6,4,4,4,4,4,4,0,2,2,0,4,4,0,0,4,0,0,4,4,4,6,4,7,7,4,4,4,0,4,4,4,4,0,5,4,4,6,7,4,4,4,0,4,0,0,4,0,0,4,4,4,0,4,4, +4,4,0,0,4,4,4,4,0,4,4,4,4,4,4,4,0,6,0,0,4,5,4,4,4,4,6,6,4,4,1,4,4,4,4,6,4,0,5,5,4,4,4,4,1,0,0,4,4,4,4,4,4,6,0,4,4,6,4,4,4,0,4,4, +4,4,4,4,4,4,4,0,4,4,4,0,6,0,0,2,0,4,4,0,0,4,4,4,5,0,4,4,0,4,6,4,4,4,4,4,4,4,4,0,4,4,4,7,2,2,0,3,7,4,0,0,4,2,6,0,7,0,4,0,7,5,2,2, +2,0,4,4,4,6,4,0,0,3,7,0,0,0,7,0,0,4,6,4,6,6,6,0,4,6,4,0,2,2,0,4,0,4,0,0,4,0,0,0,0,4,0,6,6,7,6,6,4,6,0,6,4,4,0,6,6,8,8,2,2,2,4,0, +2,2,2,2,0,4,4,4,7,0,6,4,0,4,4,5,0,4,4,4,4,0,0,4,6,6,4,2,4,4,4,2,4,4,4,0,6,0,2,2,2,2,0,2,2,2,0,6,4,0,0,4,4,0,4,6,4,7,4,4,4,6,0,4, +4,0,4,0,4,0,4,0,0,6,6,4,4,4,4,0,6,0,0,6,1,2,2,4,2,6,7,4,4,0,4,4,4,8,4,4,0,4,7,0,4,0,0,0,0,0,4,6,6,6,4,0,0,0,8,0,0,0,4,0,1,0,0,0, +0,4,4,4,4,2,4,4,0,5,2,4,6,4,7,0,4,0,4,4,4,0,4,0,4,5,4,0,6,4,6,4,4,4,4,4,7,4,6,8,6,5,4,0,4,0,7,0,4,0,6,2,0,0,0,0,6,0,2,4,0,7,0,0, +4,4,4,0,6,4,4,4,0,4,7,0,0,0,0,6,4,0,4,0,7,0,0,4,2,6,4,4,4,4,4,7,6,4,0,0,4,4,0,4,6,4,4,0,6,0,0,4,4,5,4,6,0,6,0,0,4,6,0,0,4,4,4,4, +4,4,4,7,4,4,4,4,5,4,4,4,0,0,6,4,5,0,4,0,6,7,4,0,4,4,4,4,4,0,0,4,6,4,4,0,4,7,5,6,4,0,0,0,4,5,6,0,4,4,4,0,4,0,0,0,4,6,0,4,4,4,4,4, 
+0,4,4,4,0,4,7,4,0,0,4,0,4,4,0,0,4,0,0,4,4,0,0,0,0,4,4,4,0,6,4,4,0,0,4,4,0,0,4,4,6,4,7,0,6,4,0,0,6,8,0,0,0,7,7,2,6,6,5,6,0,7,5,6, +0,4,4,0,4,0,4,8,8,4,4,4,7,2,0,4,0,0,7,4,0,0,6,0,4,7,4,7,0,7,4,7,4,7,7,4,0,0,2,7,0,8,4,0,4,0,6,6,4,0,7,4,4,2,4,4,4,8,2,4,4,5,6,6, +7,6,0,0,6,4,4,0,4,4,2,6,4,6,4,2,4,4,6,4,0,4,4,4,4,0,4,4,0,4,4,2,1,4,4,4,4,4,4,4,4,4,4,0,7,2,4,7,6,0,0,0,6,4,3,2,2,2,4,0,0,4,0,4, +4,6,7,6,6,4,4,0,2,4,4,4,4,2,6,0,7,4,6,0,4,0,4,7,4,7,4,1,4,0,2,4,6,6,0,4,4,0,4,0,6,7,4,4,3,3,2,2,2,2,2,6,6,0,4,6,7,4,6,6,7,5,4,4, +4,6,0,2,0,0,0,4,4,0,6,4,4,0,2,4,0,4,4,6,0,2,6,6,4,4,0,7,6,6,6,1,0,2,6,4,0,6,4,4,0,6,3,3,2,4,3,0,4,6,4,4,6,4,5,7,2,2,4,4,4,0,6,7, +4,0,4,4,5,5,0,0,0,4,4,0,4,6,4,0,6,4,6,0,0,0,0,6,4,4,4,4,0,4,4,4,6,4,4,1,0,4,4,4,4,4,4,4,6,2,4,6,4,0,5,4,0,4,4,4,4,0,4,4,4,4,6,4, +4,4,4,4,7,4,4,7,4,7,4,7,0,0,6,0,0,2,4,4,4,4,7,4,0,0,0,2,0,4,0,6,4,4,6,0,4,4,4,0,4,8,0,6,4,4,2,2,8,0,7,6,4,4,4,6,4,4,2,6,4,0,4,4, +6,0,7,4,0,4,4,4,6,2,7,4,4,7,0,2,2,2,4,4,4,0,4,4,4,6,4,4,4,4,6,0,7,0,0,4,0,6,0,6,4,6,5,4,0,7,4,0,0,0,2,0,4,4,4,0,4,0,4,0,4,4,8,4, +4,4,0,8,4,6,8,4,4,7,6,4,4,4,0,2,2,0,0,0,0,0,4,0,0,0,6,6,0,4,0,0,4,4,7,4,4,6,6,0,6,4,6,4,6,4,6,4,6,4,4,7,7,7,7,0,0,0,4,0,2,0,0,4, +4,6,4,0,4,0,4,0,0,0,4,4,0,4,4,4,4,4,4,0,4,6,0,6,0,6,7,7,6,0,7,6,1,0,4,4,4,0,0,4,0,4,4,0,0,6,4,4,6,4,4,5,0,4,2,2,0,4,0,0,4,4,6,4, +6,6,0,7,0,4,4,6,4,4,4,6,0,0,2,0,6,0,4,0,4,4,0,4,4,0,4,0,6,0,6,4,4,4,0,4,0,4,4,6,6,6,4,4,0,4,0,4,5,4,4,0,0,4,4,4,4,0,6,4,0,5,4,4, +4,4,4,4,0,4,0,0,0,0,4,4,0,0,0,4,0,0,2,0,4,4,4,4,4,4,4,4,7,4,6,4,6,4,7,6,6,4,7,0,4,0,0,0,4,0,6,2,0,0,0,0,6,4,7,6,0,0,0,4,0,4,4,6, +4,0,4,4,6,0,0,4,4,0,0,4,4,6,0,6,4,7,4,6,4,4,4,4,7,2,7,0,7,4,0,8,0,4,4,1,0,7,4,0,8,6,4,7,6,6,4,6,8,8,7,7,0,8,0,8,6,4,7,8,0,4,6,4, +6,0,0,0,0,2,0,4,6,0,0,6,8,0,6,7,0,1,0,0,7,0,0,0,0,7,6,6,7,5,0,4,1,0,5,4,4,7,7,0,0,0,7,4,4,0,0,4,5,0,2,1,2,0,4,5,6,6,0,0,0,4,4,7, +4,0,0,2,4,0,6,4,0,0,4,7,4,0,1,7,4,7,2,0,6,4,2,2,4,6,6,0,0,6,0,2,2,2,4,6,6,6,0,4,0,0,4,7,2,0,2,2,6,2,7,6,2,6,4,0,6,7,4,4,6,6,7,4, 
+4,4,6,2,6,7,0,7,2,2,6,4,4,6,4,4,4,4,6,0,7,7,2,4,7,4,4,7,0,4,6,4,4,4,7,6,6,0,6,6,2,1,2,2,0,4,0,4,6,6,4,4,7,4,4,4,4,6,4,4,6,4,4,6, +6,6,0,6,0,2,0,0,4,4,6,4,6,4,0,4,6,4,0,4,4,6,4,2,2,6,4,6,4,4,4,6,6,4,6,4,6,6,6,4,4,6,2,2,0,6,0,4,6,4,4,6,6,6,0,0,6,0,4,0,6,4,2,2, +2,0,5,6,0,0,4,4,4,4,0,0,7,2,0,0,4,0,5,4,2,0,7,4,4,4,4,0,6,0,2,4,4,4,4,2,4,4,0,0,0,4,4,2,4,4,4,2,4,4,4,0,0,4,0,0,6,4,1,7,8,8,7,4, +0,4,6,0,8,0,7,7,6,4,4,6,0,0,6,4,1,2,4,6,0,4,6,0,0,6,4,4,4,4,4,0,0,0,0,0,6,0,4,0,0,0,4,4,0,4,7,4,4,2,2,0,6,4,0,0,4,0,4,4,0,4,4,7, +0,0,6,5,4,6,7,0,7,4,7,0,0,6,6,2,2,2,6,4,6,0,2,3,2,0,4,7,4,4,4,7,0,4,0,5,5,6,4,4,0,4,4,0,4,4,8,6,4,6,7,4,8,4,0,4,8,6,4,4,6,0,7,0, +0,7,0,4,6,4,0,6,6,7,4,8,1,2,0,0,4,4,4,4,0,4,0,0,0,6,6,4,0,4,0,8,7,0,6,4,0,4,2,4,6,6,0,0,0,6,0,6,0,4,4,4,4,6,6,6,6,4,7,4,8,4,4,0, +8,2,0,2,4,4,4,6,0,0,4,4,4,4,4,4,2,2,4,0,4,4,4,0,0,4,6,6,4,0,4,4,4,8,6,7,0,6,7,4,6,4,4,6,6,4,4,4,0,0,0,0,0,0,0,0,0,6,0,0,4,6,6,6, +6,4,4,4,6,6,0,4,4,4,0,4,6,6,6,4,0,6,2,0,0,4,0,4,0,4,4,4,4,4,4,6,6,4,6,4,0,7,0,6,4,2,6,4,7,5,0,0,6,4,4,7,4,6,4,4,0,0,4,6,4,6,0,6, +0,0,4,0,4,0,0,4,0,4,4,0,4,6,4,4,4,0,0,4,4,4,0,6,4,4,4,7,6,0,4,0,4,0,7,6,0,8,0,7,4,7,0,2,6,7,6,5,4,4,4,7,0,0,2,4,6,4,4,4,4,6,2,2, +2,2,7,4,0,4,4,0,0,6,0,0,6,6,4,4,4,6,6,4,7,1,2,2,2,0,2,0,2,6,0,6,4,4,4,6,0,7,7,7,4,6,4,4,6,6,4,4,6,4,5,4,7,4,0,6,6,2,3,2,2,0,2,1, +2,0,0,4,0,6,0,2,4,4,4,0,6,4,6,0,6,0,6,0,0,2,2,2,0,0,0,0,4,7,4,0,4,0,4,0,4,4,0,0,4,4,6,7,7,6,6,5,4,4,1,0,0,0,0,2,0,4,0,4,6,0,4,4, +4,5,0,4,4,4,4,7,0,6,0,0,6,3,6,4,0,7,0,7,4,5,4,7,6,4,6,2,2,0,4,6,4,6,4,6,4,6,0,7,4,5,4,4,4,4,0,0,6,2,6,6,6,0,0,0,0,2,5,4,4,0,6,0, +0,7,0,4,4,6,0,0,4,6,6,6,4,4,4,4,7,4,0,0,6,6,0,0,0,2,4,4,0,4,4,4,4,4,4,4,0,4,0,4,7,4,4,0,6,4,0,5,0,0,6,4,6,0,0,6,0,4,6,4,4,4,4,4, +0,7,4,0,4,6,0,0,0,4,0,0,4,0,5,0,4,4,4,6,4,0,0,4,0,4,0,4,4,4,6,0,0,4,0,4,6,4,4,4,4,4,4,4,4,4,0,4,0,4,0,0,2,4,0,0,4,4,8,2,3,4,8,4, +6,7,4,0,4,4,6,5,7,7,4,4,4,0,0,4,5,4,4,6,4,4,7,6,0,0,6,6,6,7,8,6,6,4,3,4,4,7,0,6,7,4,4,4,0,7,0,2,4,0,4,4,4,0,0,3,2,4,6,0,4,4,0,4, 
+2,7,4,0,3,3,0,0,4,0,6,4,0,5,5,8,4,0,4,4,4,4,4,4,4,0,6,4,4,0,0,0,4,4,4,0,4,0,5,6,4,0,4,4,4,4,4,0,5,4,4,6,4,0,4,4,4,6,6,6,0,7,7,5, +7,8,0,2,0,4,2,0,4,6,0,7,0,8,4,4,0,7,7,0,0,6,4,0,7,0,0,0,4,4,4,7,4,0,0,6,7,0,7,6,0,7,4,6,4,6,4,2,3,0,0,0,0,0,4,4,6,0,4,7,0,2,0,0, +7,0,6,6,4,4,2,0,4,0,4,7,4,6,3,0,4,4,4,4,7,4,0,7,4,4,7,4,5,0,6,4,6,0,4,2,0,0,4,0,4,0,0,4,0,0,5,4,4,4,1,2,0,0,0,6,0,6,0,6,7,7,0,7, +5,0,1,0,4,0,7,4,4,0,4,4,4,5,5,1,1,2,0,0,4,0,4,6,4,0,4,4,4,0,0,4,4,4,4,1,0,0,0,4,4,4,0,5,4,4,4,0,6,4,0,0,7,4,7,2,6,6,8,4,0,0,4,7, +6,7,0,3,7,4,6,4,6,4,4,4,4,2,0,4,4,2,7,1,4,6,6,6,6,4,0,0,2,2,4,7,6,0,4,4,0,2,2,0,6,4,5,4,6,2,5,5,0,4,4,6,4,4,4,4,4,0,5,0,0,0,4,6, +4,4,0,1,4,4,0,4,0,0,4,8,0,0,0,0,0,4,0,0,0,0,2,0,4,6,0,0,1,0,2,6,7,0,0,7,4,7,6,0,0,0,1,4,0,6,4,7,0,0,0,0,0,0,5,4,0,7,7,4,0,6,0,7, +4,4,0,2,6,4,6,0,7,4,6,2,0,0,4,6,4,8,4,4,2,2,0,0,4,6,4,7,0,4,6,0,7,0,4,4,6,7,7,0,6,0,6,6,8,4,6,4,4,6,4,6,4,4,0,0,6,1,2,4,2,0,2,0, +4,0,0,0,4,6,5,6,5,8,4,7,6,4,4,7,6,7,7,0,7,0,7,0,6,0,2,2,0,2,0,0,6,4,2,4,4,4,4,4,0,4,0,0,0,4,6,0,4,6,6,4,4,6,4,6,4,2,0,2,0,0,2,0, +2,0,4,0,4,6,1,5,4,0,4,4,4,6,4,4,6,4,0,0,7,7,4,7,4,4,0,4,6,6,0,0,4,7,2,0,4,0,2,2,2,2,3,2,6,7,0,4,0,7,0,0,6,4,0,4,7,4,0,0,0,0,4,0, +5,6,0,0,5,0,6,7,0,5,4,4,6,0,4,0,0,2,0,2,4,4,0,0,4,6,6,4,4,6,4,4,5,6,4,4,5,6,6,4,4,4,6,4,0,1,2,0,4,6,4,4,4,0,0,6,4,4,0,4,2,4,6,4, +4,4,4,4,0,4,4,6,0,4,4,6,6,4,4,6,4,4,0,0,0,0,2,0,0,4,0,0,4,4,4,6,4,5,4,4,0,4,6,6,4,4,6,4,4,4,0,0,4,0,0,4,0,0,0,4,7,4,0,4,4,4,5,7, +6,2,0,4,0,4,0,4,4,4,4,0,4,7,0,0,4,0,0,4,4,0,0,4,0,4,4,4,4,0,0,4,4,0,0,4,4,4,4,4,0,4,4,4,4,0,4,4,0,0,4,7,2,4,0,0,4,4,4,2,2,6,1,4, +4,5,1,0,4,4,0,0,4,7,4,1,4,5,0,0,0,6,7,0,4,7,4,7,7,0,0,1,2,2,6,7,0,4,6,4,2,7,0,5,4,0,2,0,0,0,2,4,0,6,6,6,4,4,0,0,0,6,4,4,6,6,7,4, +0,2,0,0,0,6,0,2,6,0,7,4,6,2,1,0,4,4,4,0,4,6,7,6,0,6,0,0,6,4,5,7,7,0,4,0,0,0,0,5,6,0,4,0,4,0,4,6,4,4,4,0,4,0,4,4,7,0,0,8,0,4,5,0, +5,4,4,4,5,5,4,4,4,4,6,5,4,5,4,4,5,4,4,4,5,4,4,5,5,5,5,5,4,4,4,4,7,5,7,0,0,0,0,2,4,4,0,7,5,0,4,7,5,4,4,5,0,4,4,0,4,5,5,4,4,4,4,4, 
+4,4,5,5,5,4,4,4,0,0,4,0,1,0,4,4,8,4,4,4,4,4,4,0,4,0,0,4,4,0,5,4,0,5,5,4,0,0,5,4,4,4,4,4,0,4,6,4,4,5,4,4,0,1,1,2,0,5,4,4,4,4,0,4, +4,4,0,4,4,4,4,0,4,0,0,0,4,4,4,4,0,4,4,4,4,0,4,0,0,1,1,0,5,4,4,4,4,4,4,4,0,0,6,4,0,4,4,0,5,5,6,4,4,5,5,0,5,4,0,4,4,4,4,5,4,4,5,5, +4,4,4,0,4,4,4,4,0,4,5,5,4,0,4,1,0,1,5,0,0,0,0,4,4,4,5,4,0,4,4,4,5,4,0,4,0,0,4,4,5,5,4,0,5,0,4,5,4,4,4,4,5,0,4,4,0,4,4,4,0,0,0,0, +0,1,0,4,1,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,4,5,4,4,5,4,4,4,4,4,4,4,1,0,0,4,4,5,0,4,5,4,4,4,4,4,4,4,4,4,4,4,4,6,4,4,5,4, +4,7,4,4,4,4,4,6,4,4,1,1,4,1,0,0,4,4,4,4,5,5,4,4,4,4,4,0,4,0,4,4,4,4,4,4,0,0,0,0,4,4,4,4,0,5,0,4,5,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0, +4,4,7,0,0,0,4,4,4,0,4,4,4,4,0,5,0,4,0,4,4,4,4,4,4,4,4,6,4,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,0,2,2,2,2,2,2,0,0,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,8,2,2,2,2,2,2,0,2,2,8,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,7,0,6,4,6,0,0,4,4,4, +0,0,2,4,6,6,0,0,4,0,4,4,4,4,0,4,6,8,0,0,6,6,0,2,6,0,2,4,4,4,4,6,4,6,2,4,0,4,4,4,6,6,7,5,4,4,7,0,5,2,7,4,2,5,4,5,0,6,4,4,4,4,6,4, +0,2,0,4,0,5,4,4,4,4,7,4,6,4,8,0,0,4,4,0,6,4,4,0,0,0,6,4,4,6,6,2,4,6,4,0,7,4,4,6,5,5,0,0,4,4,0,6,6,4,6,4,0,4,4,4,6,6,4,4,6,7,4,6, +4,7,4,4,0,6,0,4,0,4,6,4,7,4,6,4,4,4,5,0,6,6,0,4,2,0,0,4,4,0,4,6,7,6,4,4,0,6,6,0,4,6,4,5,4,4,6,4,7,6,4,6,4,4,0,4,4,4,0,7,7,4,4,4, +7,8,0,8,6,8,6,4,0,0,0,6,8,6,4,4,7,4,6,0,6,7,6,7,6,6,0,4,6,0,4,4,2,4,2,0,4,2,6,2,6,6,6,0,4,0,0,0,4,2,0,7,4,6,7,6,2,4,0,2,0,7,4,6, +0,0,2,6,0,0,6,4,4,0,6,2,2,2,0,4,4,4,6,0,2,0,5,0,6,0,6,0,4,4,8,0,0,1,0,0,4,0,0,4,0,2,2,0,4,0,0,5,4,6,4,4,1,4,4,5,4,4,0,0,0,4,5,6, +2,0,4,2,6,4,6,7,0,7,4,7,7,0,0,4,4,0,4,6,0,4,7,0,7,4,6,6,6,7,0,2,2,7,7,4,2,7,0,0,0,7,7,6,0,6,6,7,0,7,7,0,7,4,0,2,4,0,7,0,2,2,2,2, +2,2,6,7,6,4,3,4,4,4,4,0,7,6,7,4,4,4,0,0,4,4,6,6,4,6,6,6,6,6,7,0,4,7,0,4,7,6,4,2,2,2,2,2,2,6,0,6,6,6,6,6,7,4,2,0,7,4,6,4,6,8,4,0, 
+4,4,7,0,0,5,7,1,5,2,7,0,0,2,2,2,2,2,2,2,2,4,6,0,6,4,3,4,0,4,6,4,0,4,4,4,4,4,0,4,0,4,0,4,6,4,0,6,4,3,2,1,0,0,2,0,2,5,4,0,0,4,6,0, +0,0,0,4,4,0,6,4,2,0,6,6,6,4,5,4,7,6,4,6,7,7,0,0,0,2,2,4,4,0,4,0,6,4,4,0,4,6,4,6,0,6,0,5,0,2,6,4,7,6,4,4,6,0,4,4,5,7,7,2,2,2,2,7, +6,0,6,4,0,0,4,4,6,4,6,4,4,4,0,7,0,2,0,0,0,4,0,4,6,4,5,6,7,7,4,4,4,0,4,6,0,0,6,4,7,4,2,4,4,0,4,0,4,4,4,7,4,4,0,4,0,4,6,6,4,4,4,5, +6,2,6,6,4,4,7,4,0,4,6,0,6,4,0,4,4,4,4,1,0,4,0,4,4,4,4,4,0,4,4,4,4,4,4,7,0,5,4,6,5,4,8,0,6,7,4,0,0,0,4,7,7,0,0,4,4,4,4,6,7,0,6,4, +6,6,6,0,6,6,2,4,5,4,4,4,7,6,1,0,6,4,6,0,6,4,4,1,1,0,0,7,7,4,7,7,4,6,6,2,0,4,0,0,6,0,7,6,7,2,0,6,2,2,4,6,6,7,7,7,6,7,4,2,4,4,2,4, +4,0,4,0,6,4,0,7,0,6,0,6,0,0,4,6,4,4,4,4,0,4,4,4,6,4,6,4,4,0,4,6,0,4,0,4,4,0,5,0,6,4,0,4,4,4,7,8,2,4,8,2,6,4,1,4,4,2,2,0,4,6,6,6, +4,0,2,4,6,4,0,0,2,0,6,7,0,6,6,6,4,6,6,4,4,0,0,2,6,7,4,4,2,7,4,6,4,6,4,4,6,7,3,4,6,6,2,6,6,7,6,7,6,8,0,7,6,4,4,6,7,7,4,4,4,7,6,0, +4,2,4,4,2,1,0,2,2,0,2,2,2,2,2,2,0,7,6,7,7,6,4,7,2,4,0,7,6,0,6,6,6,6,0,6,6,7,7,5,4,0,4,7,4,4,0,6,4,8,4,4,6,4,4,2,0,6,4,6,0,0,4,0, +4,6,7,6,7,7,6,6,6,6,0,0,6,4,3,2,0,2,0,0,2,2,4,6,0,4,2,6,7,0,4,0,4,0,4,0,4,4,4,6,7,4,4,6,6,6,0,6,0,6,4,6,6,6,7,4,7,6,6,4,6,0,0,4, +6,4,4,6,4,4,2,6,4,7,5,4,4,4,4,7,6,6,7,4,6,0,4,0,1,0,2,2,2,0,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,4,4,4,4,7,6,0,4,7,6,6,4,4, +0,4,0,4,0,2,6,4,4,6,4,4,4,4,6,4,4,0,6,6,4,4,4,4,6,4,4,6,2,4,7,0,6,0,4,4,4,4,4,4,6,6,6,7,0,0,4,4,6,3,2,2,2,0,2,2,2,2,2,0,2,6,0,4, +6,6,4,4,4,7,4,6,4,4,7,4,7,0,4,6,0,4,0,1,6,4,7,4,6,4,0,4,7,6,4,7,6,6,4,4,4,6,0,4,4,7,6,4,4,0,4,5,7,7,6,4,4,4,4,0,6,6,4,4,4,6,4,4, +0,6,0,6,7,0,6,4,0,4,4,6,7,6,7,6,4,6,4,4,0,0,0,0,2,0,0,4,2,2,0,0,0,0,0,4,2,2,2,2,2,5,0,4,4,4,0,4,4,7,4,4,4,4,4,4,6,4,4,4,6,7,0,4, +4,0,4,4,4,4,6,4,0,5,0,4,4,4,5,0,4,6,4,0,4,0,4,7,0,6,6,7,2,4,4,4,4,7,0,7,0,4,5,4,4,6,0,6,7,6,4,4,4,3,0,6,4,7,6,4,6,4,7,0,0,4,4,0, +0,0,6,0,0,0,4,2,2,2,0,3,2,4,6,4,5,4,0,0,5,0,0,6,4,7,4,4,7,4,4,4,4,6,4,0,4,0,0,4,4,4,4,4,4,0,4,6,4,4,7,0,6,0,4,0,7,6,6,4,5,2,0,6, 
+4,6,4,0,7,4,4,4,0,7,6,5,4,6,4,4,6,7,4,6,4,0,6,4,0,0,0,4,0,2,0,2,2,0,0,2,0,2,2,4,4,4,4,4,7,0,5,4,6,4,4,4,4,0,4,4,0,4,4,4,6,4,4,6, +5,0,4,0,0,0,4,4,4,4,4,4,6,4,4,4,0,7,4,7,4,4,4,6,4,4,7,0,4,4,4,6,4,6,0,4,4,4,5,4,4,4,4,6,6,5,4,4,4,4,0,0,0,1,0,2,2,2,2,6,2,7,4,0, +4,4,0,7,4,4,4,4,6,7,7,0,0,4,5,0,0,4,0,4,4,4,6,5,4,6,0,4,0,4,6,0,4,4,4,4,6,4,4,4,7,5,5,4,4,4,4,0,0,4,2,0,2,4,4,4,4,6,6,6,4,0,6,0, +4,4,4,4,7,6,4,6,4,4,4,4,4,4,4,6,4,4,0,0,4,4,4,1,4,5,4,6,6,4,4,4,4,4,4,4,6,0,5,4,6,5,7,1,1,0,3,7,6,4,0,4,4,4,4,6,4,6,4,0,0,4,4,4, +4,3,4,4,4,4,4,4,4,6,0,4,0,5,4,6,6,4,4,2,0,6,0,4,4,4,4,0,6,4,4,4,0,4,4,4,7,4,4,0,4,7,4,4,4,4,0,4,4,4,4,0,0,0,4,5,4,4,4,7,0,4,4,6, +4,4,0,0,4,6,4,5,0,4,4,4,4,0,0,0,0,6,0,0,0,0,2,4,4,4,4,4,4,0,4,4,4,4,4,4,4,4,4,6,0,6,4,0,4,5,4,0,0,4,4,4,4,4,4,0,6,4,4,4,6,0,4,4, +4,0,0,4,0,0,4,4,4,0,0,4,4,6,7,2,7,2,4,4,6,4,4,0,0,4,3,4,5,0,7,4,0,4,6,4,4,4,0,4,4,4,4,7,2,4,6,4,4,6,0,4,4,0,0,4,0,7,6,7,6,2,2,2, +2,2,2,0,0,4,4,4,0,0,7,6,6,6,4,0,4,4,0,6,4,7,4,4,4,4,4,0,6,6,4,0,0,4,4,6,7,4,0,6,6,6,6,0,2,0,0,6,6,6,0,4,6,6,6,4,4,4,6,4,4,4,4,4, +6,4,4,4,6,4,6,7,4,6,2,7,4,1,3,2,6,6,0,4,6,0,4,4,6,7,4,6,4,4,6,6,0,0,4,4,7,0,4,0,4,6,4,4,4,7,3,0,2,2,2,2,2,4,4,4,7,6,4,4,0,0,7,0, +6,4,7,6,4,4,4,6,6,6,6,4,4,6,4,0,0,4,6,6,0,2,0,2,7,4,6,4,7,0,6,0,4,6,6,6,4,6,4,4,4,6,4,0,4,4,6,0,4,6,4,4,6,4,0,6,4,0,4,6,4,0,6,6, +4,4,4,4,0,0,4,2,2,3,0,1,6,4,6,4,4,4,4,6,4,5,4,6,4,6,4,4,4,4,4,4,6,4,4,6,6,6,5,4,4,4,4,4,4,4,6,4,2,0,0,4,6,4,7,4,4,4,4,6,2,2,2,1, +0,4,6,6,4,6,0,4,6,4,0,2,0,7,0,4,4,4,4,6,4,0,4,6,4,0,4,4,4,4,4,6,0,0,4,4,4,0,0,0,2,0,4,6,6,6,4,6,4,0,4,6,4,6,4,4,4,4,7,4,4,6,4,4, +6,0,4,4,4,4,6,0,4,4,6,6,4,0,0,0,0,6,6,6,4,0,0,4,4,4,0,6,4,4,4,4,6,4,4,4,4,6,4,4,4,0,6,4,4,4,2,4,0,0,4,4,4,0,4,4,0,7,4,5,4,0,6,4, +4,4,4,6,0,4,4,0,4,4,6,4,4,4,0,0,4,4,0,6,4,6,6,4,0,4,0,6,4,4,0,4,4,6,6,4,4,4,4,0,4,4,4,4,4,0,4,4,4,4,6,4,0,4,4,4,4,6,0,4,6,4,4,4, +7,4,0,4,6,2,1,0,4,0,4,4,8,6,4,0,0,0,4,5,2,4,4,7,0,6,4,5,0,5,0,0,0,7,6,7,2,2,0,4,8,6,4,6,2,4,2,4,7,4,6,0,4,4,4,7,0,4,0,0,4,6,6,7, 
+4,6,6,4,2,2,0,0,7,4,0,7,4,6,4,0,0,4,6,4,0,4,7,4,4,4,4,4,2,0,4,4,0,4,6,0,6,0,0,4,4,0,4,8,4,2,0,0,0,6,4,0,1,0,4,7,4,4,4,0,6,4,4,0, +4,7,7,0,0,3,2,0,0,6,4,4,4,4,6,5,4,0,6,0,6,7,4,4,6,6,4,4,5,4,4,7,0,5,2,2,2,2,0,4,6,0,0,4,4,0,4,4,6,6,4,7,6,0,4,4,7,6,4,4,6,5,7,0, +0,4,6,0,0,4,4,5,0,4,6,4,4,0,4,0,7,4,7,6,4,4,4,4,4,6,6,2,0,0,4,4,0,6,4,4,0,6,4,4,0,4,6,6,4,4,4,4,6,4,4,4,2,4,6,4,4,0,0,4,4,4,4,0, +0,6,4,0,6,0,4,0,0,4,0,4,4,0,0,4,4,4,4,4,0,0,5,4,4,4,4,4,4,4,6,7,4,4,4,4,4,0,6,0,0,4,4,4,4,4,4,4,0,4,5,4,4,0,4,0,0,4,4,6,4,0,4,8, +0,8,4,6,0,4,7,1,4,0,0,8,0,0,0,5,0,0,0,4,0,4,5,5,4,0,1,4,4,4,4,4,0,4,4,4,4,0,4,1,0,0,5,0,4,4,4,4,0,0,4,1,0,0,4,4,0,4,8,0,0,4,4,4, +4,2,2,0,2,2,8,2,2,8,2,2,2,0,2,2,2,2,7,4,4,4,6,0,0,4,6,4,6,4,2,4,4,4,4,8,4,6,7,0,4,4,0,6,4,4,0,2,4,4,4,6,4,0,4,4,4,0,4,4,4,0,4,4, +8,0,5,4,4,0,0,6,5,0,5,0,4,0,5,4,4,4,4,5,0,4,4,5,5,0,0,4,0,4,4,5,0,0,4,5,0,4,0,4,0,0,5,0,4,5,0,0,4,5,0,1,5,0,4,0,0,4,5,5,5,0,6,4, +4,4,0,0,4,4,4,0,6,0,4,0,4,4,4,4,5,5,4,0,5,5,4,4,4,4,0,4,0,0,5,0,5,4,4,5,0,0,5,0,4,5,0,5,4,4,5,0,5,8,5,5,4,4,4,4,0,6,4,4,4,0,0,4, +0,4,4,4,4,4,4,5,0,3,2,4,5,5,0,4,0,4,4,7,0,5,4,0,5,4,4,0,0,0,5,0,5,4,0,4,5,4,4,4,4,0,8,4,1,1,0,0,5,0,5,0,0,0,4,0,4,5,4,4,5,4,4,5, +4,0,4,4,4,4,4,5,4,4,0,5,1,4,0,5,0,4,5,4,4,4,5,4,4,4,0,4,5,4,4,4,4,4,4,0,4,0,5,4,4,0,0,4,0,5,5,4,4,4,4,4,4,4,4,4,5,0,5,4,4,0,5,4, +5,5,5,0,5,4,4,6,4,0,4,4,0,4,5,4,4,4,4,4,4,4,4,4,4,5,4,5,4,5,4,0,4,1,4,4,4,4,6,4,4,0,4,4,5,0,4,4,4,4,0,4,0,4,4,4,0,5,4,4,4,4,4,0, +4,4,4,0,0,4,4,4,4,4,4,4,0,0,4,4,4,4,4,4,4,4,4,4,5,4,4,0,5,4,0,4,4,0,0,4,0,4,8,4,4,0,4,4,6,4,0,4,5,0,1,0,4,0,0,5,4,4,4,4,0,4,4,4, +4,0,4,1,4,4,4,0,4,0,4,4,4,0,4,0,1,0,4,4,4,4,4,0,4,4,4,0,4,0,4,4,2,2,2,2,2,2,2,2,2,8,2,2,0,2,2,2,2,0,2,2,2,2,2,2,2,2,2,0,2,2,2,2, +2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,8,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,8,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,7,0,4,0,4,4,4,4,4, 
+0,6,4,4,0,4,7,6,4,6,5,4,6,4,4,4,4,0,0,0,4,6,4,4,0,0,7,0,4,4,0,4,0,7,6,0,4,4,4,0,4,4,7,6,4,4,0,4,4,4,4,6,0,4,4,4,6,7,6,4,0,4,0,0, +4,4,6,0,4,6,4,0,0,6,6,0,7,4,0,4,4,4,4,4,6,4,0,4,6,4,4,0,4,5,5,0,5,5,5,4,4,4,0,5,5,5,4,5,5,0,0,5,5,0,4,4,5,4,4,8,5,4,4,5,5,4,4,5, +5,4,5,5,5,4,0,5,4,0,5,0,4,0,1,4,0,5,4,5,0,4,0,4,0,4,4,1,5,4,5,4,5,4,5,4,4,4,5,4,4,0,5,0,4,5,4,0,4,4,0,4,4,4,0,0,4,4,4,4,5,4,4,0, +4,0,4,0,4,4,4,4,5,4,4,1,0,4,0,4,4,0,0,4,4,4,4,4,0,4,0,4,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,2,2,2,0,2,2,2,2,2,2,2,0,2,2,2,2,7,0,7,6,4,4,0,7,4,6,4,4,8,0,4,6,7,2,6,8,4,4,0,4,0,4,0,0, +4,6,0,0,6,8,0,0,0,4,8,2,4,4,4,4,4,6,4,4,6,4,4,0,0,4,0,4,4,0,0,6,4,4,0,7,0,4,0,4,5,0,4,4,4,4,4,4,0,2,4,7,6,6,4,4,2,4,6,0,6,0,6,6, +0,4,4,2,2,4,6,4,4,0,0,6,6,4,6,6,4,6,0,4,0,4,6,6,4,4,6,6,4,7,2,8,4,5,4,6,6,0,4,4,7,4,6,6,6,0,4,8,4,4,4,7,4,3,0,2,2,2,6,2,4,6,4,4, +4,0,4,4,4,6,4,4,0,6,7,0,2,4,0,7,4,4,4,4,6,4,4,4,4,4,4,4,4,6,6,6,4,4,6,6,4,4,4,4,0,6,2,4,2,0,6,2,4,6,0,4,4,6,4,0,4,6,2,0,4,6,4,4, +6,6,6,0,7,4,0,6,6,6,6,6,4,4,4,0,4,2,2,4,4,4,4,4,0,6,4,4,4,4,4,5,4,4,4,4,4,4,6,4,0,6,4,0,6,6,0,6,2,0,6,4,7,0,6,0,4,0,4,4,6,0,0,2, +0,6,4,0,4,6,4,6,4,4,4,4,4,5,4,2,6,4,4,4,6,4,4,4,4,0,4,0,2,4,6,4,4,4,0,4,4,4,4,0,4,4,4,8,6,0,0,3,0,0,6,0,0,0,0,0,0,0,6,0,0,4,0,0, +4,0,4,0,0,0,0,0,0,4,5,4,5,5,2,4,0,4,5,4,4,0,0,4,4,0,0,4,4,4,4,5,4,4,1,0,0,4,4,4,4,4,0,4,4,0,4,4,0,4,0,0,4,4,4,4,5,4,4,4,4,1,4,4, +4,4,4,5,0,4,4,4,4,5,4,4,0,4,4,0,4,4,4,4,5,4,4,4,4,0,4,4,4,5,4,4,4,0,0,4,4,4,4,0,0,5,5,0,4,0,4,5,0,0,4,4,4,4,4,4,5,4,0,4,0,0,4,5, +4,0,4,4,5,4,4,4,4,4,0,4,0,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4,5,4,5,4,4,4,0,8,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +0,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,0,2,2,2,2,2,2,2,2,7,6,0,3,6,0,0,0,6,0,0,4,0,6,2,0,2,0,4,4,4,7,7,5,0,4,0,2,0,0,2,1,1,1,2,2,5, +0,2,7,0,7,7,4,8,2,4,0,4,0,4,7,0,2,8,4,6,7,6,4,4,8,8,0,8,2,2,2,2,4,4,6,4,6,6,7,0,6,3,6,7,0,7,6,0,7,0,0,2,4,4,4,7,6,2,4,4,4,7,4,4, 
+7,8,6,7,6,6,7,0,0,2,2,6,4,6,0,7,7,6,2,1,7,4,6,7,0,8,8,6,4,7,6,7,7,6,7,5,4,0,2,0,0,0,0,0,0,6,7,6,0,5,5,0,4,6,6,0,7,0,0,2,7,4,6,4, +0,7,7,0,6,1,0,7,0,4,8,8,0,7,8,6,6,6,6,8,5,5,0,2,6,4,0,6,5,4,4,0,5,1,6,7,0,3,0,4,6,5,0,4,0,7,7,4,4,0,4,4,6,7,4,5,5,4,5,4,5,6,4,7, +6,4,6,6,8,4,4,4,6,0,4,6,0,4,0,4,4,7,0,2,4,6,0,6,4,6,0,6,0,2,4,4,4,6,6,8,0,4,7,4,0,0,7,0,2,0,2,6,6,6,4,6,6,6,6,0,7,2,2,2,0,4,6,4, +0,7,0,4,2,6,0,6,4,0,7,4,0,0,7,2,2,2,0,2,4,4,4,6,4,4,4,6,6,6,0,4,4,7,6,4,4,4,2,2,8,4,4,6,4,7,0,6,4,4,4,4,6,5,0,1,2,4,4,4,4,8,6,4, +4,4,6,4,6,4,4,4,4,4,0,4,0,4,4,4,4,4,4,0,4,0,4,4,4,6,4,4,4,4,6,4,4,4,6,6,4,0,4,4,4,4,4,4,4,5,4,6,4,6,4,4,4,4,4,0,4,6,4,4,0,0,4,4, +4,4,0,6,4,4,6,4,4,7,6,7,7,7,7,6,6,0,7,4,1,4,4,6,4,0,6,0,0,2,2,4,4,6,7,6,6,6,0,0,4,6,7,0,7,0,6,6,2,2,6,4,6,7,2,7,7,6,4,0,0,2,2,2, +4,4,4,4,4,6,4,7,0,6,4,6,2,7,0,4,7,6,7,4,0,0,0,1,0,4,6,6,5,4,4,4,4,4,6,6,1,4,0,4,4,0,6,4,4,6,6,6,4,4,4,4,6,6,0,4,1,4,6,0,4,4,4,0, +4,4,4,4,0,4,5,7,1,6,2,4,8,8,7,8,4,8,0,4,4,4,0,4,5,4,4,0,7,5,0,0,0,0,4,5,4,0,5,5,4,4,4,4,4,4,4,0,0,4,0,4,4,4,0,4,4,4,0,0,0,4,0,4, +4,4,4,4,4,4,4,4,0,4,4,0,4,5,1,4,4,4,4,0,4,4,4,4,0,4,4,0,4,0,4,0,0,0,0,4,4,4,4,4,0,0,0,0,0,4,4,0,4,4,4,4,5,0,4,5,4,4,4,0,0,4,4,4, +4,0,0,0,1,4,4,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4,0,0,4,5,0,0,4,0,4,4,1,4,4,4,5,4,0,0,0,0,4,4,0,4,4,1,4,0,2,0,4,0,4,4,4,4,4,4,5,4, +5,0,4,5,0,5,4,4,4,0,4,4,4,4,6,0,0,5,0,4,4,4,4,4,5,4,5,4,4,4,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,1,2,0,0,0,4,4,4,4,4,4,0,4,0,4,4,0,0,0, +4,4,4,4,4,4,4,4,6,4,4,4,4,4,4,4,4,4,5,4,0,4,0,4,4,4,0,0,0,4,4,4,4,4,0,0,5,0,4,4,4,4,5,0,0,1,4,4,4,4,1,1,0,0,0,4,5,4,4,4,5,0,4,4, +4,4,4,0,4,0,5,0,4,4,0,4,4,4,4,4,5,0,4,4,4,4,4,0,5,4,4,4,0,4,4,4,5,4,4,4,4,4,5,4,5,4,4,5,1,4,4,5,0,0,1,4,4,4,4,0,4,4,0,0,4,0,2,0, +0,0,0,0,0,0,4,4,0,4,4,5,4,5,4,4,4,4,4,0,5,4,4,4,4,4,4,5,4,0,4,0,4,4,0,4,4,4,0,4,0,0,6,0,5,4,0,0,4,4,0,0,0,5,4,4,0,4,4,4,4,0,5,0, +4,0,4,4,0,0,0,0,4,4,4,0,5,4,4,6,0,4,4,0,4,4,5,5,4,4,5,4,0,4,4,4,0,4,4,4,4,4,4,5,4,4,4,0,4,0,5,4,4,4,4,4,4,4,0,4,0,0,0,0,0,0,0,0, 
+4,0,4,4,4,0,0,4,4,0,6,0,4,4,4,4,4,5,4,0,4,4,6,4,4,4,4,0,4,4,4,4,0,5,4,4,4,0,4,4,4,0,0,0,4,0,4,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,4, +4,0,0,4,0,0,4,4,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,5,5,0,0,0,0,0,0,4,0,0,0,0,0,0,0,4,4,4,4,4,0,4,0,4,4,4,4,0,4,4,4,5,4,4,4,4,4,2,4, +4,0,0,0,4,4,4,4,4,4,4,4,4,0,0,4,4,5,4,1,0,4,0,4,0,0,0,0,0,4,4,0,4,0,4,4,4,0,0,0,4,4,4,6,0,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,4,4,4,4, +4,4,4,4,0,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,0,2,2,2,0,0,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,0,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,0,2,2, +2,2,2,0,2,2,2,2,0,2,2,0,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,0,0,2,8,0,0,4,4,4,4,0,8, +5,0,4,5,0,0,4,0,4,5,0,8,4,4,4,5,4,5,4,8,4,0,0,0,4,0,0,4,4,0,4,4,0,4,1,5,5,5,0,0,4,4,0,4,4,4,4,0,4,4,1,0,0,4,4,4,0,4,4,4,4,4,4,4, +4,0,0,4,0,4,4,5,4,4,4,4,4,4,0,0,4,4,4,4,4,4,4,0,1,0,4,4,4,0,4,4,4,4,4,4,4,4,0,0,2,2,2,2,0,2,8,2,2,2,2,2,8,2,2,2,2,2,2,2,2,2,2,0, +2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,2,2,0,7,2,4,2,4,6,6,4,4,0,0,0,4,0,7,0,4,4,6,4,4,6,7,2,2,2,2,0,0,4,4,7,6,6,0,8, +7,0,6,4,7,2,2,2,2,2,4,6,6,7,4,4,7,4,0,4,6,2,0,0,4,0,0,7,4,4,4,6,0,6,7,5,7,1,0,2,2,2,7,4,6,4,0,4,5,4,6,5,6,7,7,6,5,0,1,0,4,5,4,0, +4,0,0,4,4,7,7,4,7,4,5,6,0,6,5,3,2,4,4,4,7,4,0,6,6,7,0,5,7,0,4,0,1,4,4,1,4,0,0,6,4,4,4,0,4,0,4,0,6,4,0,6,4,0,2,1,4,6,0,5,7,2,2,4, +7,7,4,4,7,7,8,7,4,6,4,4,7,6,6,2,0,1,6,4,4,6,4,4,4,4,4,5,4,4,4,4,2,4,5,5,0,4,0,0,7,6,7,1,0,0,0,6,5,4,5,2,0,4,7,7,4,6,4,8,0,4,2,4, +7,2,4,0,6,4,6,7,6,6,1,4,0,6,6,6,4,4,4,6,0,0,6,0,4,4,0,0,7,4,7,4,4,0,4,4,4,4,4,5,4,4,6,4,4,2,4,4,6,0,7,0,0,4,0,0,6,6,4,0,0,4,6,4, +0,0,4,4,4,0,4,4,4,4,0,4,0,0,0,0,0,0,7,2,0,0,7,0,4,3,4,6,4,0,8,0,6,6,8,0,0,2,4,0,4,7,4,0,4,1,4,0,4,0,4,6,7,0,6,4,4,0,4,4,6,4,4,4, 
+4,4,4,4,5,6,0,0,4,0,4,6,0,7,4,4,0,2,2,0,6,0,0,4,7,4,4,0,4,4,4,0,7,4,4,6,4,4,4,0,4,0,4,6,4,7,4,2,0,0,2,4,2,0,4,4,0,4,0,4,0,0,0,4, +0,4,0,4,4,4,4,4,0,4,0,4,4,4,4,4,4,0,0,5,4,4,4,4,4,4,0,0,4,4,4,4,0,4,0,4,0,4,2,2,0,2,2,2,2,6,1,0,4,4,0,8,0,2,6,0,4,4,4,5,0,4,4,5, +4,5,5,5,4,5,5,4,5,0,4,0,4,4,4,4,5,5,5,5,0,0,4,5,5,0,1,4,4,4,4,0,4,4,0,0,4,0,4,4,4,4,0,4,1,5,0,4,4,0,4,0,1,4,0,4,4,4,0,5,1,0,0,0, +0,4,0,4,4,4,4,0,4,4,4,0,5,5,5,4,4,4,4,4,1,1,0,4,8,4,0,5,4,4,5,4,4,0,4,4,4,4,0,5,0,4,4,4,0,0,0,4,4,4,4,4,4,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,2,2,2,0,2,2,0,2,2,2,0,2,2,2,2,2,2,2,2,2,0,2,2,2,2,5,4,0,0,4,4,4,4,0,4,4,4,0,0,4,0,4,0,4,4,4,4,4,4, +4,4,4,0,4,0,4,0,0,4,0,4,4,0,2,0,0,2,2,2,0,2,0,0,2,2,2,5,0,0,2,8,0,0,5,4,0,4,0,6,2,4,4,4,0,4,0,5,0,0,5,0,5,0,4,0,0,4,4,0,5,5,5,0, +4,0,4,4,0,5,0,4,4,4,5,0,5,2,0,0,7,4,4,5,4,4,4,4,4,0,4,4,0,0,4,4,0,4,0,0,4,4,0,4,5,4,4,4,4,4,6,4,4,4,4,4,0,4,0,0,0,0,4,0,4,4,4,4, +0,4,0,4,0,4,0,4,4,4,0,4,4,0,4,0,4,4,4,4,6,6,4,5,4,4,0,4,4,0,4,4,0,4,0,2,0,2,0,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,0,2,0,2,2,0,2, +2,2,0,0,2,2,2,2,2,0,2,2,0,2,0,2,2,2,2,2,2,2,8,6,6,8,0,0,4,4,4,0,0,4,0,4,0,6,4,4,7,0,0,4,5,4,4,4,4,4,4,5,5,4,0,0,0,4,4,4,0,4,0,0, +0,5,4,4,1,1,1,4,1,4,0,4,4,4,4,4,5,4,5,4,4,5,4,4,4,4,0,4,4,4,0,4,0,0,4,4,4,4,0,4,0,4,4,0,4,4,4,0,4,4,0,0,4,0,4,4,4,4,4,4,4,4,4,5, +0,4,4,4,4,4,4,4,0,4,4,4,0,4,5,4,0,4,1,1,4,4,4,0,0,4,4,4,4,4,4,0,4,0,4,4,4,4,0,4,1,4,4,4,4,4,4,0,5,4,4,0,4,4,4,4,4,4,4,0,0,4,4,4, +4,4,4,4,4,4,4,0,4,4,4,0,4,4,4,4,4,0,4,4,4,4,4,4,0,4,5,4,0,0,4,4,4,0,4,0,4,4,4,4,4,4,4,4,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,0,2,2,2,2,2,0,2,2,2,0,0,2,2,2,2,2,0,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,0,2,7,0,0,4,0,4,0,4,6,6,0,4,4,0,2,6,7,4,2,4,6,0,4,4, +6,6,6,0,1,2,4,4,0,0,4,2,2,4,0,4,4,6,4,6,4,4,4,0,8,0,0,0,4,0,0,6,0,6,0,4,0,0,6,4,0,0,1,6,4,7,4,6,0,4,4,4,0,0,4,4,0,6,4,6,4,4,4,0, +0,4,0,6,4,4,4,0,6,0,4,4,4,4,4,2,4,4,4,2,0,4,4,4,4,4,4,0,0,0,4,6,4,0,4,6,4,4,0,4,4,4,0,4,0,0,4,6,0,4,6,4,0,4,0,4,0,0,4,6,7,0,4,4, 
+0,7,7,6,6,7,4,2,6,2,4,4,4,6,4,6,0,6,4,0,7,4,4,0,4,4,5,4,0,0,0,4,4,4,0,0,4,0,4,4,4,0,0,0,4,0,0,5,4,4,0,0,4,4,4,4,0,0,4,4,4,4,4,0, +4,0,4,0,0,4,4,4,4,0,0,0,0,0,1,0,4,4,5,4,0,4,0,0,0,0,4,4,0,0,4,0,4,4,4,0,4,4,4,0,4,0,5,5,0,5,5,4,0,0,0,0,0,4,4,0,4,4,0,0,0,4,0,4, +4,4,0,4,4,0,4,4,4,5,4,0,0,0,0,0,0,0,0,4,4,4,5,4,0,4,4,5,4,0,4,0,4,4,4,0,4,4,4,4,5,0,4,4,4,0,0,0,4,0,0,0,0,1,0,4,4,0,0,0,0,4,0,0, +0,0,0,0,0,4,4,4,4,4,0,4,0,5,4,0,1,0,4,4,4,0,0,4,0,0,0,0,4,4,0,0,0,4,0,4,4,4,0,0,4,4,0,4,4,5,0,1,0,4,4,4,4,0,4,4,0,5,0,5,4,4,4,4, +4,4,0,0,4,0,4,0,5,4,4,4,4,4,4,0,4,0,5,0,4,4,4,5,4,4,0,0,0,0,4,4,4,0,4,4,0,0,4,4,4,0,0,0,0,4,4,0,0,4,0,4,4,4,0,4,4,4,4,0,2,0,0,2, +0,2,2,0,0,2,2,2,2,0,0,2,0,2,2,0,2,2,2,0,2,2,0,0,0,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,0,2,0,2,2,0,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,0, +0,0,0,2,2,2,2,2,0,0,2,2,2,2,2,2,2,0,0,2,2,2,2,2,2,2,0,0,2,2,2,2,0,0,2,0,0,5,4,4,0,5,4,0,0,4,0,0,0,4,4,5,5,4,5,4,0,4,4,4,4,4,0,4, +4,0,0,4,4,4,4,5,4,4,0,0,0,0,1,0,4,0,4,0,4,4,0,4,4,4,0,5,0,4,4,4,4,0,4,4,0,4,4,0,5,4,0,1,1,4,4,4,4,4,0,4,0,0,4,4,4,0,0,5,0,4,4,4, +4,4,4,4,0,4,0,0,0,0,4,4,4,0,0,4,0,4,4,4,4,0,4,4,4,4,4,4,5,4,0,4,5,5,0,0,0,0,0,4,4,4,4,4,5,0,0,4,4,4,4,4,4,4,0,4,4,4,0,4,0,4,0,4, +4,4,4,0,4,4,4,4,4,4,4,4,4,0,0,1,4,0,4,0,4,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4,4,4,0,4,4,0,4,0,4,4,4,5,4,4,4,4,4,4,4,4,0,4,4, +0,4,4,4,0,4,0,4,4,0,4,4,4,4,4,4,4,4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4,4,4,4,0,4,4,0,4,4,4,4,4,4,0,5,4,4,4,4,4,4,5,5,4,0,4,4,4, +4,4,4,4,4,4,4,4,0,4,0,4,0,4,0,4,4,4,4,4,4,4,0,4,0,4,4,4,0,4,4,2,2,2,2,2,0,2,2,0,2,2,2,2,2,2,0,2,0,2,2,2,0,2,2,2,2,2,2,0,0,2,2,2, +0,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,2,0,0,0,2,0,2,2,0,2,2,2,0,2,0,0,0,0,2,2,0,2,2,2,2,2,2,2,2,0,0,2,2,0,2,0,4,0,0,1,4,4,0,4,4,2,7, +4,0,6,4,0,0,4,6,6,4,4,6,4,4,4,0,0,4,6,7,4,0,0,5,0,4,4,4,4,6,0,7,4,4,0,0,4,4,3,4,0,4,0,0,0,4,4,0,4,0,0,0,6,4,4,4,2,1,1,7,8,2,6,1, +4,0,4,4,3,0,0,0,4,2,0,0,4,7,6,6,4,8,1,4,6,4,4,0,6,1,4,7,6,6,8,6,6,0,2,0,4,6,4,6,4,2,2,4,0,4,4,6,4,0,4,4,4,4,4,4,0,6,4,6,6,4,2,4, 
+4,4,0,0,0,0,4,4,0,4,4,2,0,2,7,4,6,0,4,7,0,4,4,2,4,6,4,4,4,0,4,0,7,0,6,4,4,4,0,0,4,4,4,4,6,4,4,6,4,4,4,4,4,4,4,6,4,2,0,7,0,6,6,0, +4,4,4,4,2,0,4,4,4,4,4,4,4,4,4,4,2,2,4,0,4,4,4,4,4,4,0,4,4,4,4,4,4,4,1,4,4,4,4,4,0,0,4,4,4,0,4,4,4,4,4,0,4,4,4,4,4,4,4,4,0,0,4,2, +2,0,0,2,2,2,2,2,2,2,2,2,2,5,0,0,4,4,4,0,4,4,0,0,4,2,2,2,4,0,0,2,6,0,4,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0, +}; +const int kanji_replace[] = { +0x4e02,0x4e21,0x4e13,0x4e71,0x4e0f,0x4e26,0x4e0c,0x4e01, +}; + +// for Vietnamese +const int diacriticals[] = {0x0300,0x0301,0x0303,0x0309,0x0323}; +const int vowels[] = {0x0061,0x0065,0x0069,0x006F,0x0075,0x0079,0x00E2,0x00EA,0x00F4,0x0103,0x01A1,0x01B0}; +const int VIETNAMESES[] = { + 0x00E0,0x00E8,0x00EC,0x00F2,0x00F9,0x1EF3,0x1EA7,0x1EC1,0x1ED3,0x1EB1,0x1EDD,0x1EEB, + 0x00E1,0x00E9,0x00ED,0x00F3,0x00FA,0x00FD,0x1EA5,0x1EBF,0x1ED1,0x1EAF,0x1EDB,0x1EE9, + 0x00E3,0x1EBD,0x0129,0x00F5,0x0169,0x1EF9,0x1EAB,0x1EC5,0x1ED7,0x1EB5,0x1EE1,0x1EEF, + 0x1EA3,0x1EBB,0x1EC9,0x1ECF,0x1EE7,0x1EF7,0x1EA9,0x1EC3,0x1ED5,0x1EB3,0x1EDF,0x1EED, + 0x1EA1,0x1EB9,0x1ECB,0x1ECD,0x1EE5,0x1EF5,0x1EAD,0x1EC7,0x1ED9,0x1EB7,0x1EE3,0x1EF1 +}; +const size_t D=5, V=12; + +bool replace_vietnamese(cybozu::String &text, const int x) { + for(size_t d=0;d=0x30) x = '0';*/ + } else if (x<=0x5a) { + if (x>=0x41 && x!=0x49) x += 0x20; + } else if (x<=0xa0) { + if (x>=0x7f) x = ' '; + } else if (x<=0xde) { + if (x>=0xc0) { + if (x!=0xd7) x += 0x20; + } else if (x==0xb4) x = 0x27; + } else if (x<=0x4ff) { + if (x<=0x136) { + if (x>=0x100 && (x & 1) == 0 && x != 0x130) ++x; + } else if (x<=0x147) { + if (x>=0x139 && (x & 1) == 1) ++x; + } else if (x<=0x176) { + if (x>=0x14a && (x & 1) == 0) ++x; + } else if (x<=0x17d) { + if (x==0x178) { + x = 0xff; + } else if (x>=0x179 && (x & 1) == 1) ++x; + } else if (x==0x17f) { + x = 's'; // long s + } else if (x<=0x24f) { + if (x==0x1a0 || x==0x1af) { // for Vietnamese + ++x; + } else if (x>=0x1f8 && x<=0x232) { + if ((x & 
1) == 0) ++x; + if (x==0x219) { // for Romanian + x=0x15f; + } else if (x==0x21b) { + x=0x163; + } + } + } else if (x<=0x323) { + if (x==0x30c) { + auto pre = text.end() - 1; + if (*pre == 'c') { + *pre = 0x10d; + continue; + } else if (*pre == 's') { + *pre = 0x161; + continue; + } else if (*pre == 'z') { + *pre = 0x17e; + continue; + } + } else if (x>=0x300 && replace_vietnamese(text, x)) continue; + } else if (x<=0x3aa) { + if (x>=0x391 && x!=0x3a2) x += 0x20; // Greek lowerize + } else if (x>=0x400) { // Cyrillic lowerize + if (x<=0x40f) { + x += 0x50; // 0400-040f => 0450-045f + } else if (x<=0x42f) { + x += 0x20; // 0410-042f => 0430-044f + } else if (x<=0x4bf) { + if (x>=0x48a && (x & 1)==0) ++x; // 048a-04bf + } + } + } else if (x<=0x1ef9) { + if (x==0x6cc) { + x = 0x64a; // Farsi yeh => Arabic yeh + } else if (x>=0x1ea0 && (x & 1) == 0) ++x; // for Vietnamese + } else if (x<=0x206f) { + if (x>=0x2000) { + if (x<=0x2015) { + x = (x<=0x200f)?' ':'-'; + } else if (x==0x2018 || x==0x2019) { + x = 0x0027; // apostrophe + } + } + } else if (x<=0x24e9) { + if (x>=0x24b6) x = ((x-0x24b6) % 26) + 'a'; + } else if (x<=0x3096) { + if (x==0x3000) { + x = ' '; + } else if (x>=0x3041) { + x = 0x3042; // HIRAGANA 'A' + } + } else if (x<=0x30fa) { + if (x>=0x30a1) { + x = 0x30a2; // KATAKANA 'A' + } + } else if (x<=0x30fa) { + if (x>=0x30a1) { + x = 0x30a2; // KATAKANA 'A' + } + } else if (x<=0x9fcb) { + if (x>=0x4e00) { + unsigned char c = kanji_table[x - 0x4e00]; + if ( c < sizeof(kanji_replace) / sizeof(kanji_replace[0])) { + x = kanji_replace[c]; + } + } + } else if (x<=0xd7af) { + if (x>=0xac00) { + x = 0xc774; // hangeul + } + } else if (x<=0xff5e) { + if (x>=0xff01) { + x -= 0xff00-0x20; + if (x>=0x30 && x<=0x39) x = '0'; + } else if (x==0xff5e) x = 0x301c; // FULLWIDTH TILDE => WAVE DASH + } else if (x>=0x10000) { + x = ' '; + } + + auto j = text.end(); + if (text.length() < 2 || *(j-1) != x || *(j-2) != x) text += x; + + } + result = cybozu::regex_replace(text, 
REXLAUGH, "$1$2$1$2"); +} + + +class Corpus { + size_t totalN, maxN; + cybozu::String fulltext; + std::vector texts_; + std::unordered_map labels_; + +public: + const cybozu::String& text() const { return fulltext; } + const std::vector& texts() const { return texts_; } + const std::unordered_map &labels() const { return labels_; } + size_t size() const { return totalN; } + size_t maxsize() const { return maxN; } + + Corpus() : totalN(0), maxN(0), fulltext(LF) { + } + + void clear() { + totalN = maxN = 0; + fulltext = LF; + texts_.clear(); + labels_.clear(); + } + + void load(const std::vector &files) { + for (std::vector::const_iterator i=files.begin(), ie=files.end();i!=ie;++i) { + std::cout << "loading... " << *i << std::endl; + try { + cybozu::Mmap map(*i); + //cybozu::Utf8ref ref(map.get(), map.size()); + //cybozu::String s(map.get(), (cybozu::String::size_type)map.size()); + + const char *p = map.get(); + const char *end = p + map.size(); + loadOne(p, end); + + } catch (std::exception& e) { + printf("%s\n", e.what()); + } + + } + postLoad(); + } + + void loadOne(const char *begin, const char *end) { + std::regex_iterator i( begin, end, REXLINE ), iend; + for (; i != iend; ++i) { + const std::string& label = (*i)[2].str(); + const cybozu::String org_text((*i)[4].str()); + addText(label, org_text); + } + } + + void addText(const std::string &label, const cybozu::String &org_text) { + cybozu::String text; + normalize(text, org_text); + + auto i = labels_.find(label); + size_t j; + if (i==labels_.end()) { + labels_[label] = j = texts_.size(); + texts_.push_back(TextVec(label)); + } else { + j = i->second; + } + TextVec &vec = texts_[j]; + + size_t begin = fulltext.size() - 1; + fulltext += text; + fulltext += LF; + vec.vec.push_back(TextPos(begin, text.size() + 2)); + } + + void postLoad() { + std::sort(texts_.begin(), texts_.end(), + [](const TextVec &x, const TextVec &y){ return x.label < y.label; }); + labels_.clear(); + for (size_t i=0;i filenames; + typedef 
std::unordered_map > Dataset; + Dataset dataset; + size_t N; + Dataset::iterator test_iterator; + size_t test_index; +public: + std::vector combination; + Corpus train; + + CorpusFactory(const size_t div, const size_t test) : n_div(div), n_test(test) { + if (n_div <= n_test) + throw Exception("need [div] > [test]"); + } + + size_t size() const { return N; } + + void load(const std::vector &files) { + const size_t BUFSIZE = 4096; + char buf[BUFSIZE]; + for (std::vector::const_iterator i=files.begin(), ie=files.end();i!=ie;++i) { + std::ifstream ifs(*i); + if (!ifs.is_open()) { + throw Exception("cannot open the file"); + } + size_t n = 0; + while (!ifs.eof()) { + ifs.getline(buf, BUFSIZE); + std::string line(buf); + ++n; + + std::smatch m; + if (std::regex_match(line, m, REXLINE)) { + const std::string& label = m[2].str(); + dataset[label].push_back(TextLine(line, filenames.size(), n)); + } + } + filenames.push_back(*i); + std::cout << "loaded... " << *i << " : " << n << " lines " << std::endl; + } + + N = 0; + for (auto i=dataset.begin(), ie=dataset.end();i!=ie;++i) { + if (i->second.size() < n_div) + throw Exception("all labels need more than [div] texts."); + std::random_shuffle(i->second.begin(), i->second.end()); + N += i->second.size(); + } + } + + void sortDataset() { + for (auto i=dataset.begin(), ie=dataset.end();i!=ie;++i) { + std::sort(i->second.begin(), i->second.end(), + [](const TextLine &a, const TextLine &b)-> bool { + if (a.filename < b.filename) return true; + if (a.filename == b.filename && a.lineno < b.lineno) return true; + return false; + } + ); + } + } + + bool next() { + if (combination.size()==0) { + for (size_t i=0;isecond.size(); + for (size_t j=0;jsecond.size();++j) { + if (std::find(combination.begin(), combination.end(), j * n_div / M)==combination.end()) { + std::smatch m; + if (std::regex_match(i->second[j].line, m, REXLINE)) + train.addText(i->first, cybozu::String(m[4].str())); + } + } + } + train.postLoad(); + + test_iterator = 
dataset.begin(); + test_index = -1; + return true; + } + + bool nexttest() { + if (test_iterator==dataset.end()) return false; + ++test_index; + while (true) { + size_t M = test_iterator->second.size(); + if (test_index>=M) { + ++test_iterator; + test_index = 0; + if (test_iterator==dataset.end()) return false; + continue; + } + + size_t m = test_index * n_div / M; + if (std::find(combination.begin(), combination.end(), m)==combination.end()) { + test_index = ((m + 1) * M + n_div - 1) / n_div; + continue; + } + + return true; + } + } + + const std::string &testlabel() const { + return test_iterator->first; + } + + cybozu::String testtext() const { + std::smatch m; + if (std::regex_match(test().line, m, REXLINE)) { + cybozu::String s; + normalize(s, m[4].str()); + cybozu::String st(LF); + st.append(s); + st.append(LF); + return st; + } + throw Exception("bug?"); + } + + TextLine &test() const { + return test_iterator->second.at(test_index); + } + + size_t n_test_for_block() const { + size_t num = 1, den = 1; + for (size_t n = n_div - 1, r = n_test - 1;r>0;--n,--r) { + num *= n; + den *= r; + } + return num / den; + } + + void output(std::ofstream &os, const unsigned int score) const { + os << "********* score : " << score << " **********" << std::endl; + for (auto i=dataset.begin(), ie=dataset.end();i!=ie;++i) { + for (auto j=i->second.begin(), je=i->second.end();j!=je;++j) { + if (j->score == score) { + os << filenames[j->filename] << "(" << (unsigned int)j->lineno << ") : " << j->line << std::endl; + } + } + } + os << std::endl; + } +}; + +}} diff --git a/ldig/ldigcpp/ldig/da.hpp b/ldig/ldigcpp/ldig/da.hpp new file mode 100644 index 0000000..14ac766 --- /dev/null +++ b/ldig/ldigcpp/ldig/da.hpp @@ -0,0 +1,327 @@ +#pragma once + +/** + @file + @brief double array + + Copyright (C) 2013 Nakatani Shuyo / Cybozu Labs, Inc., all rights reserved. 
+*/ + +#include +#include +#include +#include "cybozu/string.hpp" +#include "type.hpp" +#include "esaxx/esa.hxx" + +namespace cybozu { +namespace ldig { + +typedef int CHAR; +const int K = 0x10000; // max CHAR + 1 +const LdigChar charLF = 1; + +typedef std::vector FeatureVec; + +class Features : public FeatureVec { + LdigString fulltext; + int nodeNum; +public: + const LdigString &text() const { return fulltext; } + int nodesize() const { return nodeNum; } + + void settext(const std::string &str) { + fulltext = str; + } + void settext(const LdigString &str) { + fulltext = str; + } + Features() : nodeNum(0) {} + + Features(const LdigString &str, size_t bound_feature_freq) : nodeNum(0) { + extract(str, bound_feature_freq); + } + + Features(const LdigString &str, const FeatureVec &features) : + FeatureVec(features), fulltext(str), nodeNum(0) + {} + + void extract(const LdigString &str, size_t bound_feature_freq) + { + fulltext = str; + size_t len = fulltext.size(); + std::vector SA(len), L(len), R(len), D(len), rank(len); + auto icv = fulltext.begin(), icvend=fulltext.end(); + for (;icv!=icvend;++icv) { + if (*icv == 0 || *icv >= K) *icv = 32; + } + + if (esaxx(fulltext.begin(), SA.begin(), L.begin(), R.begin(), D.begin(), (int)len, K, nodeNum) == -1){ + nodeNum = -1; + return; + } + + int r = 0; + for (size_t i = 0; i < len; i++) { + if (i == 0 || fulltext[(SA[i] + len - 1) % len] != fulltext[(SA[i - 1] + len - 1) % len]) r++; + rank[i] = r; + } + + for (int i = 0; i < nodeNum; ++i){ + unsigned int c = rank[ R[i] - 1 ] - rank[ L[i] ]; + if (D[i] > 0 && c + 1 >= bound_feature_freq) { + size_t begin = SA[L[i]], len = D[i]; + bool noLF = true, hasLetter = false; + for (size_t j=0;j0 && j=0x61 && c<=0x7a) || (c>=0xc0 && c<0x2000) || (c>=0x20a0 && c<0x20d0) || (c>=0x2c00 && c<0x3000) || (c>=0x3040)) { + hasLetter = true; + } + } + if (noLF && hasLetter) { + push_back(Feature(begin, len, c + 1)); + } + } + } + std::sort(begin(), end(), + [&](const Feature &a, const Feature 
&b)->bool { + size_t i=a.begin, j=b.begin; + while (i fulltext[j]) return false; + if (fulltext[i] < fulltext[j]) return true; + ++i; + ++j; + } + if (a.len>b.len) return false; + return true; + }); + } + + Features(const Features &features) : FeatureVec(features), nodeNum(0) { + typedef std::pair IdxFtr; + std::vector list; + for(size_t i=0;ibool {return a.second.len > b.second.len;}); + + for(auto i=list.begin(), ie=list.end();i!=ie;++i) { + LdigString st(i->second.str(features.text())); + size_t j = fulltext.find(st); + if (j==fulltext.npos) { + j = fulltext.length(); + fulltext.append(st); + } + at(i->first).begin = j; + } + } + + void shrink() { + typedef std::pair IdxFtr; + std::vector list; + for(size_t i=0;ibool { + if (a.second.begin == b.second.begin) return a.second.len > b.second.len; + return a.second.begin < b.second.begin; + }); + + LdigString orgstr; + orgstr.swap(fulltext); + + size_t pre_begin = 0, cur_begin = 0, pre_end = 0; + for(auto i=list.begin(), ie=list.end();i!=ie;++i) { + if (i->second.begin > pre_begin) { + if (i->second.begin > pre_end) { + cur_begin += pre_end - pre_begin; + } else { + cur_begin += i->second.begin - pre_begin; + } + } + at(i->first).begin = cur_begin; + pre_begin = i->second.begin; + if (pre_end < pre_begin + i->second.len) { + pre_end = pre_begin + i->second.len; + size_t cur_end = pre_begin + (fulltext.size() - cur_begin); + if (pre_end > cur_end) + fulltext.append(orgstr.begin() + cur_end, orgstr.begin() + pre_end); + } + } + } + + inline LdigString str(size_t index) const { + return at(index).str(fulltext); + } +}; + + +class Pos { +public: + unsigned int index, left, right, depth; + Pos(unsigned int i, unsigned int l, unsigned int r, unsigned int d) : index(i), left(l), right(r), depth(d) {} +}; + +class Branch { +public: + unsigned int index; + LdigChar chr; + Branch(unsigned int i, LdigChar c) : index(i), chr(c) {} +}; + +class DoubleArray { + size_t N; +public: + std::vector base, check, value; + + size_t 
size() const { return N; } + + void extend_array(size_t max_cand) { + size_t oldN = N; + while (N <= max_cand) N *= 2; + if (N <= oldN) return; + base.resize(N); + check.resize(N); + value.resize(N, -1); + for (size_t n=oldN;n queue; + queue.push(Pos(0, 0, (unsigned int)features.size(), 0)); + int max_index = 0; + const LdigString &text = features.text(); + while(!queue.empty()) { + const Pos &pos = queue.front(); + unsigned int index = pos.index, left = pos.left, right = pos.right, depth = pos.depth; + queue.pop(); + if (depth >= features[left].len) { + value[index] = left++; + if (left >= right) continue; + } + + // get branches of current node + std::stack stack; + std::vector result; + stack.push(Branch(right, -1)); + unsigned int cur_index = left; + LdigChar cur_chr = text[features[left].begin + depth]; + while (!stack.empty()) { + while (cur_chr == stack.top().chr) { + cur_index = stack.top().index; + cur_chr = stack.top().chr; + stack.pop(); + } + unsigned int mid = (cur_index + stack.top().index) / 2; + if (cur_index == mid) { + result.push_back(Branch(cur_index + 1, cur_chr)); + cur_index = stack.top().index; + cur_chr = stack.top().chr; + stack.pop(); + } else { + LdigChar c2 = text[features[mid].begin + depth]; + if (cur_chr != c2) { + stack.push(Branch(mid, c2)); + } else { + cur_index = mid; + } + } + } + + // search empty index for current node + LdigChar v0 = result[0].chr; + int j = - check[0] - (int)v0; + while (true) { + auto i = result.begin(), ie = result.end(); + for (;i!=ie;++i) { + size_t k = j + i->chr; + if (k < N && check[k] >= 0) break; + } + if (i==ie) break; + j = - check[j + v0] - v0; + } + int tail_index = j + result.back().chr; + if (max_index < tail_index) { + max_index = tail_index; + extend_array(tail_index + 2); + } + + // insert current node into DA + base[index] = j; + depth++; + for (auto i = result.begin(), ie = result.end();i!=ie;++i) { + int child = j + i->chr; + check[base[child]] = check[child]; + base[-check[child]] = 
base[child]; + check[child] = index; + queue.push(Pos(child, left, i->index, depth)); + left = i->index; + } + } + shrink(max_index); + } + + void shrink(size_t max_index) { + N = max_index + 1; + check.resize(N); + base.resize(N); + value.resize(N); + } + + DoubleArray() {} + DoubleArray(const Features &features) { + construct(features); + } + + int get(const LdigString &key) const { + size_t cur = 0; + for(auto i=key.begin(), ie=key.end();i!=ie;++i) { + size_t next = base[cur] + *i; + if (next >= N || (unsigned int)check[next] != cur) return -1; + cur = next; + } + int v = value[cur]; + if (v>=0) return v; + return -1; + } + + void extract_features(Events &events, LdigString::const_iterator begin, LdigString::const_iterator end) const { + for (auto i1=begin;i1!=end;++i1) { + size_t pointer = 0; + for(auto i2=i1;i2!=end;++i2) { + size_t next = base[pointer] + *i2; + if (next >= N || (unsigned int)check[next] != pointer) break; + int id = value[next]; + if (id >= 0) { + events[id]++; + } + pointer = next; + } + } + } + + void extract_features(Events &events, const LdigString &key) const { + extract_features(events, key.begin(), key.end()); + } + + void extract_features(Events &events, const LdigString &fulltext, const TextPos &pos) const { + LdigString::const_iterator begin = fulltext.begin() + pos.begin; + extract_features(events, begin, begin + pos.len); + } +}; + +}} diff --git a/ldig/ldigcpp/ldig/ldig.cpp b/ldig/ldigcpp/ldig/ldig.cpp new file mode 100644 index 0000000..a9919b0 --- /dev/null +++ b/ldig/ldigcpp/ldig/ldig.cpp @@ -0,0 +1,476 @@ +/** + @file + @brief language detection with infinite-gram + + Copyright (C) 2013 Nakatani Shuyo / Cybozu Labs, Inc., all rights reserved. 
+*/ + +//#define CYBOZU_USE_STACKTRACE +//#define CYBOZU_STACKTRACE_RESOLVE_SYMBOL + +#include +#include +#include +#include + +#include "type.hpp" +#include "corpus.hpp" +#include "da.hpp" +#include "ldig.hpp" +#include "util.hpp" + +//ifstream.exception(std::ifstream::failbit | std::ifstream::badbit); + +#if defined(_WIN32) && defined(_DEBUG) +#include +void worksize() { + PROCESS_MEMORY_COUNTERS info; + GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + std::cout << info.WorkingSetSize << std::endl; +} +#else +void worksize() {} +#endif + + +enum MODE { + initialization, + learning, + detection, + shrink, + maxsubst, + dump, + varidation +}; + +const size_t BUFSIZE = 4096; + +void saveModel(const cybozu::ldig::Model &model, const std::string &modelpath) { + std::ofstream ofs(modelpath, std::ios::binary); + if (!ofs.is_open()) { + throw cybozu::ldig::Exception("cannot open the model path"); + } + model.save(ofs); + ofs.close(); +} + +void loadModel(cybozu::ldig::Model &model, const std::string &modelpath) { + std::ifstream ifs(modelpath, std::ios::binary); + if (!ifs.is_open()) { + throw cybozu::ldig::Exception("cannot open the model path"); + } + model.load(ifs); + ifs.close(); +} + +void ldig_dump(const std::string &modelpath, const std::string &outputpath) { + cybozu::ldig::Model model; + loadModel(model, modelpath); + + std::ofstream ofs; + ofs.open(outputpath, std::ios::binary); + if (!ofs.is_open()) { + throw cybozu::ldig::Exception("cannot open output file"); + } + //ofs.precision(3); + //ofs << std::fixed; + + for (auto i=model.features.cbegin(), ie=model.features.cend();i!=ie;++i) { + ofs << i->str(model.features.text()) << std::endl; + } + ofs.close(); +} + +void ldig_shrink(const std::string &modelpath) { + cybozu::ldig::Model model; + loadModel(model, modelpath); + + const size_t K = model.K; + const size_t org_M = model.M; + const size_t org_text_len = model.features.text().length(); + std::cout << "labels : " << K << std::endl; + + 
std::vector summary = model.shrink(); + const size_t new_M = model.M; + + std::cout << "features : " << org_M << " => " << new_M << std::endl; + std::cout << "feature text length : " << org_text_len << " => " << model.features.text().length() << std::endl; + std::cout << "nonzero params"; + size_t nonzeros = 0; + for (size_t k=0;k<=K;++k) { + std::cout << " " << k << ":" << summary[k]; + nonzeros += k * summary[k]; + } + std::cout << std::endl; + std::cout << "# of nonzeros : " << nonzeros << std::endl; + std::cout << "density : " << (LdigFloat)nonzeros / (org_M * K) << " => " << (LdigFloat)nonzeros / (new_M * K) << std::endl; + + saveModel(model, modelpath); +} + +size_t extract_label(std::string &label, const std::string &line, const std::unordered_map &labelmap) { + size_t i = line.find("\t"); + size_t label_k = -1; + if (i!=std::string::npos) { + label = line.substr(0, i); + auto lb = labelmap.find(label); + if (lb == labelmap.end()) { + size_t j = line.find("\t", i+1); + if (j!=std::string::npos) { + label = line.substr(i+1, j-i-1); + lb = labelmap.find(label); + } + } + if (lb != labelmap.end()) { + label_k = lb->second; + } else { + label = ""; + } + } + return label_k; +} + +void ldig_detect(const std::string &modelpath, const std::string &outputpath, const std::vector &files, LdigFloat margin) { + cybozu::ldig::Model model; + loadModel(model, modelpath); + const size_t K = model.K; + std::cout << "labels : " << K << std::endl; + std::cout << "features : " << model.M << std::endl; + + std::ofstream ofs; + if (outputpath.length() > 0) { + ofs.open(outputpath, std::ios::binary); + if (!ofs.is_open()) { + throw cybozu::ldig::Exception("cannot open output file"); + } + ofs.precision(3); + ofs << std::fixed; + } + + char buf[BUFSIZE]; + + std::map > predicted; + LdigFloat log_likelihood = 0; + for (auto i=files.begin(), ie=files.end();i!=ie;++i) { + std::cout << "loading... 
" << *i << std::endl; + std::ifstream ifs(*i); + if (!ifs.is_open()) { + throw cybozu::ldig::Exception("cannot open a test file"); + } + + while (!ifs.eof()) { + ifs.getline(buf, BUFSIZE); + std::string line(buf); + + std::string label; + size_t label_k = extract_label(label, line, model.labelmap); + + size_t i = line.rfind("\t"); + cybozu::String text; + if (i!=std::string::npos) { + cybozu::ldig::normalize(text, line.substr(i+1, line.length() - i - 1)); + } else { + cybozu::ldig::normalize(text, line); + } + if (text.length()<=0) continue; + text = "\x01" + text + "\x01"; + + + //size_t predict_k = model.predict(y, text); + cybozu::ldig::Events events; + model.trie.extract_features(events, text); + if (margin > 0 && events.size() < 10) continue; + + std::vector y(K); + size_t predict_k = model.predict(y, events); + if (label_k != (size_t)-1) { + const size_t label_k = model.labelmap.at(label); + predicted[label_k][(y[predict_k] >= 0.6)? predict_k : -1] += 1; + if (y[label_k] > 0) log_likelihood -= log(y[label_k]); + } + + const std::string &predict_label = model.label(predict_k); + LdigFloat score = y[predict_k]; + if (margin > 0) { + LdigFloat top = 0, second = 0; + for (auto i=y.begin(), ie=y.end();i!=ie;++i) { + if (*i>top) { + second = top; + top = *i; + } else if (*i>second) { + second = *i; + } + } + score = top - second; + if (score > margin) continue; + } + ofs << score << "\t" << predict_label << "\t" << line << std::endl; + } + ifs.close(); + } + + size_t cor = 0, sum = 0; + for (auto k=predicted.begin(), ke=predicted.end();k!=ke;++k) { + size_t s = 0; + std::ostringstream buf; + for (auto j=k->second.begin(), je=k->second.end();j!=je;++j) { + s += j->second; + buf << " " << model.label(j->first) << ":" << j->second; + } + size_t c = 0; + auto l = k->second.find(k->first); + if (l!=k->second.end()) c = l->second; + std::cout << model.label(k->first) << " " << c << " / " << s << " = " << (LdigFloat)(c) / s << " (" << buf.str() << " )" << std::endl; + cor 
+= c; + sum += s; + } + if (sum>0) { + std::cout << "total : " << cor << " / " << sum << " = " << (LdigFloat)cor/sum << ", neg log likelihood " << log_likelihood << std::endl; + } + +} + +void ldig_init(const std::string &modelpath, const std::vector &files, size_t bound_feature_freq, LdigFloat eta, LdigFloat reg) { + worksize(); + + cybozu::ldig::Corpus corpus; + corpus.load(files); + worksize(); + + const cybozu::String& fulltext = corpus.text(); + std::cout << "corpus : " << corpus.size() << std::endl; + std::cout << " chars : " << fulltext.size() << std::endl; + + const size_t K = corpus.labels().size(); + std::cout << "labels : " << K; + + cybozu::ldig::Model model(K); + for (auto i=corpus.texts().begin(), iend=corpus.texts().end();i!=iend;++i) { + std::cout << " " << i->label; + model.labellist.push_back(i->label); + } + std::cout << std::endl; + model.generate_labelmap(); + + model.generate_features(fulltext, bound_feature_freq); + + std::cout << "features : " << model.M << std::endl; + //for(auto i=features.begin(), ie=features.end();i!=ie;++i) std::cout << i->str(fulltext) << "\t" << i->count << std::endl; + + std::cout << "darray : " << model.trie.size() << std::endl; + worksize(); + + time_t t = time(0); + for(size_t n=0;n<10;++n) { + model.learn(corpus, eta, (n<5)?0:reg); + if (n>3) { + const size_t pre_M = model.M; + model.shrink(); + std::cout << "fetures : " << pre_M << " => " << model.M << std::endl; + } + std::vector correct(K); + LdigFloat lh = model.likelihood(correct, corpus); + size_t c = 0, s = 0; + for (size_t k=0;k &files, const std::string &outputpath, const size_t cvn, const size_t cvt, size_t bound_feature_freq) { + cybozu::ldig::CorpusFactory validator(cvn, cvt); + worksize(); + validator.load(files); + worksize(); + while (validator.next()) { + std::cout << "testing .."; + for (auto i=validator.combination.begin(), ie=validator.combination.end();i!=ie;++i) { + std::cout << " " << *i; + } + std::cout << std::endl; + + const 
cybozu::ldig::Corpus &corpus = validator.train; + cybozu::ldig::Model model(corpus, bound_feature_freq); + + LdigFloat eta = 0.1; + for(size_t n=0;n<5;++n) { + model.learn(corpus, eta); + eta *= 0.8; + } + + while (validator.nexttest()) { + std::vector prob; + size_t predict_k = model.predict(prob, validator.testtext()); + if (model.labellist[predict_k] == validator.testlabel()) ++validator.test().score; + } + } + + if (outputpath != "") { + std::ofstream ofs; + ofs.open(outputpath, std::ios::binary); + if (!ofs.is_open()) { + throw cybozu::ldig::Exception("cannot open output file"); + } + ofs.imbue(std::locale("C")); + + validator.sortDataset(); + size_t T = validator.n_test_for_block(); + for (unsigned int score=0;score < T; ++score) { + validator.output(ofs, score); + } + } +} + +void maxsubstring(const std::string &input, const std::string &output) { + std::ifstream ifs(input, std::ios::binary); + cybozu::String str(std::istreambuf_iterator(ifs.rdbuf()), std::istreambuf_iterator()); + std::cerr << " chars:" << str.size() << std::endl; + + cybozu::ldig::replace(str, "\n", 1); // replace \n => \u0001 + cybozu::ldig::replace(str, "\t", 32); // replace \t => ' ' + + cybozu::ldig::Features result; + result.extract(str, 2); + std::cerr << " nodes:" << result.nodesize() << std::endl; + std::cerr << " maxsubst:" << result.size() << std::endl; + + std::ofstream ofs(output, std::ios::binary); + for (auto i=result.begin(), ie=result.end();i!=ie;++i) { + ofs << str.substr(i->begin, i->len) /* << "\t" << i->count */ << std::endl; } +} + + + + +int main(int argc, char* argv[]) +#ifndef _DEBUG + try +#endif +{ + int bound_feature_freq = 5; + LdigFloat eta = 0.1, reg = 0; + size_t cvn = 5, cvt = 2; + MODE mode = detection; + LdigFloat margin = -1; + std::string modelpath("ldig.model"), outputpath(""); + + std::vector files; + for(int i=1;i=argc) goto ERROR_OPT_FF; + bound_feature_freq = atoi(argv[i]); + } else if (st == "-e") { + if (++i>=argc) goto ERROR_OPT_E; + eta = 
atof(argv[i]); + } else if (st == "-r") { + if (++i>=argc) goto ERROR_OPT_R; + reg = atof(argv[i]); + } else if (st == "-m") { + modelpath = argv[++i]; + } else if (st == "-o") { + outputpath = argv[++i]; + } else if (st == "--init") { + mode = initialization; + } else if (st == "--learning") { + mode = learning; + } else if (st == "--detection") { + mode = detection; + } else if (st == "--shrink") { + mode = shrink; + } else if (st == "--maxsubst") { + mode = maxsubst; + } else if (st == "--dump") { + mode = dump; + } else if (st == "--cv") { + mode = varidation; + } else if (st == "--cvn") { + if (++i>=argc) goto ERROR_OPT_CV; + cvn = atoi(argv[i]); + } else if (st == "--cvt") { + if (++i>=argc) goto ERROR_OPT_CV; + cvt = atoi(argv[i]); + } else if (st == "--margin") { + if (++i>=argc) goto ERROR_OPT_MARGIN; + margin = atof(argv[i]); + } else { + files.push_back(st); + } + } + + switch (mode) { + case initialization: + ldig_init(modelpath, files, bound_feature_freq, eta, reg); + break; + + case learning: + std::cerr << "learning is not implemented yet" << std::endl; + break; + + case detection: + ldig_detect(modelpath, outputpath, files, margin); + break; + + case shrink: + ldig_shrink(modelpath); + break; + + case varidation: + ldig_varidation(files, outputpath, cvn, cvt, bound_feature_freq); + break; + + case maxsubst: // for debug + maxsubstring(files[0], files[1]); + break; + + case dump: + ldig_dump(modelpath, outputpath); + break; + } + + return 0; + + + + /* error */ + + char *p; +ERROR_OPT_E: + p = (char *)"[ERROR] -e option needs positive real number"; + goto ERROR_EXIT; + +ERROR_OPT_R: + p = (char *)"[ERROR] -r option needs positive real number"; + goto ERROR_EXIT; + +ERROR_OPT_FF: + p = (char *)"[ERROR] --ff option needs non-negative integer"; + goto ERROR_EXIT; + +ERROR_OPT_CV: + p = (char *)"[ERROR] --cvn/cvt option needs positive integer"; + goto ERROR_EXIT; + +ERROR_OPT_MARGIN: + p = (char *)"[ERROR] --margin option needs positive integer"; + goto 
ERROR_EXIT; + +ERROR_EXIT: + std::cerr << p << std::endl; + return 1; + +#ifndef _DEBUG +} catch (std::exception ex) { + printf("err = %s\n", ex.what()); +#endif +} diff --git a/ldig/ldigcpp/ldig/ldig.hpp b/ldig/ldigcpp/ldig/ldig.hpp new file mode 100644 index 0000000..de9a600 --- /dev/null +++ b/ldig/ldigcpp/ldig/ldig.hpp @@ -0,0 +1,363 @@ +#pragma once + +/** + @file + @brief language detection with infinite grams + + Copyright (C) 2013 Nakatani Shuyo / Cybozu Labs, Inc., all rights reserved. +*/ + +#include +#include +#include +#include "type.hpp" +#include "da.hpp" +#include "corpus.hpp" + +namespace cybozu { +namespace ldig { + +static const std::string nomatchedlabel = "**"; + +class Model { +public: + size_t K, M; + Features features; + DoubleArray trie; + std::unordered_map labelmap; + std::vector labellist; + std::vector params; + + Model() { + std::srand((unsigned int)time(0)); + } + + Model(const size_t K_) : K(K_) { + std::srand((unsigned int)time(0)); + } + + Model(const Corpus &corpus, const size_t bound_feature_freq) : K(corpus.labels().size()) { + for (auto i=corpus.texts().begin(), iend=corpus.texts().end();i!=iend;++i) { + labellist.push_back(i->label); + } + generate_labelmap(); + generate_features(corpus.text(), bound_feature_freq); + } + + void generate_features(const LdigString &str, size_t bound_feature_freq) { + features.extract(str, bound_feature_freq); + post_features(); + } + + void post_features() { + M = features.size(); + params.resize(K * M); + trie.construct(features); + } + + void generate_labellist() { + labellist.resize(K); + for (auto i=labelmap.begin(),ie=labelmap.end();i!=ie;++i) { + labellist[i->second] = i->first; + } + } + + void generate_labelmap() { + for(size_t k=0;k=K) return nomatchedlabel; + return labellist[k]; + }; + + size_t label(const std::string &st) const { + return labelmap.at(st); + } + + /* + @brief parameter update for unit test only ( same with inner code of learn method ) + */ + void update(const Events 
&events, const size_t label_k, const LdigFloat eta) { + std::vector y(K); + predict(y, events); + y[label_k] -= 1; + + for (auto k=y.begin(), ke=y.end(); k!=ke; ++k) *k *= eta; + for (auto i=events.begin(), ie=events.end(); i!=ie; ++i) { + auto j = params.begin() + i->first * K; + for (auto k=y.begin(), ke=y.end(); k!=ke; ++k, ++j) { + *j -= i->second * *k; + } + } + } + + void learn(const Corpus &corpus, LdigFloat eta, const LdigFloat reg = 0) { + const size_t D = corpus.maxsize(); + const size_t N = D * K; + std::vector index(N); + for (size_t i=0;i 0; + std::vector penalties; + LdigFloat uk = 0, alpha = 0; + if (withreg) { + penalties.resize(M * K); + alpha = pow(0.9, -1.0 / N); + } + const size_t N_WHOLE_REG = 100; + const size_t WHOLE_REG_INT = (N / N_WHOLE_REG) + 1; + + std::vector::iterator prm, pnl, pe=params.end(); + auto L1regularize = [&prm, &pnl, &uk](){ + LdigFloat w = *prm; + if (w > 0) { + LdigFloat w1 = w - uk - *pnl; + if (w1 > 0) { + *prm = w1; *pnl += w1 - w; + } else { + *prm = 0; *pnl -= w; + } + } else if (w < 0) { + LdigFloat w1 = w + uk - *pnl; + if (w1 < 0) { + *prm = w1; *pnl += w1 - w; + } else { + *prm = 0; *pnl -= w; + } + } + }; + + + const LdigString &fulltext = corpus.text(); + size_t ni = 0; + std::vector y(K); + for (auto n=index.begin(), ne=index.end(); n!=ne; ++n, ++ni) { + size_t label_k = *n / D; + const std::vector &v = corpus.texts()[label_k].vec; + + size_t r = *n % D; + if (r / v.size() == D / v.size()) { + r = std::rand() % v.size(); + } else { + r %= v.size(); + } + + Events events; + trie.extract_features(events, fulltext, v[r]); + y.clear(); + predict(y, events); + y[label_k] -= 1; // t_n - y_n + + if (withreg) { + // with L1 regularization + eta *= alpha; + uk += reg * eta / N; + for (auto k=y.begin(), ke=y.end(); k!=ke; ++k) *k *= eta; + + if ((N-ni) % WHOLE_REG_INT == 1) { + for (auto i=events.begin(), ie=events.end(); i!=ie; ++i) { + prm = params.begin() + i->first * K; + for (auto k=y.begin(), ke=y.end(); k!=ke; 
++k, ++prm) { + *prm -= *k * i->second; + } + } + + for (prm=params.begin(), pnl=penalties.begin(); prm!=pe; ++prm, ++pnl) { + L1regularize(); + } + } else { + for (auto i=events.begin(), ie=events.end(); i!=ie; ++i) { + prm = params.begin() + i->first * K; + pnl = penalties.begin() + i->first * K; + for (auto k=y.begin(), ke=y.end(); k!=ke; ++k, ++prm, ++pnl) { + *prm -= *k * i->second; + L1regularize(); + } + } + } + + } else if (y[label_k] < -LdigAlmostZero) { + + // same with update(events, label_k, eta); + for (auto k=y.begin(), ke=y.end(); k!=ke; ++k) *k *= eta; + for (auto i=events.begin(), ie=events.end(); i!=ie; ++i) { + auto j = params.begin() + i->first * K; + for (auto k=y.begin(), ke=y.end(); k!=ke; ++k, ++j) { + *j -= *k * i->second; + } + } + } + } + } + + /* + @brief return negative log likelihood + */ + LdigFloat likelihood(std::vector &correct, const Corpus &corpus) { + if (correct.size() != corpus.texts().size()) correct.resize(corpus.texts().size()); + + const LdigString &fulltext = corpus.text(); + LdigFloat log_likelihood = 0; + const std::vector &texts = corpus.texts(); + auto cor = correct.begin(); + std::vector y(K); + for (auto k=texts.begin(), ke=texts.end();k!=ke;++k,++cor) { + const std::vector &v = k->vec; + const size_t label_k = labelmap.at(k->label); + for (auto t=v.begin(), te=v.end(); t!=te; ++t) { + Events events; + trie.extract_features(events, fulltext, *t); + + y.clear(); + size_t predict_k = predict(y, events); + if (predict_k == label_k) *cor += 1; + if (y[label_k] > 0) log_likelihood -= log(y[label_k]); + } + } + return log_likelihood; //exp(log_likelihood / corpus.size()); + } + + size_t predict(std::vector &result, const Events &events) const { + if (result.size() != K) result.resize(K); + + for (auto i=events.begin(), ie=events.end(); i!=ie; ++i) { + auto j = params.begin() + i->first * K; + for (auto k=result.begin(), ke=result.end(); k!=ke; ++k, ++j) { + *k += i->second * *j; + } + } + + LdigFloat max = LdigFloatMin; + 
size_t maxindex = 0; + for (auto k=result.begin(), ke=result.end(); k!=ke; ++k) { + if (max < *k) { + max = *k; + maxindex = k - result.begin(); + } + } + + LdigFloat sum = 0; + for (auto k=result.begin(), ke=result.end(); k!=ke; ++k) { + *k = exp(*k - max); + sum += *k; + } + + for (auto k=result.begin(), ke=result.end(); k!=ke; ++k) { + *k /= sum; + } + + return maxindex; + } + + size_t predict(std::vector &result, const LdigString &fulltext, const TextPos &pos) const { + Events events; + trie.extract_features(events, fulltext, pos); + return predict(result, events); + } + + size_t predict(std::vector &result, const LdigString &text) const { + Events events; + trie.extract_features(events, text); + return predict(result, events); + } + + + void save(std::ostream &ofs) const { + unsigned char size_size_t = sizeof(size_t); + ofs.write((const char*)(&size_size_t), sizeof(size_size_t)); + + const unsigned int K_ = (unsigned int)K, M_ = (unsigned int)M; + ofs.write((const char*)(&K_), sizeof(K_)); + ofs.write((const char*)(&M_), sizeof(M_)); + + for (auto i=labellist.begin(), ie=labellist.end();i!=ie;++i) { + unsigned char c = (unsigned char)i->size(); + ofs.write((const char*)(&c), sizeof(c)); + ofs.write(i->c_str(), i->size()); + } + + std::string text; + features.text().toUtf8(text); + const size_t textsize = text.size(); + ofs.write((const char*)(&textsize), sizeof(textsize)); + ofs.write(text.c_str(), textsize); + + ofs.write((const char*)&features[0], sizeof(Feature) * M); + ofs.write((const char*)¶ms[0], sizeof(LdigFloat) * K * M); + } + + void load(std::istream &ifs) { + int size_size_t = ifs.get(); + if (size_size_t != sizeof(size_t)) { + if (size_size_t==4) throw Exception("cannot load a model builded on 32 bit platform"); + throw Exception("cannot load a model builded on 64 bit platform"); + } + + unsigned int K_, M_; + ifs.read((char *)&K_, sizeof(K_)); + ifs.read((char *)&M_, sizeof(M_)); + K = K_; + M = M_; + + labellist.resize(K); + for (size_t k=0;k 
shrink() { + Features new_features; + new_features.settext(features.text()); + + std::vector new_params; + + std::vector summary(K+1); + auto p=params.begin(); + for(auto f=features.begin(), fe=features.end(); f!=fe; ++f, p+=K) { + size_t nonzeros = 0; + for (size_t k=0;k LdigAlmostZero) ++nonzeros; + } + ++summary[nonzeros]; + if (nonzeros > 0) { + new_features.push_back(*f); + //for (size_t k=0;k + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {006B0427-A9EF-4C07-B38E-F02312A0533C} + Win32Proj + ldig + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..;../cybozulib/include + MultiThreadedDebug + + + + + Console + true + psapi.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..;../cybozulib/include + + + MultiThreadedDebug + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..;../cybozulib/include + MultiThreaded + + + + + Console + true + true + true + psapi.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..;../cybozulib/include + + + MultiThreaded + + + Console + true + true + true + psapi.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ldig/ldigcpp/ldig/type.hpp b/ldig/ldigcpp/ldig/type.hpp new file mode 100644 index 0000000..0279661 --- /dev/null +++ b/ldig/ldigcpp/ldig/type.hpp @@ -0,0 +1,60 @@ +#pragma once +/** + @file + @brief data type for ldig + + 
Copyright (C) 2013 Nakatani Shuyo / Cybozu Labs, Inc., all rights reserved. +*/ + +#include +#include +#include "cybozu/string.hpp" + +typedef double LdigFloat; +const double LdigFloatMin = DBL_MIN; +const double LdigAlmostZero = 1e-7; + +typedef cybozu::String LdigString; +typedef cybozu::Char LdigChar; + +namespace cybozu { +namespace ldig { + +typedef std::unordered_map Events; + +class Feature { +public: + size_t begin, len; + //unsigned int count; + Feature() {} + Feature(size_t b, size_t l, unsigned int c) : begin(b), len(l) {} + inline LdigString str(const LdigString& text) const { return text.substr(begin, len); } +}; + +class TextPos { +public: + size_t begin, len; + TextPos(size_t b, size_t l) : begin(b), len(l) {} +}; + +class TextVec +{ +public: + std::string label; + std::vector vec; + TextVec(const std::string &l) : label(l) {} +}; + +class Exception : public std::exception { +private: + std::string message_; +public: + Exception(const std::string& message) : message_(message) {} + virtual ~Exception() throw() {} + + virtual const char* what() const throw() { + return message_.c_str(); + } +}; + +}} diff --git a/ldig/ldigcpp/ldig/util.hpp b/ldig/ldigcpp/ldig/util.hpp new file mode 100644 index 0000000..24cd028 --- /dev/null +++ b/ldig/ldigcpp/ldig/util.hpp @@ -0,0 +1,31 @@ +#pragma once +/** + @file + @brief utility for ldig + + Copyright (C) 2013 Nakatani Shuyo / Cybozu Labs, Inc., all rights reserved. 
+*/ + +#include "cybozu/string.hpp" +#include "esaxx/esa.hxx" + +namespace cybozu { +namespace ldig { + +inline void replace(cybozu::String& str, const cybozu::String& from, cybozu::Char to) { + cybozu::String::size_type pos = 0; + while (pos = str.find(from, pos), pos != cybozu::String::npos) { + str[pos] = to; + ++pos; + } +} + +template +void printvec(const std::vector &vec) { + auto i=vec.begin(), iend=vec.end(); + std::cout << "( "; + for(;i!=iend;++i) std::cout << *i << " "; + std::cout << ")" << std::endl; +} + +}} diff --git a/ldig/ldigcpp/ldigtest/ldigtest.vcxproj b/ldig/ldigcpp/ldigtest/ldigtest.vcxproj new file mode 100644 index 0000000..0f95a8c --- /dev/null +++ b/ldig/ldigcpp/ldigtest/ldigtest.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {6A2AD6DB-6312-41C9-BED9-940F6BAC2130} + ldigtest + + + + Application + true + MultiByte + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..;../cybozulib/include;../ldig + MultiThreadedDebug + + + true + + + + + Level3 + Disabled + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..;../cybozulib/include;../ldig + MultiThreadedDebug + + + true + + + + + Level3 + MaxSpeed + true + true + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..;../cybozulib/include;../ldig + MultiThreaded + + + true + true + true + + + + + Level3 + MaxSpeed + true + true + WIN32;NOMINMAX;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..;../cybozulib/include;../ldig + MultiThreaded + + + true + true + true + + + + + + + + + + \ No newline at end of file diff --git a/ldig/ldigcpp/ldigtest/test_da.cpp b/ldig/ldigcpp/ldigtest/test_da.cpp new file mode 100644 index 0000000..d7d2efa --- /dev/null +++ 
b/ldig/ldigcpp/ldigtest/test_da.cpp @@ -0,0 +1,297 @@ +#define CYBOZU_TEST_DISABLE_AUTO_RUN +inline void worksize() {} + +#include +#ifdef __linux__ +#include +namespace std { + using boost::regex; + using boost::regex_match; + using boost::regex_search; + using boost::regex_iterator; + using boost::match_results; + using boost::smatch; +} +#else +#include +#endif +#include "cybozu/regex.hpp" +#include "cybozu/test.hpp" +#include "da.hpp" +#include "util.hpp" + +typedef cybozu::ldig::Feature Feature; + +CYBOZU_TEST_AUTO(test_doublearray1) +{ + cybozu::String st("cat"); + std::vector list; + list.push_back(Feature(0,3,1)); + cybozu::ldig::Features features(st, list); + cybozu::ldig::DoubleArray da(features); + CYBOZU_TEST_EQUAL(da.size(), 4); + CYBOZU_TEST_EQUAL(da.get("cat"), 0); + CYBOZU_TEST_EQUAL(da.get("ca"), -1); + CYBOZU_TEST_EQUAL(da.get(""), -1); + CYBOZU_TEST_EQUAL(da.get("catt"), -1); + CYBOZU_TEST_EQUAL(da.get("xxx"), -1); +} + +CYBOZU_TEST_AUTO(test_doublearray2) +{ + cybozu::String st("catdog"); + std::vector list; + list.push_back(Feature(0,3,1)); + list.push_back(Feature(3,3,1)); + cybozu::ldig::Features features(st, list); + cybozu::ldig::DoubleArray da(features); + + CYBOZU_TEST_EQUAL(da.size(), 7); + CYBOZU_TEST_EQUAL(da.get("cat"), 0); + CYBOZU_TEST_EQUAL(da.get("dog"), 1); + CYBOZU_TEST_EQUAL(da.get(""), -1); + CYBOZU_TEST_EQUAL(da.get("catt"), -1); + CYBOZU_TEST_EQUAL(da.get("xxx"), -1); +} + +CYBOZU_TEST_AUTO(test_doublearray3) +{ + cybozu::String st("catdogdeerfoxrat"); + std::vector list; + list.push_back(Feature(0,2,1)); //ca + list.push_back(Feature(0,3,1)); //cat + list.push_back(Feature(6,4,1)); //deer + list.push_back(Feature(3,3,1)); //dog + list.push_back(Feature(10,3,1)); //fox + list.push_back(Feature(13,3,1)); //rat + cybozu::ldig::Features features(st, list); + cybozu::ldig::DoubleArray da(features); + + CYBOZU_TEST_EQUAL(da.size(), 17); + CYBOZU_TEST_EQUAL(da.get("ca"), 0); + CYBOZU_TEST_EQUAL(da.get("cat"), 1); + 
CYBOZU_TEST_EQUAL(da.get("deer"), 2); + CYBOZU_TEST_EQUAL(da.get("dog"), 3); + CYBOZU_TEST_EQUAL(da.get("fox"), 4); + CYBOZU_TEST_EQUAL(da.get("rat"), 5); + + CYBOZU_TEST_EQUAL(da.get(""), -1); + CYBOZU_TEST_EQUAL(da.get("catt"), -1); + CYBOZU_TEST_EQUAL(da.get("xxx"), -1); + + { + cybozu::ldig::Events r; + da.extract_features(r, "cat"); + CYBOZU_TEST_EQUAL(r.size(), 2); + CYBOZU_TEST_EQUAL(r[0], 1); + CYBOZU_TEST_EQUAL(r[1], 1); + } + + { + cybozu::ldig::Events r; + da.extract_features(r, "deerat"); + CYBOZU_TEST_EQUAL(r.size(), 2); + CYBOZU_TEST_EQUAL(r[2], 1); + CYBOZU_TEST_EQUAL(r[5], 1); + } + + { + cybozu::ldig::Events r; + da.extract_features(r, "abcdef"); + CYBOZU_TEST_EQUAL(r.size(), 0); + } +} + +CYBOZU_TEST_AUTO(test_doublearray4) +{ + cybozu::String st(" pt"); + std::vector list; + list.push_back(Feature(0,1,1)); //' ' + list.push_back(Feature(0,2,1)); // p + list.push_back(Feature(1,1,1)); //p + list.push_back(Feature(2,1,1)); //t + + CYBOZU_TEST_EQUAL(list[0].str(st), " "); + CYBOZU_TEST_EQUAL(list[1].str(st), " p"); + CYBOZU_TEST_EQUAL(list[2].str(st), "p"); + CYBOZU_TEST_EQUAL(list[3].str(st), "t"); + + cybozu::ldig::Features features(st, list); + cybozu::ldig::DoubleArray da(features); + //cybozu::ldig::printvec(da.base); + //cybozu::ldig::printvec(da.check); + //cybozu::ldig::printvec(da.value); + + CYBOZU_TEST_EQUAL(da.size(), 86); + CYBOZU_TEST_EQUAL(da.get(" "), 0); + CYBOZU_TEST_EQUAL(da.get(" p"), 1); + CYBOZU_TEST_EQUAL(da.get("p"), 2); + CYBOZU_TEST_EQUAL(da.get("t"), 3); + CYBOZU_TEST_EQUAL(da.get("ca"), -1); + CYBOZU_TEST_EQUAL(da.get(""), -1); + CYBOZU_TEST_EQUAL(da.get("catt"), -1); + CYBOZU_TEST_EQUAL(da.get("xxx"), -1); +} + + +CYBOZU_TEST_AUTO(test_abracadabra) +{ + cybozu::String st("abracadabra"); + cybozu::ldig::Features features(st, 2); + +/* + printvec(ex.SA); + printvec(ex.L); + printvec(ex.R); + printvec(ex.D); + printvec(ex.rank); + CYBOZU_TEST_EQUAL(ex.len, 11); +*/ + //printvec(features); + 
CYBOZU_TEST_EQUAL(features.nodesize(), 5); + CYBOZU_TEST_EQUAL(features.size(), 2); + CYBOZU_TEST_EQUAL(features.str(0), "a"); + CYBOZU_TEST_EQUAL(features[0].begin, 10); + CYBOZU_TEST_EQUAL(features[0].len, 1); + //CYBOZU_TEST_EQUAL(features[0].count, 5); + CYBOZU_TEST_EQUAL(features.str(1), "abra"); + CYBOZU_TEST_EQUAL(features[1].begin, 7); + CYBOZU_TEST_EQUAL(features[1].len, 4); + //CYBOZU_TEST_EQUAL(features[1].count, 2); + + // shrink features + cybozu::ldig::Features new_features(features); + CYBOZU_TEST_EQUAL(new_features.nodesize(), 0); + CYBOZU_TEST_EQUAL(new_features.size(), 2); + CYBOZU_TEST_EQUAL(new_features.text(), "abra"); + CYBOZU_TEST_EQUAL(new_features.str(0), "a"); + //CYBOZU_TEST_EQUAL(new_features[0].count, 5); + CYBOZU_TEST_EQUAL(new_features.str(1), "abra"); + //CYBOZU_TEST_EQUAL(new_features[1].count, 2); + + + +} + +CYBOZU_TEST_AUTO(test_shrink1) +{ + cybozu::String st("abracadabra"); + std::vector list; + list.push_back(Feature(10,1,1)); + list.push_back(Feature(7,4,1)); + cybozu::ldig::Features features(st, list); + + CYBOZU_TEST_EQUAL(features[0].begin, 10); + CYBOZU_TEST_EQUAL(features[0].len, 1); + CYBOZU_TEST_EQUAL(features[1].begin, 7); + CYBOZU_TEST_EQUAL(features[1].len, 4); + + features.shrink(); + CYBOZU_TEST_EQUAL(features.size(), 2); + CYBOZU_TEST_EQUAL(features.text(), "abra"); + CYBOZU_TEST_EQUAL(features[0].begin, 3); + CYBOZU_TEST_EQUAL(features[0].len, 1); + CYBOZU_TEST_EQUAL(features[1].begin, 0); + CYBOZU_TEST_EQUAL(features[1].len, 4); +} + +CYBOZU_TEST_AUTO(test_shrink2) +{ + cybozu::String st("abracadabraxy"); + std::vector list; + list.push_back(Feature(7,4,1)); + list.push_back(Feature(12,1,1)); + cybozu::ldig::Features features(st, list); + + features.shrink(); + CYBOZU_TEST_EQUAL(features.size(), 2); + CYBOZU_TEST_EQUAL(features.text(), "abray"); + CYBOZU_TEST_EQUAL(features[0].begin, 0); + CYBOZU_TEST_EQUAL(features[0].len, 4); + CYBOZU_TEST_EQUAL(features[1].begin, 4); + CYBOZU_TEST_EQUAL(features[1].len, 1); 
+} + +CYBOZU_TEST_AUTO(test_shrink3) +{ + cybozu::String st("abracadabraay"); + std::vector list; + list.push_back(Feature(10,3,1)); + list.push_back(Feature(7,4,1)); + cybozu::ldig::Features features(st, list); + + features.shrink(); + CYBOZU_TEST_EQUAL(features.size(), 2); + CYBOZU_TEST_EQUAL(features.text(), "abraay"); + CYBOZU_TEST_EQUAL(features[0].begin, 3); + CYBOZU_TEST_EQUAL(features[0].len, 3); + CYBOZU_TEST_EQUAL(features[1].begin, 0); + CYBOZU_TEST_EQUAL(features[1].len, 4); +} + +CYBOZU_TEST_AUTO(test_long) +{ + cybozu::String st(CYBOZU_RE("Felicitar al cap de seguretat x fer cada dia més difícil l'accès al Camp Nou.Ès + fàcil arribar a una roda de premsa a T ...")); + cybozu::ldig::Features features(st, 2); + + CYBOZU_TEST_EQUAL(features.nodesize(), 46); + CYBOZU_TEST_EQUAL(features.size(), 34); + CYBOZU_TEST_EQUAL(features.str(0), " a"); + CYBOZU_TEST_EQUAL(features.str(1), " a "); + CYBOZU_TEST_EQUAL(features.str(2), " al "); + CYBOZU_TEST_EQUAL(features.str(3), " ca"); + CYBOZU_TEST_EQUAL(features.str(4), " d"); + CYBOZU_TEST_EQUAL(features.str(5), " de "); + CYBOZU_TEST_EQUAL(features.str(6), " di"); + CYBOZU_TEST_EQUAL(features.str(7), " f"); + + cybozu::ldig::Features new_features(features); + //CYBOZU_TEST_EQUAL(new_features.text(), " al de cil ar ada d a ca di fp res tamou"); + CYBOZU_TEST_EQUAL(new_features.str(0), " a"); + CYBOZU_TEST_EQUAL(new_features.str(1), " a "); + CYBOZU_TEST_EQUAL(new_features.str(2), " al "); + CYBOZU_TEST_EQUAL(new_features.str(3), " ca"); + CYBOZU_TEST_EQUAL(new_features.str(4), " d"); + CYBOZU_TEST_EQUAL(new_features.str(5), " de "); + CYBOZU_TEST_EQUAL(new_features.str(6), " di"); + CYBOZU_TEST_EQUAL(new_features.str(7), " f"); + + features.shrink(); + CYBOZU_TEST_EQUAL(features.text(), "tat f ca di al mp us cil ar aoda de re a "); + CYBOZU_TEST_EQUAL(features.str(0), " a"); + CYBOZU_TEST_EQUAL(features.str(1), " a "); + CYBOZU_TEST_EQUAL(features.str(2), " al "); + CYBOZU_TEST_EQUAL(features.str(3), " ca"); + 
CYBOZU_TEST_EQUAL(features.str(4), " d"); + CYBOZU_TEST_EQUAL(features.str(5), " de "); + CYBOZU_TEST_EQUAL(features.str(6), " di"); + CYBOZU_TEST_EQUAL(features.str(7), " f"); +} + + + + +CYBOZU_TEST_AUTO(test_regex) +{ + std::regex re("^(\\S+)(\\t[^\\t\\n]+)*\\t([^\\t\\n]+)$"); + std::string st("en\tThis is a pen.\nen\thoge\tfuga\tI have no idea.\n"); + std::regex_iterator i( st.begin(), st.end(), re ), iend; + CYBOZU_TEST_EQUAL((*i)[1].str(), "en"); + CYBOZU_TEST_EQUAL((*i)[3].str(), "This is a pen."); + ++i; + CYBOZU_TEST_EQUAL((*i)[1].str(), "en"); + CYBOZU_TEST_EQUAL((*i)[3].str(), "I have no idea."); + ++i; + CYBOZU_TEST_ASSERT(i == iend); + +} + +CYBOZU_TEST_AUTO(test_cybozu_regex) +{ + cybozu::regex r("h([aiueo])(h\\1)+"); + const cybozu::String input("haha hihi hahu"); + const cybozu::String fmt("x"); + cybozu::String s; + s = cybozu::regex_replace(input, r, fmt); + CYBOZU_TEST_EQUAL(s, "x x hahu"); +} diff --git a/ldig/ldigcpp/ldigtest/test_maxsubst.cpp b/ldig/ldigcpp/ldigtest/test_maxsubst.cpp new file mode 100644 index 0000000..3936348 --- /dev/null +++ b/ldig/ldigcpp/ldigtest/test_maxsubst.cpp @@ -0,0 +1,98 @@ +#include +#include "cybozu/test.hpp" +#include + +#include "da.hpp" +#include "util.hpp" + +CYBOZU_TEST_AUTO(test_abracadabra) +{ + cybozu::String st("abracadabra"); + cybozu::ldig::Features features(st, 2); + +/* + printvec(ex.SA); + printvec(ex.L); + printvec(ex.R); + printvec(ex.D); + printvec(ex.rank); + CYBOZU_TEST_EQUAL(ex.len, 11); +*/ + //printvec(features); + CYBOZU_TEST_EQUAL(features.nodesize(), 5); + CYBOZU_TEST_EQUAL(features.size(), 2); + CYBOZU_TEST_EQUAL(features.str(0), "a"); + CYBOZU_TEST_EQUAL(features[0].count, 5); + CYBOZU_TEST_EQUAL(features.str(1), "abra"); + CYBOZU_TEST_EQUAL(features[1].count, 2); + + // shrink features + cybozu::ldig::Features new_features(features); + CYBOZU_TEST_EQUAL(new_features.nodesize(), 0); + CYBOZU_TEST_EQUAL(new_features.size(), 2); + CYBOZU_TEST_EQUAL(new_features.text(), "abra"); + 
CYBOZU_TEST_EQUAL(new_features.str(0), "a"); + CYBOZU_TEST_EQUAL(new_features[0].count, 5); + CYBOZU_TEST_EQUAL(new_features.str(1), "abra"); + CYBOZU_TEST_EQUAL(new_features[1].count, 2); + + + +} + +CYBOZU_TEST_AUTO(test_long) +{ + cybozu::String st(CYBOZU_RE("Felicitar al cap de seguretat x fer cada dia més difícil l'accès al Camp Nou.Ès + fàcil arribar a una roda de premsa a T ...")); + cybozu::ldig::Features features(st, 2); + + CYBOZU_TEST_EQUAL(features.nodesize(), 46); + CYBOZU_TEST_EQUAL(features.size(), 34); + CYBOZU_TEST_EQUAL(features.str(0), " a"); + CYBOZU_TEST_EQUAL(features.str(1), " a "); + CYBOZU_TEST_EQUAL(features.str(2), " al "); + CYBOZU_TEST_EQUAL(features.str(3), " ca"); + CYBOZU_TEST_EQUAL(features.str(4), " d"); + CYBOZU_TEST_EQUAL(features.str(5), " de "); + CYBOZU_TEST_EQUAL(features.str(6), " di"); + CYBOZU_TEST_EQUAL(features.str(7), " f"); + + cybozu::ldig::Features new_features(features); + CYBOZU_TEST_EQUAL(new_features.text(), " al de cil ar ada d a ca di fp res tamou"); + CYBOZU_TEST_EQUAL(new_features.str(0), " a"); + CYBOZU_TEST_EQUAL(new_features.str(1), " a "); + CYBOZU_TEST_EQUAL(new_features.str(2), " al "); + CYBOZU_TEST_EQUAL(new_features.str(3), " ca"); + CYBOZU_TEST_EQUAL(new_features.str(4), " d"); + CYBOZU_TEST_EQUAL(new_features.str(5), " de "); + CYBOZU_TEST_EQUAL(new_features.str(6), " di"); + CYBOZU_TEST_EQUAL(new_features.str(7), " f"); +} + + + + + +CYBOZU_TEST_AUTO(test_regex) +{ + std::regex re("^(\\S+)(\\t[^\\t\\n]+)*\\t([^\\t\\n]+)$"); + std::string st("en\tThis is a pen.\nen\thoge\tfuga\tI have no idea.\n"); + std::regex_iterator i( st.begin(), st.end(), re ), iend; + CYBOZU_TEST_EQUAL((*i)[1].str(), "en"); + CYBOZU_TEST_EQUAL((*i)[3].str(), "This is a pen."); + ++i; + CYBOZU_TEST_EQUAL((*i)[1].str(), "en"); + CYBOZU_TEST_EQUAL((*i)[3].str(), "I have no idea."); + ++i; + CYBOZU_TEST_ASSERT(i == iend); + +} + +CYBOZU_TEST_AUTO(test_cybozu_regex) +{ + cybozu::regex r("h([aiueo])(h\\1)+"); + const 
cybozu::String input("haha hihi hahu"); + const cybozu::String fmt("x"); + cybozu::String s; + s = cybozu::regex_replace(input, r, fmt); + CYBOZU_TEST_EQUAL(s, "x x hahu"); +} diff --git a/ldig/ldigcpp/ldigtest/test_model.cpp b/ldig/ldigcpp/ldigtest/test_model.cpp new file mode 100644 index 0000000..47e636c --- /dev/null +++ b/ldig/ldigcpp/ldigtest/test_model.cpp @@ -0,0 +1,293 @@ +#include +#include +#include "cybozu/test.hpp" +#include "type.hpp" +#include "da.hpp" +#include "util.hpp" +#include "ldig.hpp" + +/* +void printevents(const cybozu::ldig::Events &events, const cybozu::ldig::DoubleArray &da, const LdigString &fulltext) { + for (auto e=events.begin(), ee=events.end();e!=ee;++e) { + const cybozu::ldig::Feature &f = da.features().at(e->first); + std::cout << "|" << e->first << ":" << fulltext.substr(f.begin, f.len); + } + std::cout << "|" << std::endl; +} +*/ + +CYBOZU_TEST_AUTO(test_corpus) +{ + cybozu::ldig::Corpus corpus; + corpus.addText("en", "This is a pen."); + corpus.addText("en", "It is rainy today."); + corpus.postLoad(); + + const cybozu::String &text = corpus.text(); + CYBOZU_TEST_EQUAL(text, "\x01this is a pen.\x01It is rainy today.\x01"); + const std::vector &texts = corpus.texts()[0].vec; + CYBOZU_TEST_EQUAL(texts.size(), 2); + { + const cybozu::ldig::TextPos &x = texts[0]; + CYBOZU_TEST_EQUAL( text.substr(x.begin, x.len), "\x01this is a pen.\x01"); + } + { + const cybozu::ldig::TextPos &x = texts[1]; + CYBOZU_TEST_EQUAL( text.substr(x.begin, x.len), "\x01It is rainy today.\x01"); + } +} + +CYBOZU_TEST_AUTO(test_normalization) +{ + cybozu::ldig::Corpus corpus; + corpus.addText("en", "@ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"); + corpus.addText("en", "`abcdefghijklmnopqrstuvwxyz{|}~"); + corpus.addText("en", CYBOZU_RE("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß")); + corpus.addText("en", CYBOZU_RE("ĀāĂ㥹ĆćĈĉĊċČčĎď")); + corpus.addText("en", CYBOZU_RE("ĐđĒēĔĕĖėĘęĚěĜĝĞğ")); + corpus.addText("en", CYBOZU_RE("ĠġĢģĤĥĦħĨĩĪīĬĭĮį")); + corpus.addText("en", 
CYBOZU_RE("İıIJijĴĵĶķĸĹĺĻļĽľĿ")); + corpus.addText("en", CYBOZU_RE("ŀŁłŃńŅņŇňʼnŊŋŌōŎŏ")); + corpus.addText("en", CYBOZU_RE("ŐőŒœŔŕŖŗŘřŚśŜŝŞş")); + corpus.addText("en", CYBOZU_RE("ŠšŢţŤťŦŧŨũŪūŬŭŮů")); + corpus.addText("en", CYBOZU_RE("ŰűŲųŴŵŶŷŸŹźŻżŽžſ")); + corpus.addText("en", CYBOZU_RE("ƠơƯưȘșȚț")); + corpus.addText("en", CYBOZU_RE("ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪ")); + corpus.addText("en", CYBOZU_RE("АБВГДЕЖЗИЙКЛМНОП")); + corpus.addText("en", CYBOZU_RE("РСТУФХЦЧШЩЪЫЬЭЮЯ")); + corpus.addText("en", CYBOZU_RE("ẠạẢảẤấẦầẨẩẪẫẬậẮắ")); + corpus.addText("en", CYBOZU_RE("ẰằẲẳẴẵẶặẸẹẺẻẼẽẾế")); + corpus.addText("en", CYBOZU_RE("ỀềỂểỄễỆệỈỉỊịỌọỎỏ")); + corpus.addText("en", CYBOZU_RE("ỐốỒồỔổỖỗỘộỚớỜờỞở")); + corpus.addText("en", CYBOZU_RE("ỠỡỢợỤụỦủỨứỪừỬửỮữ")); + corpus.addText("en", CYBOZU_RE("ỰựỲỳỴỵỶỷỸỹ")); + corpus.addText("en", CYBOZU_RE("A\u0300A\u0301A\u0303A\u0309A\u0323E\u0300E\u0301E\u0303E\u0309E\u0323I\u0300I\u0301I\u0303I\u0309I\u0323")); + corpus.addText("en", CYBOZU_RE("O\u0300O\u0301O\u0303O\u0309O\u0323U\u0300U\u0301U\u0303U\u0309U\u0323Y\u0300Y\u0301Y\u0303Y\u0309Y\u0323")); + corpus.addText("en", CYBOZU_RE("a\u0300a\u0301a\u0303a\u0309a\u0323e\u0300e\u0301e\u0303e\u0309e\u0323i\u0300i\u0301i\u0303i\u0309i\u0323")); + corpus.addText("en", CYBOZU_RE("o\u0300o\u0301o\u0303o\u0309o\u0323u\u0300u\u0301u\u0303u\u0309u\u0323y\u0300y\u0301y\u0303y\u0309y\u0323")); + corpus.addText("en", CYBOZU_RE("\u00c2\u0300\u00c2\u0301\u00c2\u0303\u00c2\u0309\u00c2\u0323\u00ca\u0300\u00ca\u0301\u00ca\u0303\u00ca\u0309\u00ca\u0323\u00d4\u0300\u00d4\u0301\u00d4\u0303\u00d4\u0309\u00d4\u0323")); + corpus.addText("en", CYBOZU_RE("\u00e2\u0300\u00e2\u0301\u00e2\u0303\u00e2\u0309\u00e2\u0323\u00ea\u0300\u00ea\u0301\u00ea\u0303\u00ea\u0309\u00ea\u0323\u00f4\u0300\u00f4\u0301\u00f4\u0303\u00f4\u0309\u00f4\u0323")); + corpus.addText("en", 
CYBOZU_RE("\u0102\u0300\u0102\u0301\u0102\u0303\u0102\u0309\u0102\u0323\u0103\u0300\u0103\u0301\u0103\u0303\u0103\u0309\u0103\u0323\u01a0\u0300\u01a0\u0301\u01a0\u0303\u01a0\u0309\u01a0\u0323")); + corpus.addText("en", CYBOZU_RE("\u01a1\u0300\u01a1\u0301\u01a1\u0303\u01a1\u0309\u01a1\u0323\u01af\u0300\u01af\u0301\u01af\u0303\u01af\u0309\u01af\u0323\u01b0\u0300\u01b0\u0301\u01b0\u0303\u01b0\u0309\u01b0\u0323")); + + corpus.addText("en", "ahahahah"); + corpus.addText("en", "hahha"); + corpus.addText("en", "hahaa"); + corpus.addText("en", "ahahahahhahahhahahaaaa"); + corpus.addText("en", "jajjajajaja"); + corpus.addText("en", "Jejeje"); + corpus.addText("en", "Bin dia!!! :-)"); + corpus.postLoad(); + + const cybozu::String &text = corpus.text(); + const std::vector &texts = corpus.texts()[0].vec; + auto checkstr = [&](size_t index, const cybozu::String &st) { + const cybozu::ldig::TextPos &x = texts[index]; + CYBOZU_TEST_EQUAL( text.substr(x.begin, x.len), st); + }; + + checkstr(0, "\x01@ abcdefghIjklmnopqrstuvwxyz[\\]^_\x01"); // I isn't lowerized for Turkish + checkstr(1, CYBOZU_RE("\x01`abcdefghijklmnopqrstuvwxyz{|}~\x01")); + checkstr(2, CYBOZU_RE("\x01àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþß\x01")); + checkstr(3, CYBOZU_RE("\x01āāăăąąććĉĉċċččďď\x01")); + checkstr(4, CYBOZU_RE("\x01đđēēĕĕėėęęěěĝĝğğ\x01")); + checkstr(5, CYBOZU_RE("\x01ġġģģĥĥħħĩĩīīĭĭįį\x01")); + checkstr(6, CYBOZU_RE("\x01İıijijĵĵķķĸĺĺļļľľŀ\x01")); // İ isn't lowerized for Turkish + checkstr(7, CYBOZU_RE("\x01ŀłłńńņņňňʼnŋŋōōŏŏ\x01")); + checkstr(8, CYBOZU_RE("\x01őőœœŕŕŗŗřřśśŝŝşş\x01")); // U+017f is nomalized to 's' + checkstr(9, CYBOZU_RE("\x01ššţţťťŧŧũũūūŭŭůů\x01")); + checkstr(10, CYBOZU_RE("\x01űűųųŵŵŷŷÿźźżżžžs\x01")); + checkstr(11, CYBOZU_RE("\x01ơơưưşşţţ\x01")); // for Romanian + checkstr(12, CYBOZU_RE("\x01αβγδεζηθικλμνξοπρστυφχψωϊ\x01")); + checkstr(13, CYBOZU_RE("\x01абвгдежзийклмноп\x01")); + checkstr(14, CYBOZU_RE("\x01рстуфхцчшщъыьэюя\x01")); + checkstr(15, 
CYBOZU_RE("\x01ạạảảấấầầẩẩẫẫậậắắ\x01")); + checkstr(16, CYBOZU_RE("\x01ằằẳẳẵẵặặẹẹẻẻẽẽếế\x01")); + checkstr(17, CYBOZU_RE("\x01ềềểểễễệệỉỉịịọọỏỏ\x01")); + checkstr(18, CYBOZU_RE("\x01ốốồồổổỗỗộộớớờờởở\x01")); + checkstr(19, CYBOZU_RE("\x01ỡỡợợụụủủứứừừửửữữ\x01")); + checkstr(20, CYBOZU_RE("\x01ựựỳỳỵỵỷỷỹỹ\x01")); + checkstr(21, CYBOZU_RE("\x01àáãảạèéẽẻẹìíĩỉị\x01")); + checkstr(22, CYBOZU_RE("\x01òóõỏọùúũủụỳýỹỷỵ\x01")); + checkstr(23, CYBOZU_RE("\x01àáãảạèéẽẻẹìíĩỉị\x01")); + checkstr(24, CYBOZU_RE("\x01òóõỏọùúũủụỳýỹỷỵ\x01")); + checkstr(25, CYBOZU_RE("\x01ầấẫẩậềếễểệồốỗổộ\x01")); + checkstr(26, CYBOZU_RE("\x01ầấẫẩậềếễểệồốỗổộ\x01")); + checkstr(27, CYBOZU_RE("\x01ằắẵẳặằắẵẳặờớỡởợ\x01")); + checkstr(28, CYBOZU_RE("\x01ờớỡởợừứữửựừứữửự\x01")); + + checkstr(29, "\x01" "ahahah\x01"); + checkstr(30, CYBOZU_RE("\x01haha\x01")); + checkstr(31, CYBOZU_RE("\x01haha\x01")); + checkstr(32, "\x01" "ahaha\x01"); + checkstr(33, CYBOZU_RE("\x01jaja\x01")); + checkstr(34, CYBOZU_RE("\x01jeje\x01")); + checkstr(35, "\x01" "bin dia!! 
\x01"); + + auto assert_normalize = [](const cybozu::String &in, const cybozu::String &out) { + cybozu::String s; + cybozu::ldig::normalize(s, in); + CYBOZU_TEST_EQUAL(s, out); + }; + cybozu::String s, s1(CYBOZU_RE("ѐёѓєѕіїјљњќџґ")); + cybozu::ldig::normalize(s, CYBOZU_RE("ЀЁЃЄЅІЇЈЉЊЌЏҐ")); + CYBOZU_TEST_EQUAL(s, s1); + cybozu::ldig::normalize(s, s1); + CYBOZU_TEST_EQUAL(s, s1); + + assert_normalize(CYBOZU_RE("ČŠŽčšž"), CYBOZU_RE("čšžčšž")); +} + + +CYBOZU_TEST_AUTO(test_model1) +{ + const size_t K = 2; + cybozu::ldig::Model model(K); + + model.features.settext(LdigString("catdog")); + model.features.push_back(cybozu::ldig::Feature(0,3,1)); + model.features.push_back(cybozu::ldig::Feature(3,3,1)); + model.post_features(); + + CYBOZU_TEST_EQUAL(model.K, K); + CYBOZU_TEST_EQUAL(model.M, 2); + + cybozu::ldig::Events events; + events[0] = 1; + std::vector prob(K); + model.predict(prob, events); + CYBOZU_TEST_EQUAL(prob.size(), K); + CYBOZU_TEST_NEAR(prob[0], 0.5, 1e-7); + CYBOZU_TEST_NEAR(prob[1], 0.5, 1e-7); + + model.params[0] = 0.1; + prob.clear(); + model.predict(prob, events); + CYBOZU_TEST_EQUAL(prob.size(), K); + CYBOZU_TEST_NEAR(prob[0]+prob[1], 1.0, 1e-7); + CYBOZU_TEST_NEAR(prob[0], exp(0.1)/(exp(0.1)+1), 1e-7); // 0.52497918747 + CYBOZU_TEST_NEAR(prob[1], 1.0/(exp(0.1)+1), 1e-7); // 0.47502081252 + + cybozu::ldig::Events events1; + events1[1] = 1; + model.update(events1, 1, 0.1); + CYBOZU_TEST_NEAR(model.params[0], 0.1, 1e-7); + CYBOZU_TEST_NEAR(model.params[1], 0.0, 1e-7); + CYBOZU_TEST_NEAR(model.params[2], -0.05, 1e-7); + CYBOZU_TEST_NEAR(model.params[3], 0.05, 1e-7); + + prob.clear(); + model.predict(prob, events1); + + CYBOZU_TEST_NEAR(prob[0]+prob[1], 1.0, 1e-7); + CYBOZU_TEST_NEAR(prob[0], exp(-0.05)/(exp(-0.05)+exp(0.05)), 1e-7); + CYBOZU_TEST_NEAR(prob[1], exp( 0.05)/(exp(-0.05)+exp(0.05)), 1e-7); +} + +CYBOZU_TEST_AUTO(test_model1a) +{ + const size_t K = 3; + + + cybozu::ldig::Model model(K); + model.features.settext(LdigString("catdog")); + 
model.features.push_back(cybozu::ldig::Feature(1,1,1)); // a + model.features.push_back(cybozu::ldig::Feature(1,2,1)); // at + model.features.push_back(cybozu::ldig::Feature(0,1,1)); // c + model.features.push_back(cybozu::ldig::Feature(0,2,1)); // ca + model.features.push_back(cybozu::ldig::Feature(0,3,1)); // cat + model.features.push_back(cybozu::ldig::Feature(3,1,1)); // d + model.features.push_back(cybozu::ldig::Feature(3,3,1)); // dog + model.features.push_back(cybozu::ldig::Feature(5,1,1)); // g + model.features.push_back(cybozu::ldig::Feature(4,1,1)); // o + model.features.push_back(cybozu::ldig::Feature(2,1,1)); // t + model.post_features(); + + CYBOZU_TEST_EQUAL(model.K, K); + CYBOZU_TEST_EQUAL(model.M, 10); + + cybozu::ldig::Events events; + events[0] = 1; + events[1] = 2; + events[2] = 1; + std::vector prob(K); + model.predict(prob, events); + CYBOZU_TEST_EQUAL(prob.size(), K); + CYBOZU_TEST_NEAR(prob[0], 0.3333333, 1e-7); + CYBOZU_TEST_NEAR(prob[1], 0.3333333, 1e-7); + CYBOZU_TEST_NEAR(prob[2], 0.3333333, 1e-7); + + model.update(events, 1, 0.1); + //cybozu::ldig::printvec(model.params); + + prob.clear(); + size_t predict_k = model.predict(prob, events); + + CYBOZU_TEST_EQUAL(predict_k, 1); + CYBOZU_TEST_NEAR(prob[0]+prob[1]+prob[2], 1.0, 1e-7); + CYBOZU_TEST_NEAR(prob[0], 0.26163499, 1e-7); + CYBOZU_TEST_NEAR(prob[1], 0.47673003, 1e-7); + CYBOZU_TEST_NEAR(prob[2], 0.26163499, 1e-7); +} + + +CYBOZU_TEST_AUTO(test_model2) +{ + cybozu::ldig::Corpus corpus; + corpus.addText("en", "This is a pen."); + corpus.addText("en", "It is rainy today."); + corpus.addText("en", "Good morning! 
Have a nice day!"); + corpus.addText("it", "Come ti va la vita?"); + corpus.addText("it", "Molto bene, bravi tutti."); + corpus.addText("it", "Grazie mille allora!"); + corpus.postLoad(); + + + auto& fulltext = corpus.text(); + auto& texts = corpus.texts(); + + const size_t K = 2; + cybozu::ldig::Model model(K); + model.generate_features(fulltext, 2); + model.labelmap = corpus.labels(); + model.generate_labellist(); + + { + std::vector correct; + CYBOZU_TEST_NEAR(model.likelihood(correct, corpus), 6 * log(2.0), 1e-7); + } + + CYBOZU_TEST_EQUAL(texts[0].label, "en"); + + auto testOneEvents = [&texts, &fulltext, &model](const size_t k, const size_t j) { + auto &pos = texts[k].vec[j]; + cybozu::ldig::Events events; + model.trie.extract_features(events, fulltext, pos); + //CYBOZU_TEST_EQUAL(events.size(), 10); + + std::vector prob1; + model.predict(prob1, events); + + //printevents(events, da, fulltext); + model.update(events, k, 0.1); + + std::vector prob2; + model.predict(prob2, events); + + //std::cout << prob2[k] << " > " << prob1[k] << std::endl; + CYBOZU_TEST_ASSERT(1.0-prob1[k]<1e-7 || prob2[k] > prob1[k]); + + }; + + for (size_t i=0;i<10;++i) { + testOneEvents(0,0); + testOneEvents(1,0); + testOneEvents(0,1); + testOneEvents(1,1); + testOneEvents(0,2); + testOneEvents(1,2); + + std::vector correct; + //std::cout << model.likelihood(correct, corpus) << std::endl; + } +} diff --git a/ldig/ldigcpp/readme.md b/ldig/ldigcpp/readme.md new file mode 100644 index 0000000..985d1dd --- /dev/null +++ b/ldig/ldigcpp/readme.md @@ -0,0 +1,113 @@ +ldigcpp (Language Detection with Infinity Gram for C++) +====================== + +This is a prototype of language detection for short message service (twitter). +with about 99% accuracy for 50 languages + + +Build +------ + +1. Clone ldig project and checkout cpp branch + + ```sh + git clone https://github.com/shuyo/ldig.git + cd ldig + git checkout cpp + cd ldigcpp + ``` + +2. 
Clone cybozulib + + ```sh + git clone https://github.com/herumi/cybozulib.git + ``` + +3. Make + + ```sh + cmake . + make + ``` + + +Usage +------ + +1. Extract model archive + xz -dk lang50.x64.model.xz + +2. Detect + ldig.py -m lang50.x64.model [text data file] + + +Data format +------ + +See https://github.com/shuyo/ldig/tree/cpp#data-format + + + +Supported Languages +------ + +In lang50.x64.model + +- ar Arabic +- ar-bz Arabizi (Arabic chat alphabet) +- bg Bulgarian +- bn Bengali +- ca Catalan +- cs Czech +- da Danish +- de German +- dv Maldivian (Dhivehi) +- el Greek +- en English +- es Spanish +- et Estonian +- fa Persian (Farsi) +- fi Finnish +- fr French +- gu Gujarati +- he Hebrew +- hi Hindi +- hr Croatian (Hrvatski) +- hu Hungarian +- id Indonesian +- it Italian +- ja Japanese +- ko Korean +- lt Lithuanian +- lv Latvian +- mk Macedonian +- ml Malayalam +- mn Mongolian +- nl Dutch +- no Norwegian +- pa Punjabi +- pl Polish +- pt Portuguese +- ro Romanian +- ru Russian +- si Sinhala +- sq Albanian +- sv Swedish +- ta Tamil +- te Telugu +- th Thai +- tl Tagalog +- tr Turkish +- uk Ukrainian +- ur Urdu +- vi Vietnamese +- zh-cn Simplified Chinese +- zh-tw Traditional Chinese + + + +Copyright & License +----- +- (c)2013-2014 Nakatani Shuyo / Cybozu Labs Inc. All rights reserved. +- All codes and resources are available under the MIT License. 
+ diff --git a/maxsubst/cybozulib/include/cybozu/exception.hpp b/ldig/maxsubst/cybozulib/include/cybozu/exception.hpp similarity index 100% rename from maxsubst/cybozulib/include/cybozu/exception.hpp rename to ldig/maxsubst/cybozulib/include/cybozu/exception.hpp diff --git a/maxsubst/cybozulib/include/cybozu/inttype.hpp b/ldig/maxsubst/cybozulib/include/cybozu/inttype.hpp similarity index 100% rename from maxsubst/cybozulib/include/cybozu/inttype.hpp rename to ldig/maxsubst/cybozulib/include/cybozu/inttype.hpp diff --git a/maxsubst/cybozulib/include/cybozu/itoa.hpp b/ldig/maxsubst/cybozulib/include/cybozu/itoa.hpp similarity index 100% rename from maxsubst/cybozulib/include/cybozu/itoa.hpp rename to ldig/maxsubst/cybozulib/include/cybozu/itoa.hpp diff --git a/maxsubst/cybozulib/include/cybozu/string.hpp b/ldig/maxsubst/cybozulib/include/cybozu/string.hpp similarity index 100% rename from maxsubst/cybozulib/include/cybozu/string.hpp rename to ldig/maxsubst/cybozulib/include/cybozu/string.hpp diff --git a/maxsubst/esa.hxx b/ldig/maxsubst/esa.hxx similarity index 100% rename from maxsubst/esa.hxx rename to ldig/maxsubst/esa.hxx diff --git a/maxsubst/maxsubst.cpp b/ldig/maxsubst/maxsubst.cpp similarity index 100% rename from maxsubst/maxsubst.cpp rename to ldig/maxsubst/maxsubst.cpp diff --git a/maxsubst/maxsubst.vcproj b/ldig/maxsubst/maxsubst.vcproj similarity index 100% rename from maxsubst/maxsubst.vcproj rename to ldig/maxsubst/maxsubst.vcproj diff --git a/maxsubst/readme.md b/ldig/maxsubst/readme.md similarity index 100% rename from maxsubst/readme.md rename to ldig/maxsubst/readme.md diff --git a/ldig/maxsubst/sais.hxx b/ldig/maxsubst/sais.hxx new file mode 100644 index 0000000..20e69df --- /dev/null +++ b/ldig/maxsubst/sais.hxx @@ -0,0 +1,364 @@ +/* + * sais.hxx for sais-lite + * Copyright (c) 2008-2009 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _SAIS_HXX +#define _SAIS_HXX 1 +#ifdef __cplusplus + +#ifdef __INTEL_COMPILER +#pragma warning(disable : 383 981 1418) +// for icc 64-bit +//#define __builtin_vsnprintf(a, b, c, d) __builtin_vsnprintf(a, b, c, (char *)d) +#endif + +#include +#ifdef _OPENMP +# include +#endif + +namespace saisxx_private { + +/* find the start or end of each bucket */ +template +void +getCounts(const string_type T, bucket_type C, index_type n, index_type k) { +#ifdef _OPENMP + bucket_type D; + index_type i, j, p, sum, first, last; + int thnum, maxthreads = omp_get_max_threads(); +#pragma omp parallel default(shared) private(D, i, thnum, first, last) + { + thnum = omp_get_thread_num(); + D = C + thnum * k; + first = n / maxthreads * thnum; + last = (thnum < (maxthreads - 1)) ? 
n / maxthreads * (thnum + 1) : n; + for(i = 0; i < k; ++i) { D[i] = 0; } + for(i = first; i < last; ++i) { ++D[T[i]]; } + } + if(1 < maxthreads) { +#pragma omp parallel for default(shared) private(i, j, p, sum) + for(i = 0; i < k; ++i) { + for(j = 1, p = i + k, sum = C[i]; j < maxthreads; ++j, p += k) { + sum += C[p]; + } + C[i] = sum; + } + } +#else + index_type i; + for(i = 0; i < k; ++i) { C[i] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } +#endif +} +template +void +getBuckets(const bucket_type C, bucket_type B, index_type k, bool end) { + index_type i, sum = 0; + if(end) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } } + else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } } +} + +/* compute SA and BWT */ +template +void +induceSA(string_type T, sarray_type SA, bucket_type C, bucket_type B, + index_type n, index_type k) { +typedef typename std::iterator_traits::value_type char_type; + sarray_type b; + index_type i, j; + char_type c0, c1; + /* compute SAl */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, false); /* find starts of buckets */ + b = SA + B[c1 = T[j = n - 1]]; + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + for(i = 0; i < n; ++i) { + j = SA[i], SA[i] = ~j; + if(0 < j) { + if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + } + } + /* compute SAs */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, true); /* find ends of buckets */ + for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) { + if(0 < (j = SA[i])) { + if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *--b = ((j == 0) || (T[j - 1] > c1)) ? 
~j : j; + } else { + SA[i] = ~j; + } + } +} +template +int +computeBWT(string_type T, sarray_type SA, bucket_type C, bucket_type B, + index_type n, index_type k) { +typedef typename std::iterator_traits::value_type char_type; + sarray_type b; + index_type i, j, pidx = -1; + char_type c0, c1; + /* compute SAl */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, false); /* find starts of buckets */ + b = SA + B[c1 = T[j = n - 1]]; + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + for(i = 0; i < n; ++i) { + if(0 < (j = SA[i])) { + SA[i] = ~(c0 = T[--j]); + if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + } else if(j != 0) { + SA[i] = ~j; + } + } + /* compute SAs */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, true); /* find ends of buckets */ + for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) { + if(0 < (j = SA[i])) { + SA[i] = (c0 = T[--j]); + if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j; + } else if(j != 0) { + SA[i] = ~j; + } else { + pidx = i; + } + } + return pidx; +} + +/* find the suffix array SA of T[0..n-1] in {0..k}^n + use a working space (excluding s and SA) of at most 2n+O(1) for a constant alphabet */ +template +int +suffixsort(string_type T, sarray_type SA, + index_type fs, index_type n, index_type k, + bool isbwt) { +typedef typename std::iterator_traits::value_type char_type; + sarray_type RA; + index_type i, j, m, p, q, plen, qlen, name, pidx = 0; + bool diff; + int c; +#ifdef _OPENMP + int maxthreads = omp_get_max_threads(); +#else +# define maxthreads 1 +#endif + char_type c0, c1; + + /* stage 1: reduce the problem by at least 1/2 + sort all the S-substrings */ + if(fs < (maxthreads * k)) { + index_type *C, *B; + if((C = new index_type[maxthreads * k]) == 0) { return -2; } + B = (1 < maxthreads) ? 
C + k : C; + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = 0; i < n; ++i) { SA[i] = 0; } + for(i = n - 2, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { SA[--B[c1]] = i + 1, c = 0; } + } + induceSA(T, SA, C, B, n, k); + delete [] C; + } else { + sarray_type C, B; + C = SA + n; + B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C; + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = 0; i < n; ++i) { SA[i] = 0; } + for(i = n - 2, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { SA[--B[c1]] = i + 1, c = 0; } + } + induceSA(T, SA, C, B, n, k); + } + + /* compact all the sorted substrings into the first m items of SA + 2*m must be not larger than n (proveable) */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i, j, p, c0, c1) + for(i = 0; i < n; ++i) { + p = SA[i]; + if((0 < p) && (T[p - 1] > (c0 = T[p]))) { + for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { } + if((j < n) && (c0 < c1)) { SA[i] = ~p; } + } + } + for(i = 0, m = 0; i < n; ++i) { if((p = SA[i]) < 0) { SA[m++] = ~p; } } +#else + for(i = 0, m = 0; i < n; ++i) { + p = SA[i]; + if((0 < p) && (T[p - 1] > (c0 = T[p]))) { + for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { } + if((j < n) && (c0 < c1)) { SA[m++] = p; } + } + } +#endif + j = m + (n >> 1); +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = m; i < j; ++i) { SA[i] = 0; } /* init the name array buffer */ + /* store the length of all substrings */ + for(i = n - 2, j = n, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { SA[m + ((i + 1) >> 1)] = j - i - 1; j = i + 1; c = 0; } + } + /* find the 
lexicographic names of all substrings */ + for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) { + p = SA[i], plen = SA[m + (p >> 1)], diff = true; + if(plen == qlen) { + for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { } + if(j == plen) { diff = false; } + } + if(diff != false) { ++name, q = p, qlen = plen; } + SA[m + (p >> 1)] = name; + } + + /* stage 2: solve the reduced problem + recurse if names are not yet unique */ + if(name < m) { + RA = SA + n + fs - m; + for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) { + if(SA[i] != 0) { RA[j--] = SA[i] - 1; } + } + if(suffixsort(RA, SA, fs + n - m * 2, m, name, false) != 0) { return -2; } + for(i = n - 2, j = m - 1, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { RA[j--] = i + 1, c = 0; } /* get p1 */ + } +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; } /* get index in s */ + } + + /* stage 3: induce the result for the original problem */ + if(fs < (maxthreads * k)) { + index_type *B, *C; + if((C = new index_type[maxthreads * k]) == 0) { return -2; } + B = (1 < maxthreads) ? C + k : C; + /* put all left-most S characters into their buckets */ + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */ + for(i = m - 1; 0 <= i; --i) { + j = SA[i], SA[i] = 0; + SA[--B[T[j]]] = j; + } + if(isbwt == false) { induceSA(T, SA, C, B, n, k); } + else { pidx = computeBWT(T, SA, C, B, n, k); } + delete [] C; + } else { + sarray_type C, B; + C = SA + n; + B = ((1 < maxthreads) || (k <= (fs - k))) ? 
C + k : C; + /* put all left-most S characters into their buckets */ + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */ + for(i = m - 1; 0 <= i; --i) { + j = SA[i], SA[i] = 0; + SA[--B[T[j]]] = j; + } + if(isbwt == false) { induceSA(T, SA, C, B, n, k); } + else { pidx = computeBWT(T, SA, C, B, n, k); } + } + + return pidx; +#ifndef _OPENMP +# undef maxthreads +#endif +} + +} /* namespace saisxx_private */ + + +/** + * @brief Constructs the suffix array of a given string in linear time. + * @param T[0..n-1] The input string. (random access iterator) + * @param SA[0..n-1] The output array of suffixes. (random access iterator) + * @param n The length of the given string. + * @param k The alphabet size. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +template +int +saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) { + int err; + if((n < 0) || (k <= 0)) { return -1; } + if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } + try { err = saisxx_private::suffixsort(T, SA, 0, n, k, false); } + catch(...) { err = -2; } + return err; +} + +/** + * @brief Constructs the burrows-wheeler transformed string of a given string in linear time. + * @param T[0..n-1] The input string. (random access iterator) + * @param U[0..n-1] The output string. (random access iterator) + * @param A[0..n-1] The temporary array. (random access iterator) + * @param n The length of the given string. + * @param k The alphabet size. + * @return The primary index if no error occurred, -1 or -2 otherwise. 
+ */ +template +index_type +saisxx_bwt(string_type T, string_type U, sarray_type A, index_type n, index_type k = 256) { +typedef typename std::iterator_traits::value_type char_type; + index_type i, pidx; + if((n < 0) || (k <= 0)) { return -1; } + if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + try { + pidx = saisxx_private::suffixsort(T, A, 0, n, k, true); + if(0 <= pidx) { + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; } + for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; } + pidx += 1; + } + } catch(...) { pidx = -2; } + return pidx; +} + + +#endif /* __cplusplus */ +#endif /* _SAIS_HXX */ diff --git a/ldig/models/ldig.model.small.tar.gz b/ldig/models/ldig.model.small.tar.gz new file mode 100644 index 0000000..7023834 Binary files /dev/null and b/ldig/models/ldig.model.small.tar.gz differ diff --git a/models/model.latin.20120315.tar.xz b/ldig/models/model.latin.20120209.tar.gz similarity index 63% rename from models/model.latin.20120315.tar.xz rename to ldig/models/model.latin.20120209.tar.gz index 3530dd1..d645ccd 100644 Binary files a/models/model.latin.20120315.tar.xz and b/ldig/models/model.latin.20120209.tar.gz differ diff --git a/ldig/models/model.latin.20120315.tar.gz b/ldig/models/model.latin.20120315.tar.gz new file mode 100644 index 0000000..451fe28 Binary files /dev/null and b/ldig/models/model.latin.20120315.tar.gz differ diff --git a/server.py b/ldig/server.py similarity index 100% rename from server.py rename to ldig/server.py diff --git a/static/index.html b/ldig/static/index.html similarity index 100% rename from static/index.html rename to ldig/static/index.html diff --git a/static/info.html b/ldig/static/info.html similarity index 100% rename from static/info.html rename to ldig/static/info.html diff --git a/static/jquery-1.7.1.min.js b/ldig/static/jquery-1.7.1.min.js similarity index 100% rename from static/jquery-1.7.1.min.js rename to ldig/static/jquery-1.7.1.min.js diff --git a/test_da.py 
b/ldig/test_da.py similarity index 100% rename from test_da.py rename to ldig/test_da.py diff --git a/testcase.py b/ldig/testcase.py similarity index 100% rename from testcase.py rename to ldig/testcase.py diff --git a/models/ldig.model.small.tgz b/models/ldig.model.small.tgz deleted file mode 100644 index 04f4a45..0000000 Binary files a/models/ldig.model.small.tgz and /dev/null differ diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6bc759c --- /dev/null +++ b/setup.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python2 + +from setuptools import setup + +setup( + name='ldig', + version='0.1.0', + url='https://github.com/shuyo/ldig', + license='MIT', + author='(c)2011-2012 Nakatani Shuyo / Cybozu Labs Inc. All rights reserved.', + description='This is a prototype of language detection for short message' + ' service (twitter). With 99.1% accuracy for 17 languages', + packages=['ldig'], + include_package_data=True, + zip_safe=False, + platforms='any', + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Environment :: Console', + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Information Analysis', + 'Topic :: Software Development', + 'Topic :: Software Development :: Internationalization', + 'Topic :: Software Development :: Libraries', + ], +)