diff --git a/.gitignore b/.gitignore index a0be001..169b41d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ build/ *.os *.a *.so +pydoc/_build diff --git a/README.md b/README.md index 87c2f6d..e8adff3 100644 --- a/README.md +++ b/README.md @@ -68,10 +68,7 @@ After building the executables, you can run two simple test runs as follows: To build the Python extension, run - python setup.py build - sudo python setup.py install - -(this is currently broken) + pip install . # Documentation / Examples @@ -165,9 +162,10 @@ storage format. # Python API -The `clstm.i` file implements a simple Python interface to clstm, plus -a wrapper that makes an INetwork mostly a replacement for the lstm.py -implementation from ocropy. +The source code includes a Python interface to clstm (via Cython). Currently +it only exposes the `CLSTMOCR` class for OCR training and prediction. +To install it, just make sure you have the above dependencies and +Cython (>=0.23) installed and run `pip install .`. # Comand Line Drivers diff --git a/_clstm.pxd b/_clstm.pxd new file mode 100644 index 0000000..9dae05c --- /dev/null +++ b/_clstm.pxd @@ -0,0 +1,81 @@ +from cpython.ref cimport PyObject +from libc.stddef cimport wchar_t +from libcpp.vector cimport vector +from libcpp.string cimport string +from libcpp.memory cimport shared_ptr + + +cdef extern from "<string>" namespace "std": + cppclass wstring: + cppclass iterator: + iterator() + wchar_t* operator*() + iterator(iterator &) + iterator operator++() + iterator operator--() + iterator operator==(iterator) + iterator operator!=(iterator) + iterator begin() + iterator end() + + +cdef extern from "pyextra_defs.h": + cdef Py_ssize_t Unicode_AsWideChar(PyObject* ustr, Py_ssize_t length, + wchar_t* wchars) + + +cdef extern from "pstring.h": + wstring utf8_to_utf32(string s) + + +cdef extern from "clstm.h": + cdef double levenshtein[A, B](A a, B b) + + +cdef extern from "clstm.h" namespace "ocropus": + cdef cppclass Assoc: + Assoc() + Assoc(string &s) 
+ bint contains(string &key, bint parent = true) + string get(string &key) + string get(string &key, string default) + void set(string &key, string value) + + cdef cppclass INetwork: + Assoc attr + + ctypedef shared_ptr[INetwork] Network + + +cdef extern from "tensor.h" namespace "ocropus": + cppclass TensorMap2: + pass + + cdef cppclass Tensor2: + int dims[2] + float *ptr + void resize(int i, int j) + void put(float val, int i, int j) + float get(int i, int j) + TensorMap2 map() + +cdef extern from "clstmhl.h" namespace "ocropus": + struct CharPrediction: + int i + int x + wchar_t c + float p + + # NOTE: The content of `codec` should be the utf-32 characters that the + # network is supposed to learn, encoded as integers + cppclass CLSTMOCR: + int target_height + Network net + bint maybe_load(string &fname) + bint maybe_save(string &fname) + void createBidi(vector[int] codec, int nhidden) + void setLearningRate(float learning_rate, float momentum) + string train_utf8(TensorMap2 imgdata, string &target) + string predict_utf8(TensorMap2 imgdata) + void predict(vector[CharPrediction] &preds, TensorMap2 imgdata) + string aligned_utf8() diff --git a/clstm.i b/clstm.i deleted file mode 100644 index 9fb45ce..0000000 --- a/clstm.i +++ /dev/null @@ -1,304 +0,0 @@ -// -*- C++ -*- - -%{ -#pragma GCC diagnostic ignored "-Wstrict-aliasing" -#pragma GCC diagnostic ignored "-Wuninitialized" -#pragma GCC diagnostic ignored "-Wunused-but-set-variable" -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -%} - -%module(docstring="C-version of the ocropy LSTM implementation") clstm; -%feature("autodoc",1); -%include "typemaps.i" -%include "std_string.i" -%include "std_wstring.i" -%include "std_shared_ptr.i" -%include "std_vector.i" -%shared_ptr(ITrainable) -%shared_ptr(INetwork) -#ifdef SWIGPYTHON -%include "cstring.i" -#endif - -%{ -#include -#include -#include "clstm.h" -#include "clstm_compute.h" -using namespace ocropus; -using namespace std; -%} - -typedef float Float; -using 
std::string; - -#ifdef SWIGPYTHON -%exception { - try { - $action - } - catch(const char *s) { - PyErr_SetString(PyExc_IndexError,s); - return NULL; - } - catch(...) { - PyErr_SetString(PyExc_IndexError,"unknown exception in iulib"); - return NULL; - } -} -#endif - -%{ -#include "numpy/arrayobject.h" -%} - -%init %{ -import_array(); -%} - -/* create simple interface definitions for the built-in Sequence types */ - -struct Classes { - Classes(); - ~Classes(); - %rename(__getitem__) operator[]; - int operator[](int i); - int size(); - void resize(int); -}; -%extend Classes { - void __setitem__(int i,int value) { - (*$self)[i] = value; - } -} - -struct Batch { - void resize(int,int); - void setZero(int,int); - int rows(); - int cols(); - float &v(int,int); - float &d(int,int); -}; - -struct Params { - void resize(int,int); - void setZero(int,int); - int rows(); - int cols(); - float &v(int,int); - float &d(int,int); -}; - - -struct Sequence { - Sequence(); - ~Sequence(); - int size(); - int rows(); - int cols(); - %rename(__getitem__) operator[]; - Batch &operator[](int i); -}; - -struct Assoc { - string get(string key); - string get(string key, string dflt); - void set(string key, string dflt); -}; - -struct Codec { - std::vector codec; - int size() { return codec.size(); } - void set(const vector &data); - wchar_t decode(int cls); - std::wstring decode(Classes &cs); - void encode(Classes &cs, const std::wstring &s); -private: - void operator=(const Codec &); -}; - -struct INetwork; -typedef std::shared_ptr Network; -%template(vectornet) std::vector >; - -struct INetwork { - string kind; - Assoc attr; - virtual void setLearningRate(Float lr, Float momentum) = 0; - virtual void forward() = 0; - virtual void backward() = 0; - virtual void initialize(); - virtual ~INetwork(); - Sequence inputs; - Sequence outputs; - std::vector > sub; - Codec codec; - Codec icodec; - virtual int ninput(); - virtual int noutput(); - virtual void add(std::shared_ptr net); -}; - -void 
sgd_update(Network net); -void set_inputs(Network net, Sequence &inputs); -void set_targets(Network net, Sequence &targets); -void set_classes(Network net, Classes &classes); -void mktargets(Sequence &seq, Classes &targets, int ndim); - -std::shared_ptr make_layer(string); -std::shared_ptr make_net_init(string,string); - -#if 0 -%rename(seq_forward) forward_algorithm; -void forward_algorithm(Mat &lr,Mat &lmatch,double skip=-5.0); -%rename(seq_forwardbackward) forwardbackward; -void forwardbackward(Mat &both,Mat &lmatch); -#endif - -%rename(seq_ctc_align) ctc_align_targets; -void ctc_align_targets(Sequence &posteriors,Sequence &outputs,Sequence &targets); -void mktargets(Sequence &seq, Classes &targets, int ndim); - -void save_net(const string &file, Network net); -Network load_net(const string &file); - -%rename(network_info) network_info_as_strings; - -%inline %{ -int string_edit_distance(string a, string b) { - return levenshtein(a, b); -} - -string network_info_as_string(Network net) { - string result = ""; - walk_networks(net, [&result] (string s, INetwork *net) { - double lr = net->attr.get("learning_rate","-1"); - double momentum = net->attr.get("momentum","-1"); - result += s + ": " + to_string(lr); - result += string(" ") + to_string(momentum); - result += string(" ") + to_string(net->ninput()); - result += string(" ") + to_string(net->noutput()); - result += "\n"; - }); - return result; -} - -string sequence_info(Sequence &seq) { - string result = ""; - result += to_string(seq.size()); - result += string(":") + (seq.size()>0?to_string(seq[0].rows()):"*"); - result += string(":") + (seq.size()>0?to_string(seq[0].cols()):"*"); -#if 0 - // FIXME - double lo = 1e99, hi = -1e99; - for (int t=0;t