diff --git a/LICENSE b/LICENSE index 6caac54e16e..149d71b25b7 100644 --- a/LICENSE +++ b/LICENSE @@ -22,7 +22,7 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -Catch.hpp is licensed under the Boost Software License. +src/unittest/catch.hpp is licensed under the Boost Software License. Boost Software License - Version 1.0 - August 17th, 2003 @@ -47,3 +47,11 @@ SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +src/stream/fdstream.hpp is licensed under the following license: + +(C) Copyright Nicolai M. Josuttis 2001. +Permission to copy, use, modify, sell and distribute this software +is granted provided this copyright notice appears in all copies. +This software is provided "as is" without express or implied +warranty, and with no claim as to its suitability for any purpose. diff --git a/src/stream/fd_streams.cpp b/src/stream/fd_streams.cpp deleted file mode 100644 index 0b9f79fa024..00000000000 --- a/src/stream/fd_streams.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/** - * \file fd_streams.cpp - * Implementations for file descriptor streams. - */ - -#include "fd_streams.hpp" - -#include - -namespace vg { - -namespace stream { - -using namespace std; - -fdstreambuf::fdstreambuf(int fd) : fd(fd) { - // Nothing to do! - // Leave all the buffers empty/nonexistent -} - -auto fdstreambuf::overflow(int c) -> int { - - if (c != traits_type::eof()) { - // It's not EOF, so write the character - char byte = (char) c; - // Do the write and return EOF if it failed. - return (write(fd, &byte, 1) == 1) ? c : traits_type::eof(); - } else { - // We got passed EOF. Just EOF right back. 
- return c; - } - -} - -auto fdstreambuf::underflow() -> int { - char byte; - // Read a byte, and return it if successful or EOF if failed. - return (read(fd, &byte, 1) == 1) ? (int) byte : traits_type::eof(); -} - -fdistream::fdistream(int fd) : istream(nullptr), backend(fd) { - // Now that buf is constructed, associate ourselves with it - rdbuf(&backend); -} - -fdostream::fdostream(int fd) : ostream(nullptr), backend(fd) { - // Now that buf is constructed, associate ourselves with it - rdbuf(&backend); -} - -} - -} diff --git a/src/stream/fd_streams.hpp b/src/stream/fd_streams.hpp deleted file mode 100644 index 09476f28f9d..00000000000 --- a/src/stream/fd_streams.hpp +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef VG_STREAM_FD_STREAMS_HPP_INCLUDED -#define VG_STREAM_FD_STREAMS_HPP_INCLUDED - -/** - * \file fd_streams.hpp - * Contains istream and ostream implementations that operate on file descriptors. - * Together with POSIX pipe(), this provides an easy way of talking to yourself over streams. - * This functionality really should be part of C++, but it isn't. See: https://stackoverflow.com/q/2746168 - */ - -#include -#include - -namespace vg { - -namespace stream { - -using namespace std; - -/** - * Streambuf implementation that reads from/writes to a file descriptor. The - * base streambuf class can use an internal buffer ("controlled" sequence), and - * can handle the reading/writing from that. It can also have an internal - * buffer where all the pointers are null, and all reads/writes immediately - * underflow/overflow. There still *is* a controlled sequence, it just - * immediately commits to the associated sequence. - * - * We are only responsible for overriding some virtual functions that fill - * in/clear out the buffer from/to the backing data source ("associated" - * sequence). 
- * - * See: https://en.cppreference.com/w/cpp/io/basic_streambuf and - * http://www.cplusplus.com/reference/streambuf/streambuf/ - * - * We MUST implement: - * - overflow() to set the current controlled output sequence character, and - * optionally provide some more write buffer space in the controlled output - * sequence and commit what was there. - * - underflow() to return the current controlled input sequence character, and - * optionally buffer some more in the controlled input sequence and rewrite - * pointers. - * - sync() to commit everything written to the controlled output sequence to - * the associated output sequence, IF we don't do that automatically (such as - * by having an always-empty buffer. - * - * We SHOULD implement: - * - xsputn() to put a bunch of characters into the controlled output sequence - * at once, possibly sending them to the associated output sequence. - * - xsgetn() to get a bunch of characters fromn the controlled input sequence - * at once, reading from the associated input sequence if necessary. - * - pbackfail() to back up the controlled input sequence by one character, - * when we are out of cached input sequnce in the buffer, if possible. - * Necessary for peek to work. - * - * Our current strategy is to not buffer or do putback at all, and just work on - * individual characters immediately read/written. The next TODO is batch - * reads/writes. - */ -class fdstreambuf : public streambuf { -public: - /** - * Make a new fdstreambuf wrapping the given file descriptor. Undefined - * behavior will happen if the FD is closed while the streambuf is alive, - * or if we try to read/write when the file descriptor doesn't support that - * direction of IO. - */ - fdstreambuf(int fd); - -protected: - - /// Put the given character in the controlled sequence, if it is not EOF. - /// Returns something other than EOF on success and EOF on failure. - int overflow(int c); - - /// Get a character and return it, or EOF on failure. 
- int underflow(); - - /// Stores the actual backing file descriptor - int fd; -}; - -// Now we define istream and ostream implementations that create, own, and associate an fdstreambuf. - -/** - * C++ istream that reads from a file descriptor. - */ -class fdistream : public istream { -public: - /// Wrap a file descriptor in an istream. - fdistream(int fd); -protected: - /// The streambuf implementation we use as a backend. - fdstreambuf backend; -}; - -/** - * C++ ostream that writes to a file descriptor. - */ -class fdostream : public ostream { -public: - /// Wrap a file descriptor in an ostream. - fdostream(int fd); -protected: - /// The streambuf implementation we use as a backend. - fdstreambuf backend; -}; - -} - -} - -#endif diff --git a/src/stream/fdstream.hpp b/src/stream/fdstream.hpp new file mode 100644 index 00000000000..72e65428b89 --- /dev/null +++ b/src/stream/fdstream.hpp @@ -0,0 +1,188 @@ +/* The following code declares classes to read from and write to + * file descriptors or file handles. + * + * See + * http://www.josuttis.com/cppcode + * for details and the latest version. + * + * - open: + * - integrating BUFSIZ on some systems? + * - optimized reading of multiple characters + * - stream for reading AND writing + * - i18n + * + * (C) Copyright Nicolai M. Josuttis 2001. + * Permission to copy, use, modify, sell and distribute this software + * is granted provided this copyright notice appears in all copies. + * This software is provided "as is" without express or implied + * warranty, and with no claim as to its suitability for any purpose.
+ * + * Version: Jul 28, 2002 + * History: + * Jan 29, 2019: namespace for vg project + * Jul 28, 2002: bugfix memcpy() => memmove() + * fdinbuf::underflow(): cast for return statements + * Aug 05, 2001: first public version + */ +#ifndef VG_STREAM_FDSTREAM_HPP_INCLUDED +#define VG_STREAM_FDSTREAM_HPP_INCLUDED + +#include +#include +#include +// for EOF: +#include +// for memmove(): +#include + + +// low-level read and write functions +#ifdef _MSC_VER +# include +#else +# include +//extern "C" { +// int write (int fd, const char* buf, int num); +// int read (int fd, char* buf, int num); +//} +#endif + + +namespace vg { + +namespace stream { + + +/************************************************************ + * fdostream + * - a stream that writes on a file descriptor + ************************************************************/ + + +class fdoutbuf : public std::streambuf { + protected: + int fd; // file descriptor + public: + // constructor + fdoutbuf (int _fd) : fd(_fd) { + } + protected: + // write one character + virtual int_type overflow (int_type c) { + if (c != EOF) { + char z = c; + if (write (fd, &z, 1) != 1) { + return EOF; + } + } + return c; + } + // write multiple characters + virtual + std::streamsize xsputn (const char* s, + std::streamsize num) { + return write(fd,s,num); + } +}; + +class fdostream : public std::ostream { + protected: + fdoutbuf buf; + public: + fdostream (int fd) : std::ostream(0), buf(fd) { + rdbuf(&buf); + } +}; + + +/************************************************************ + * fdistream + * - a stream that reads on a file descriptor + ************************************************************/ + +class fdinbuf : public std::streambuf { + protected: + int fd; // file descriptor + protected: + /* data buffer: + * - at most, pbSize characters in putback area plus + * - at most, bufSize characters in ordinary read buffer + */ + static const int pbSize = 4; // size of putback area + static const int bufSize = 1024; // size of 
the data buffer + char buffer[bufSize+pbSize]; // data buffer + + public: + /* constructor + * - initialize file descriptor + * - initialize empty data buffer + * - no putback area + * => force underflow() + */ + fdinbuf (int _fd) : fd(_fd) { + setg (buffer+pbSize, // beginning of putback area + buffer+pbSize, // read position + buffer+pbSize); // end position + } + + protected: + // insert new characters into the buffer + virtual int_type underflow () { +#ifndef _MSC_VER + using std::memmove; +#endif + + // is read position before end of buffer? + if (gptr() < egptr()) { + return traits_type::to_int_type(*gptr()); + } + + /* process size of putback area + * - use number of characters read + * - but at most size of putback area + */ + int numPutback; + numPutback = gptr() - eback(); + if (numPutback > pbSize) { + numPutback = pbSize; + } + + /* copy up to pbSize characters previously read into + * the putback area + */ + memmove (buffer+(pbSize-numPutback), gptr()-numPutback, + numPutback); + + // read at most bufSize new characters + int num; + num = read (fd, buffer+pbSize, bufSize); + if (num <= 0) { + // ERROR or EOF + return EOF; + } + + // reset buffer pointers + setg (buffer+(pbSize-numPutback), // beginning of putback area + buffer+pbSize, // read position + buffer+pbSize+num); // end of buffer + + // return next character + return traits_type::to_int_type(*gptr()); + } +}; + +class fdistream : public std::istream { + protected: + fdinbuf buf; + public: + fdistream (int fd) : std::istream(0), buf(fd) { + rdbuf(&buf); + } +}; + + +} + +} + +#endif diff --git a/src/stream/register_loader_saver_gcsa.cpp b/src/stream/register_loader_saver_gcsa.cpp index fb03d8c6744..faacaadd778 100644 --- a/src/stream/register_loader_saver_gcsa.cpp +++ b/src/stream/register_loader_saver_gcsa.cpp @@ -26,7 +26,8 @@ void register_loader_saver_gcsa() { return (void*) index; }), wrap_stream_saver([](const void* index_void, ostream& output) { // Cast to GCSA and serialize to the 
stream. - sdsl::serialize(*(const gcsa::GCSA*) index_void, output); + assert(index_void != nullptr); + ((const gcsa::GCSA*) index_void)->serialize(output); })); } diff --git a/src/stream/register_loader_saver_lcp.cpp b/src/stream/register_loader_saver_lcp.cpp index c233e4635fe..d6d74ae78be 100644 --- a/src/stream/register_loader_saver_lcp.cpp +++ b/src/stream/register_loader_saver_lcp.cpp @@ -26,7 +26,7 @@ void register_loader_saver_lcp() { return (void*) index; }), wrap_stream_saver([](const void* index_void, ostream& output) { // Cast to LCP and serialize to the stream. - sdsl::serialize(*(const gcsa::LCPArray*) index_void, output); + ((const gcsa::LCPArray*) index_void)->serialize(output); })); } diff --git a/src/stream/registry.cpp b/src/stream/registry.cpp index 1b5285ebd20..f6658f02f42 100644 --- a/src/stream/registry.cpp +++ b/src/stream/registry.cpp @@ -4,7 +4,7 @@ */ #include "registry.hpp" -#include "fd_streams.hpp" +#include "fdstream.hpp" #include "register_loader_saver_gcsa.hpp" #include "register_loader_saver_lcp.cpp" @@ -139,7 +139,9 @@ auto wrap_stream_loader(function istream_loader) -> load_functi auto wrap_stream_saver(function ostream_saver) -> save_function_t { // Capture the ostream-using function by value return [ostream_saver](const void* to_save, const message_consumer_function_t& emit_message) { - + + assert(to_save != nullptr); + // Open a pipe with an istream and an ostream. // First we make an array to hold input and output ends of a pipe. diff --git a/src/stream/vpkg.hpp b/src/stream/vpkg.hpp index 030953b2cb3..4fdefcf405c 100644 --- a/src/stream/vpkg.hpp +++ b/src/stream/vpkg.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace vg { @@ -43,8 +43,8 @@ class VPKG { // Make an iterator MessageIterator it(in); - // Make a destination tuple - tuple...> to_return; + // Create a collection of null void*s that will hold the allocated objects we want to load when we think we can load them. 
+ deque to_fill { (void*)(Wanted*)nullptr... }; bool keep_going = false; @@ -52,25 +52,24 @@ class VPKG { // We exploit initializer list evaluation order to be able to tell // individual calls resulting from a ... variadic template argument - // expansion what number they are, so they can index into a tuple. - // See https://stackoverflow.com/a/21194071 + // expansion what number they are, so they can index into a data + // structure. See https://stackoverflow.com/a/21194071 - size_t tuple_index = 0; + size_t index = 0; // Call the load function for each type, and get the statuses - vector load_statuses = {load_into_one(it, tuple_index++, to_return)...}; + vector load_statuses { load_into_one(it, index++, to_fill)... }; for (bool status : load_statuses) { // OR together all the statuses so we know if we need to continue for anything. keep_going |= status; } - - } while (keep_going); - // Now all the unique_ptrs that can be filled in are filled in - return to_return; + // Now all the unique_ptrs that can be filled in are filled in. + // Convert to a tuple and return. + return to_tuple(to_fill); } /** @@ -98,18 +97,41 @@ class VPKG { private: /** - * If the one item of type One at index i in the destination tuple can be - * filled from the given MessageIterator, and is empty, fill it. + * Given a collection of void pointers, give ownership of the objects they point to, if any, to unique_ptrs in a tuple. + */ + template + static tuple...> to_tuple(deque items) { + // Use initializer list expansion to repeatedly pop the first thing off the collection and type it correctly. + tuple...> to_return { extract_first(items)... }; + return to_return; + } + + /** + * Pop off the first item in the given collection and wrap it in a typed unique_ptr. 
+ */ + template + static unique_ptr extract_first(deque& pointers) { + // Grab off the first thing + void* got = pointers.front(); + pointers.pop_front(); + // Wrap it in a properly typed unique_ptr; + return unique_ptr((Pointed*) got); + } + + /** + * If the null slot at index i in the given collection of void*s can be + * filled with an object of type One from the given MessageIterator, fill + * it. * * Returns false if it can't be filled, or is already filled, or the * iterator is over. */ - template - static bool load_into_one(MessageIterator& it, size_t i, tuple...>& dest) { - // Find the pointer to load - unique_ptr& ptr = get(dest); + template + static bool load_into_one(MessageIterator& it, size_t i, deque& dest) { + // Find the slot to load into + void*& slot = dest[i]; - if (ptr.get() != nullptr) { + if (slot != nullptr) { // If it's already loaded, we're done return false; } @@ -131,13 +153,13 @@ class VPKG { } // Otherwise we can load, so do it. - ptr = unique_ptr((*loader)([&](const message_consumer_function_t& handle_message) { + slot = (*loader)([&](const message_consumer_function_t& handle_message) { while (it.has_next() && (*it).first == tag_to_load) { // Feed in messages from the file until we run out or the tag changes handle_message((*it).second); ++it; } - })); + }); // Now there's nothing left to load return false; diff --git a/src/unittest/vpkg.cpp b/src/unittest/vpkg.cpp index f4f34fa9f87..fc2f50de3fb 100644 --- a/src/unittest/vpkg.cpp +++ b/src/unittest/vpkg.cpp @@ -21,6 +21,10 @@ TEST_CASE("We can serialize and re-read an empty GCSA", "[vpkg][gcsa]") { gcsa::GCSA empty_index; + // Make sure we can save the empty index to a stream at all. + stringstream teststream; + empty_index.serialize(teststream); + stringstream ss; stream::VPKG::save(empty_index, ss);