Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement and test file seek, extract common "read_exactly" for reuse #368

Merged
merged 2 commits into from
Dec 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 2 additions & 25 deletions include/osmium/io/detail/pbf_input_format.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,37 +114,14 @@ namespace osmium {
return size;
}

/**
 * Fill buffer with exactly size bytes taken from the file descriptor
 * m_fd, issuing as many reliable_read() calls as it takes.
 *
 * On success the counter behind m_offset_ptr is advanced by size. If
 * EOF is hit first, the counter is left untouched.
 *
 * @pre Value in size parameter must fit in unsigned int
 * @returns true if size bytes could be read
 *          false if EOF was encountered
 */
bool read_exactly(char* buffer, std::size_t size) {
    char* cursor = buffer;
    std::size_t remaining = size;

    while (remaining != 0) {
        auto const chunk = osmium::io::detail::reliable_read(m_fd, cursor, static_cast<unsigned int>(remaining));
        if (chunk == 0) {
            // A zero-length read is treated as EOF.
            return false;
        }
        cursor += chunk;
        remaining -= chunk;
    }

    // Only a complete read moves the tracked offset forward.
    *m_offset_ptr += size;

    return true;
}

/**
* Read 4 bytes in network byte order from file. They contain
* the length of the following BlobHeader.
*/
uint32_t read_blob_header_size_from_file() {
if (m_fd != -1) {
std::array<char, sizeof(uint32_t)> buffer{};
if (!read_exactly(buffer.data(), buffer.size())) {
if (!osmium::io::detail::read_exactly(m_fd, buffer.data(), static_cast<unsigned int>(buffer.size()))) {
return 0; // EOF
}
return check_size(get_size_in_network_byte_order(buffer.data()));
Expand Down Expand Up @@ -230,7 +207,7 @@ namespace osmium {
if (m_fd != -1) {
buffer.resize(size);

if (!read_exactly(&*buffer.begin(), size)) {
if (!osmium::io::detail::read_exactly(m_fd, &*buffer.begin(), static_cast<unsigned int>(size))) {
throw osmium::pbf_error{"unexpected EOF"};
}
} else {
Expand Down
24 changes: 24 additions & 0 deletions include/osmium/io/detail/read_write.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ DEALINGS IN THE SOFTWARE.
#include <osmium/io/writer_options.hpp>
#include <osmium/util/file.hpp>

#include <cassert>
#include <cerrno>
#include <cstddef>
#include <fcntl.h>
Expand Down Expand Up @@ -201,6 +202,29 @@ namespace osmium {
return nread;
}

/**
 * Read exactly size bytes from fd into buffer.
 *
 * A single reliable_read() call may hand back fewer bytes than were
 * asked for. This function keeps calling it, each time asking only for
 * the part that is still missing, until the request is complete or a
 * call returns zero bytes (which is taken to mean EOF).
 *
 * @param fd Open file descriptor to read from.
 * @param buffer Destination for the data. Must be at least size bytes long.
 * @param size Number of bytes to read.
 * @returns true if size bytes could be read
 *          false if EOF was encountered
 */
inline bool read_exactly(int fd, char* buffer, unsigned int size) {
    char* cursor = buffer;
    unsigned int remaining = size;

    while (remaining != 0) {
        auto const chunk = reliable_read(fd, cursor, remaining);
        if (chunk == 0) {
            // Nothing more to read: EOF before the request was satisfied.
            return false;
        }
        // reliable_read must never deliver more than was requested.
        assert(chunk <= remaining);
        cursor += chunk;
        remaining -= chunk;
    }

    return true;
}

inline void reliable_fsync(const int fd) {
#ifdef _MSC_VER
osmium::detail::disable_invalid_parameter_handler diph;
Expand Down
16 changes: 16 additions & 0 deletions include/osmium/util/file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,22 @@ namespace osmium {
return static_cast<std::size_t>(offset);
}

/**
 * Set current offset into file.
 *
 * The result of the underlying seek call is deliberately discarded:
 * this function is noexcept and returns nothing, so a failed seek is
 * not reported to the caller.
 *
 * @param fd Open file descriptor.
 * @param offset Desired absolute offset into the file, in bytes from
 *               the start of the file (SEEK_SET).
 */
inline void file_seek(int fd, std::size_t offset) noexcept {
#ifdef _MSC_VER
    osmium::detail::disable_invalid_parameter_handler diph;
    // https://msdn.microsoft.com/en-us/library/1yee101t.aspx
    static_cast<void>(_lseeki64(fd, static_cast<__int64>(offset), SEEK_SET));
#else
    // The unsigned offset is converted explicitly to the signed type
    // lseek() expects instead of relying on an implicit conversion.
    static_cast<void>(::lseek(fd, static_cast<off_t>(offset), SEEK_SET));
#endif
}

/**
* Check whether the file descriptor refers to a TTY.
*
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ add_unit_test(io test_compression_factory)
add_unit_test(io test_file_formats)
add_unit_test(io test_nocompression)
add_unit_test(io test_output_utils)
add_unit_test(io test_file_seek)
add_unit_test(io test_string_table)

add_unit_test(io test_bzip2 ENABLE_IF ${BZIP2_FOUND} LIBS ${BZIP2_LIBRARIES})
Expand Down
61 changes: 61 additions & 0 deletions test/t/io/test_file_seek.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include "catch.hpp"

#include "utils.hpp"

#include <osmium/io/detail/read_write.hpp>
#include <osmium/util/file.hpp>

/**
 * Seeking to several absolute offsets (forward, backward, repeated)
 * and reading eight bytes from each must yield the known file content.
 */
TEST_CASE("Seek and read in files") {
    // One probe: where to seek to and which eight bytes must follow.
    struct seek_expectation {
        size_t offset;
        unsigned char eight_bytes[8];
    };
    const seek_expectation expectations[] = {
        { 0x00, {0x1f, 0x8b, 0x08, 0x08, 0x19, 0x4a, 0x18, 0x54} },
        { 0x00, {0x1f, 0x8b, 0x08, 0x08, 0x19, 0x4a, 0x18, 0x54} }, /* repeat / jump back */
        { 0x21, {0x56, 0xc6, 0x18, 0xc3, 0xea, 0x6d, 0x4f, 0xe0} }, /* unaligned */
        { 0xb3, {0xcd, 0x0a, 0xe7, 0x8f, 0xde, 0x00, 0x00, 0x00} }, /* close to end */
        { 0x21, {0x56, 0xc6, 0x18, 0xc3, 0xea, 0x6d, 0x4f, 0xe0} }, /* "long" backward jump */
    };

    /* A gzipped file is used as fixture because its binary content has
     * very few repetitions, so reading from a wrong position shows up
     * immediately. */
    const int fd = osmium::io::detail::open_for_reading(with_data_dir("t/io/data.osm.gz"));

    for (const auto& probe : expectations) {
        char bytes_read[8] = {};
        osmium::util::file_seek(fd, probe.offset);
        const bool complete = osmium::io::detail::read_exactly(fd, bytes_read, 8);
        REQUIRE(complete);
        for (int pos = 0; pos != 8; ++pos) {
            REQUIRE(probe.eight_bytes[pos] == static_cast<unsigned char>(bytes_read[pos]));
        }
    }
}

TEST_CASE("Seek close to end of file") {
    /* The fixture is 187 bytes long, so after seeking to offset 186
     * exactly one byte is left. A read asking for eight bytes must
     * return that single byte and leave the rest of the buffer alone. */
    const auto path = with_data_dir("t/io/data.osm.gz");
    const int fd = osmium::io::detail::open_for_reading(path);
    REQUIRE(osmium::util::file_size(path) == 187);

    // Pre-filled with 1 so that untouched bytes can be told apart.
    char bytes_read[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    osmium::util::file_seek(fd, 186);
    const auto bytes_returned = osmium::io::detail::reliable_read(fd, bytes_read, 8);

    REQUIRE(bytes_returned == 1);
    REQUIRE(bytes_read[0] == 0); // the last byte of the file
    REQUIRE(bytes_read[1] == 1); // never written to
}

TEST_CASE("Seek to exact end of file") {
    /* The fixture is 187 bytes long, so offset 187 is one past the
     * last byte. A read from there must return nothing and must not
     * modify the buffer. */
    const auto path = with_data_dir("t/io/data.osm.gz");
    const int fd = osmium::io::detail::open_for_reading(path);
    REQUIRE(osmium::util::file_size(path) == 187);

    // Pre-filled with 1 so that untouched bytes can be told apart.
    char bytes_read[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    osmium::util::file_seek(fd, 187);
    const auto bytes_returned = osmium::io::detail::reliable_read(fd, bytes_read, 8);

    REQUIRE(bytes_returned == 0);
    REQUIRE(bytes_read[0] == 1);
    REQUIRE(bytes_read[1] == 1);
}