From ac916fd51e6f371adb1b47438b5aa5d9c0fac81e Mon Sep 17 00:00:00 2001
From: Pablo Martin-Gomez <pablomg@eskapa.be>
Date: Sat, 6 Jun 2020 21:01:35 +0200
Subject: [PATCH] Use a lookup table for decoding

According to the "measure_time.cpp" benchmark, decoding is 2.55 (g++ -O3) / 2.65 (g++ -O0) times slower than encoding. One of the bottlenecks is that several comparisons and a calculation are performed for each decoded character. Replacing the comparisons and calculation with a lookup table (as is already done for encoding) improves the decoding speed, making it only 2.22 (g++ -O0) / 1.36 (g++ -O2) times slower than encoding.
---
 base64.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/base64.cpp b/base64.cpp
index 843719e..98cf83c 100644
--- a/base64.cpp
+++ b/base64.cpp
@@ -41,7 +41,7 @@
  // two sets of base64 characters needs to be chosen.
  // They differ in their last two characters.
  //
-static const char* base64_chars[2] = {
+static const char* to_base64_chars[2] = {
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
              "abcdefghijklmnopqrstuvwxyz"
              "0123456789"
@@ -52,17 +52,31 @@ static const char* base64_chars[2] = {
              "0123456789"
              "-_"};
 
+static const unsigned char from_base64_chars[256] = { // Decode table indexed by byte value -> 6-bit base64 value; 64 marks a byte with no base64 value (pos_of_char tests for it). Character ranges noted per row assume an ASCII-compatible character set.
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0x00-0x0F
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0x10-0x1F
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 62, 64, 63, // 0x20-0x2F: '+' and '-' both -> 62, '/' -> 63 (standard and URL alphabets are both accepted)
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64, // 0x30-0x3F: '0'-'9' -> 52-61
+    64,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 0x40-0x4F: 'A'-'O' -> 0-14
+    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 63, // 0x50-0x5F: 'P'-'Z' -> 15-25, '_' -> 63 (URL counterpart of '/')
+    64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 0x60-0x6F: 'a'-'o' -> 26-40
+    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64, // 0x70-0x7F: 'p'-'z' -> 41-51
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0x80-0x8F: no byte >= 0x80 has a base64 value
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0x90-0x9F
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0xA0-0xAF
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0xB0-0xBF
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0xC0-0xCF
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0xD0-0xDF
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 0xE0-0xEF
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64  // 0xF0-0xFF
+};
+
 static unsigned int pos_of_char(const unsigned char chr) {
  //
  // Return the position of chr within base64_encode()
  //
 
-    if      (chr >= 'A' && chr <= 'Z') return chr - 'A';
-    else if (chr >= 'a' && chr <= 'z') return chr - 'a' + ('Z' - 'A')               + 1;
-    else if (chr >= '0' && chr <= '9') return chr - '0' + ('Z' - 'A') + ('z' - 'a') + 2;
-    else if (chr == '+' || chr == '-') return 62; // Be liberal with input and accept both url ('-') and non-url ('+') base 64 characters (
-    else if (chr == '/' || chr == '_') return 63; // Ditto for '/' and '_'
-    else
+    if (from_base64_chars[chr] != 64) return from_base64_chars[chr];
  //
  // 2020-10-23: Throw std::exception rather than const char*
  //(Pablo Martin-Gomez, https://github.com/Bouska)
@@ -123,7 +137,7 @@ std::string base64_encode(unsigned char const* bytes_to_encode, size_t in_len, b
  // the correct character set is chosen by subscripting
  // base64_chars with url.
  //
-    const char* base64_chars_ = base64_chars[url];
+    const char* base64_chars_ = to_base64_chars[url];
 
     std::string ret;
     ret.reserve(len_encoded);