/*
base64.cpp and base64.h
Copyright (C) 2004-2008 René Nyffenegger
This source code is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this source code must not be misrepresented; you must not
claim that you wrote the original source code. If you use this source code
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original source code.
3. This notice may not be removed or altered from any source distribution.
René Nyffenegger rene.nyffenegger@adp-gmbh.ch
*/
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static inline bool is_base64(unsigned char c) {
return (isalnum(c) || (c == '+') || (c == '/'));
}
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
std::string ret;
int i = 0;
int j = 0;
unsigned char char_array_3[3];
unsigned char char_array_4[4];
while (in_len--) {
char_array_3[i++] = *(bytes_to_encode++);
if (i == 3) {
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for(i = 0; (i <4) ; i++)
ret += base64_chars[char_array_4[i]];
i = 0;
}
}
if (i)
{
for(j = i; j < 3; j++)
char_array_3[j] = '\0';
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for (j = 0; (j < i + 1); j++)
ret += base64_chars[char_array_4[j]];
while((i++ < 3))
ret += '=';
}
return ret;
}
std::string base64_decode(std::string const& encoded_string) {
int in_len = encoded_string.size();
int i = 0;
int j = 0;
int in_ = 0;
unsigned char char_array_4[4], char_array_3[3];
std::string ret;
while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
if (i ==4) {
for (i = 0; i <4; i++)
char_array_4[i] = base64_chars.find(char_array_4[i]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
ret += char_array_3[i];
i = 0;
}
}
if (i) {
for (j = i; j <4; j++)
char_array_4[j] = 0;
for (j = 0; j <4; j++)
char_array_4[j] = base64_chars.find(char_array_4[j]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
}
return ret;
}
According to this excellent comparison made by GaspardP I would not choose this solution. It's not the worst, but it's not the best either. The only thing it got going for it is that it's possibly easier to understand.
I found the other two answers to be pretty hard to understand. They also produce some warnings in my compiler and the use of a find function in the decode part should result in a pretty bad efficiency. So I decided to roll my own.
It avoids copying the parameters at the expense of a couple of tests.
Uses uint8_t instead of BYTE.
Adds some handy functions for dealing with strings, although usually the input data is binary and may have zero bytes inside, so typically should not be manipulated as a string (which often implies null-terminated data).
Also adds some casts to fix compiler warnings (at least on GCC, I haven't run it through MSVC yet).
Part of file base64.hpp:
void base64_encode(string & out, const vector<uint8_t>& buf);
void base64_encode(string & out, const uint8_t* buf, size_t bufLen);
void base64_encode(string & out, string const& buf);
void base64_decode(vector<uint8_t> & out, string const& encoded_string);
// Use this if you know the output should be a valid string
void base64_decode(string & out, string const& encoded_string);
File base64.cpp:
static const uint8_t from_base64[128] = {
// 8 rows of 16 = 128
// Note: only requires 123 entries, as we only lookup for <= z , which z=122
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 62, 255, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 0, 255, 255, 255,
255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63,
255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255
};
static const char to_base64[65] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
void base64_encode(string & out, string const& buf)
{
if (buf.empty())
base64_encode(out, NULL, 0);
else
base64_encode(out, reinterpret_cast<uint8_t const*>(&buf[0]), buf.size());
}
void base64_encode(string & out, std::vector<uint8_t> const& buf)
{
if (buf.empty())
base64_encode(out, NULL, 0);
else
base64_encode(out, &buf[0], buf.size());
}
void base64_encode(string & ret, uint8_t const* buf, size_t bufLen)
{
// Calculate how many bytes that needs to be added to get a multiple of 3
size_t missing = 0;
size_t ret_size = bufLen;
while ((ret_size % 3) != 0)
{
++ret_size;
++missing;
}
// Expand the return string size to a multiple of 4
ret_size = 4*ret_size/3;
ret.clear();
ret.reserve(ret_size);
for (size_t i = 0; i < ret_size/4; ++i)
{
// Read a group of three bytes (avoid buffer overrun by replacing with 0)
const size_t index = i*3;
const uint8_t b3_0 = (index+0 < bufLen) ? buf[index+0] : 0;
const uint8_t b3_1 = (index+1 < bufLen) ? buf[index+1] : 0;
const uint8_t b3_2 = (index+2 < bufLen) ? buf[index+2] : 0;
// Transform into four base 64 characters
const uint8_t b4_0 = ((b3_0 & 0xfc) >> 2);
const uint8_t b4_1 = ((b3_0 & 0x03) << 4) + ((b3_1 & 0xf0) >> 4);
const uint8_t b4_2 = ((b3_1 & 0x0f) << 2) + ((b3_2 & 0xc0) >> 6);
const uint8_t b4_3 = ((b3_2 & 0x3f) << 0);
// Add the base 64 characters to the return value
ret.push_back(to_base64[b4_0]);
ret.push_back(to_base64[b4_1]);
ret.push_back(to_base64[b4_2]);
ret.push_back(to_base64[b4_3]);
}
// Replace data that is invalid (always as many as there are missing bytes)
for (size_t i = 0; i != missing; ++i)
ret[ret_size - i - 1] = '=';
}
template <class Out>
void base64_decode_any( Out & ret, std::string const& in)
{
typedef typename Out::value_type T;
// Make sure the *intended* string length is a multiple of 4
size_t encoded_size = in.size();
while ((encoded_size % 4) != 0)
++encoded_size;
const size_t N = in.size();
ret.clear();
ret.reserve(3*encoded_size/4);
for (size_t i = 0; i < encoded_size; i += 4)
{
// Note: 'z' == 122
// Get values for each group of four base 64 characters
const uint8_t b4_0 = ( in[i+0] <= 'z') ? from_base64[static_cast<uint8_t>(in[i+0])] : 0xff;
const uint8_t b4_1 = (i+1 < N and in[i+1] <= 'z') ? from_base64[static_cast<uint8_t>(in[i+1])] : 0xff;
const uint8_t b4_2 = (i+2 < N and in[i+2] <= 'z') ? from_base64[static_cast<uint8_t>(in[i+2])] : 0xff;
const uint8_t b4_3 = (i+3 < N and in[i+3] <= 'z') ? from_base64[static_cast<uint8_t>(in[i+3])] : 0xff;
// Transform into a group of three bytes
const uint8_t b3_0 = ((b4_0 & 0x3f) << 2) + ((b4_1 & 0x30) >> 4);
const uint8_t b3_1 = ((b4_1 & 0x0f) << 4) + ((b4_2 & 0x3c) >> 2);
const uint8_t b3_2 = ((b4_2 & 0x03) << 6) + ((b4_3 & 0x3f) >> 0);
// Add the byte to the return value if it isn't part of an '=' character (indicated by 0xff)
if (b4_1 != 0xff) ret.push_back( static_cast<T>(b3_0) );
if (b4_2 != 0xff) ret.push_back( static_cast<T>(b3_1) );
if (b4_3 != 0xff) ret.push_back( static_cast<T>(b3_2) );
}
}
void base64_decode(vector<uint8_t> & out, string const& encoded_string)
{
base64_decode_any(out, encoded_string);
}
void base64_decode(string & out, string const& encoded_string)
{
base64_decode_any(out, encoded_string);
}
Thanks to Jens Alfke for pointing out a performance issue, I have made some modifications to this old post. This one works way faster than before. Its other advantage is smooth handling of corrupt data as well.
Last edition: Although in these kinds of problems, it seems that speed is an overkill, but just for the fun of it I have made some other modifications to make this one the fastest algorithm out there AFAIK. Special thanks goes to GaspardP for his valuable suggestions and nice benchmark.