Skip to content

Commit 50c7ba4

Browse files
authored
Merge pull request TheAlgorithms#685 from darius-welsch/Development
Add Base64 encoding and decoding
2 parents edac7f3 + 87eec2d commit 50c7ba4

File tree

2 files changed

+280
-0
lines changed

2 files changed

+280
-0
lines changed
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
package src.main.java.com.crypto.codec;
2+
3+
import java.nio.ByteBuffer;
4+
import java.nio.charset.StandardCharsets;
5+
import java.util.Arrays;
6+
7+
/**
8+
* <p>This class implements the Base64 codec as it is specified in the RFC 4648.
9+
* Base64 represents binary data as a text string. It is used to store or transfer
10+
* data in legacy systems that are restricted to 7 bit per character. There are
11+
* many variations of Base64, e.g. MIME (RFC 2045) specifies Base64 slightly
12+
* different. The RFC 4648 tries to solve this disambiguation.
13+
*
14+
* @see <a href="https://tools.ietf.org/html/rfc4648">RFC 4648</a>
15+
*/
16+
public class Base64 {
17+
private static final char pad = '=';
18+
private static final char[] encodeAlphabet = {
19+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
20+
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
21+
'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
22+
'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3',
23+
'4', '5', '6', '7', '8', '9', '+', '/'
24+
};
25+
private static final byte[] decodeAlphabet = {
26+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28+
-1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57,
29+
58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
30+
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1,
31+
-1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
32+
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
33+
};
34+
35+
/**
36+
* <p>Encodes user-provided data with the Base64 encoding. For this, the data
37+
* is split in to blocks of 3-byte length. The last block may be of a shorter
38+
* length. The blocks are then encoded, and the last block is padded.
39+
*
40+
* @param data The data that is to be Base64-encoded
41+
* @return Base64-encoded data
42+
* @see <a href="https://tools.ietf.org/html/rfc4648#section-4">RFC 4648 - Base64 encoding</a>
43+
*/
44+
public static String encode(byte[] data) {
45+
StringBuilder builder = new StringBuilder();
46+
int blockCount = data.length / 3;
47+
48+
// if last block is shorter than 3 bytes, then it's not buffered
49+
ByteBuffer buffer = ByteBuffer.wrap(data, 0, blockCount * 3);
50+
51+
// encode buffered data
52+
while (buffer.hasRemaining()) {
53+
byte[] dataBlock = new byte[3];
54+
for (int i = 0; i < 3; i++) {
55+
dataBlock[i] = buffer.get();
56+
}
57+
builder.append(encodeBlock(dataBlock));
58+
}
59+
60+
// encode (pad) last block, if it was shorter than 3 bytes
61+
int remaining = data.length % 3;
62+
if (remaining > 0) {
63+
byte[] lastBlock = Arrays.copyOfRange(data, data.length - remaining, data.length);
64+
char[] padding = padBlock(lastBlock);
65+
builder.append(padding);
66+
}
67+
68+
return builder.toString();
69+
}
70+
71+
/**
72+
* <p>Encodes one block of data.
73+
*
74+
* @param block The block to be encoded with a length of 3 bytes
75+
* @return The encoded block
76+
*/
77+
private static char[] encodeBlock(byte[] block) {
78+
char[] encodedBlock = new char[4];
79+
80+
// split 3 bytes in to 4 6-bit blocks
81+
encodedBlock[0] = encodeAlphabet[block[0] >>> 2];
82+
encodedBlock[1] = encodeAlphabet[(block[0] & 0b11) << 4 | block[1] >>> 4];
83+
encodedBlock[2] = encodeAlphabet[(block[1] & 0b1111) << 2 | block[2] >>> 6];
84+
encodedBlock[3] = encodeAlphabet[block[2] & 0b11_1111];
85+
86+
return encodedBlock;
87+
}
88+
89+
/**
90+
* <p>Pads a block of data with a padding character ('='). A data block can
91+
* have one or two elements. The resulting encoded block has always four
92+
* characters.
93+
* <p>There are two cases:
94+
* <ul>
95+
* <li>If data block has one element, then the encoded block has two
96+
* characters and two padding characters</li>
97+
* <li>If data block has two elements, the the encoded block has three
98+
* characters and one padding character</li>
99+
* </ul>
100+
*
101+
* @param block A block of data with a length of one or two
102+
* @return The encoded and padded block
103+
* @see <a href="https://tools.ietf.org/html/rfc4648#section-4">RFC 4648 - Base64 encoding</a>
104+
*/
105+
private static char[] padBlock(byte[] block) {
106+
char[] paddedBlock = new char[4];
107+
108+
paddedBlock[0] = encodeAlphabet[block[0] >>> 2];
109+
if (block.length == 1) {
110+
// pad with 2 padding characters
111+
paddedBlock[1] = encodeAlphabet[(block[0] & 0b11) << 4];
112+
paddedBlock[2] = pad;
113+
paddedBlock[3] = pad;
114+
} else { // block.length == 2
115+
// pad with 1 padding character
116+
paddedBlock[1] = encodeAlphabet[(block[0] & 0b11) << 4 | block[1] >>> 4];
117+
paddedBlock[2] = encodeAlphabet[(block[1] & 0b1111) << 2];
118+
paddedBlock[3] = pad;
119+
}
120+
121+
return paddedBlock;
122+
}
123+
124+
/**
125+
* <p>Decodes original data from a Base64 string. The string has always
126+
* a length of the multiple of four characters (empty string included). The
127+
* last block of four characters can contain one or two padding characters ('=').
128+
* <p>Each block is decoded in to 3 bytes of data. The last block is
129+
* processed separately to reverse the padding.
130+
*
131+
* @param base64String Base64 encoded data
132+
* @return original data
133+
*/
134+
public static byte[] decode(String base64String) {
135+
if (base64String == null) {
136+
throw new IllegalArgumentException("Base64 string must not be null!");
137+
}
138+
139+
base64String = base64String.strip();
140+
if (!isValidBase64String(base64String)) {
141+
throw new IllegalArgumentException("String is not a valid Base64 string!");
142+
}
143+
144+
// an empty base64 string decodes in to an empty byte array
145+
if (base64String.length() == 0) {
146+
return new byte[0];
147+
}
148+
149+
int blockCount = base64String.length() / 4;
150+
ByteBuffer stringBuffer = ByteBuffer.wrap(base64String.getBytes(StandardCharsets.ISO_8859_1),
151+
0, base64String.length());
152+
// separate last block from data
153+
ByteBuffer dataBuffer = ByteBuffer.allocate((blockCount - 1) * 3);
154+
155+
byte[] encodedBlock = new byte[4];
156+
// decode bulk of data
157+
for (int i = 0; i < blockCount - 1; ++i) {
158+
for (int j = 0; j < 4; j++) {
159+
encodedBlock[j] = stringBuffer.get();
160+
}
161+
dataBuffer.put(decodeBlock(encodedBlock));
162+
}
163+
// decode last block
164+
for (int i = 0; i < 4; i++) {
165+
encodedBlock[i] = stringBuffer.get();
166+
}
167+
byte[] lastDataBlock = undoPadding(encodedBlock);
168+
169+
// glue bulk of data and last block together
170+
ByteBuffer decodedData = ByteBuffer.allocate(dataBuffer.capacity() + lastDataBlock.length);
171+
decodedData.put(dataBuffer.array());
172+
decodedData.put(lastDataBlock);
173+
return decodedData.array();
174+
}
175+
176+
/**
177+
* <p>Decodes an encoded block.
178+
*
179+
* @param block The encoded block with a length of 4 bytes
180+
* @return The decoded block
181+
*/
182+
private static byte[] decodeBlock(byte[] block) {
183+
byte[] decodedBlock = new byte[3];
184+
185+
decodedBlock[0] = (byte) (decodeAlphabet[block[0]] << 2 | decodeAlphabet[block[1]] >>> 4);
186+
decodedBlock[1] = (byte) (decodeAlphabet[block[1]] << 4 | decodeAlphabet[block[2]] >>> 2);
187+
decodedBlock[2] = (byte) (decodeAlphabet[block[2]] << 6 | decodeAlphabet[block[3]]);
188+
189+
return decodedBlock;
190+
}
191+
192+
/**
193+
* <p>Removes the padding from the last block.
194+
*
195+
* @param block
196+
* @return The decoded last block of data
197+
*/
198+
private static byte[] undoPadding(byte[] block) {
199+
int padCount = 0;
200+
byte[] decodedBlock;
201+
202+
// count padding characters
203+
if (block[3] == (byte) pad) padCount++;
204+
if (block[2] == (byte) pad) padCount++;
205+
206+
if (padCount == 2) {
207+
decodedBlock = new byte[1];
208+
decodedBlock[0] = (byte) (decodeAlphabet[block[0]] << 2 | decodeAlphabet[block[1]] >>> 4);
209+
} else if (padCount == 1) {
210+
decodedBlock = new byte[2];
211+
decodedBlock[0] = (byte) (decodeAlphabet[block[0]] << 2 | decodeAlphabet[block[1]] >>> 4);
212+
decodedBlock[1] = (byte) (decodeAlphabet[block[1]] << 4 | decodeAlphabet[block[2]] >>> 2);
213+
} else {
214+
decodedBlock = decodeBlock(block);
215+
}
216+
return decodedBlock;
217+
}
218+
219+
/**
220+
* <p>Check if the provided string is a valid Base64 string.
221+
* <p>The specification (RFC 4648) says: <i>"Implementations MUST reject the
222+
* encoded data if it contains characters outside the base alphabet when
223+
* interpreting base-encoded data, unless the specification referring to this
224+
* document explicitly states otherwise."</i>
225+
*
226+
* @param base64String
227+
* @return <code>true</code> if the string is valid;
228+
* <code>false</code> otherwise
229+
*/
230+
private static boolean isValidBase64String(String base64String) {
231+
return base64String.matches("^(?:[A-Za-z0-9+\\/]{4})*(?:[A-Za-z0-9+\\/]{2}==|[A-Za-z0-9+\\/]{3}=)?$");
232+
}
233+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package src.test.java.com.crypto.codec;
2+
3+
import src.main.java.com.crypto.codec.Base64;
4+
import org.junit.Test;
5+
6+
import static junit.framework.Assert.*;
7+
import static org.junit.Assert.assertArrayEquals;
8+
9+
public class Base64Test {
10+
11+
/*
12+
* Test vectors are taken from:
13+
* https://tools.ietf.org/html/rfc4648#section-10
14+
*/
15+
16+
@Test
17+
public void TestBase64Encode() {
18+
assertEquals("", Base64.encode("".getBytes()));
19+
assertEquals("Zg==", Base64.encode("f".getBytes()));
20+
assertEquals("Zm8=", Base64.encode("fo".getBytes()));
21+
assertEquals("Zm9v", Base64.encode("foo".getBytes()));
22+
assertEquals("Zm9vYg==", Base64.encode("foob".getBytes()));
23+
assertEquals("Zm9vYmE=", Base64.encode("fooba".getBytes()));
24+
assertEquals("Zm9vYmFy", Base64.encode("foobar".getBytes()));
25+
}
26+
27+
@Test
28+
public void TestBase64Decode() {
29+
assertArrayEquals("".getBytes(), Base64.decode(""));
30+
assertArrayEquals("f".getBytes(), Base64.decode("Zg=="));
31+
assertArrayEquals("fo".getBytes(), Base64.decode("Zm8="));
32+
assertArrayEquals("foo".getBytes(), Base64.decode("Zm9v"));
33+
assertArrayEquals("foob".getBytes(), Base64.decode("Zm9vYg=="));
34+
assertArrayEquals("fooba".getBytes(), Base64.decode("Zm9vYmE="));
35+
assertArrayEquals("foobar".getBytes(), Base64.decode("Zm9vYmFy"));
36+
}
37+
38+
@Test(expected = IllegalArgumentException.class)
39+
public void testInvalidBase64String() {
40+
Base64.decode("Z/+v&mF=");
41+
}
42+
43+
@Test(expected = IllegalArgumentException.class)
44+
public void testInvalidLengthOfBase64String() {
45+
Base64.decode("Zm9v" + "YmFy" + "d");
46+
}
47+
}

0 commit comments

Comments
 (0)