From 2d0a80a6399a723312a59c5913d569b8c572d919 Mon Sep 17 00:00:00 2001 From: Thanan Traiongthawon <95660+nullcoder@users.noreply.github.com> Date: Fri, 6 Jun 2025 01:30:24 -0700 Subject: [PATCH] feat: implement binary format encoding/decoding for multi-file support (#37) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create lib/binary.ts with complete binary format implementation - Add encodeFiles() function with magic number "GPST" and version support - Add decodeFiles() function with comprehensive validation - Add validateBinaryFormat() for quick format validation - Add extractHeader() to read header without full decode - Implement efficient binary packing with proper size limit enforcement - Use specific error types instead of generic ValidationError - Add comprehensive test suite with 35 tests covering: - Normal operations (single/multiple files) - Edge cases (empty files, Unicode, special characters) - Error conditions and size limits - Format validation and corruption detection Binary format structure: - 4 bytes: Magic number "GPST" (0x47505354) - 1 byte: Version - 2 bytes: File count - 4 bytes: Total size - Per file: filename length + name + content length + content + language This implementation provides efficient multi-file packing while maintaining data integrity and enforcing security limits. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/TODO.md | 8 +- lib/binary.test.ts | 606 +++++++++++++++++++++++++++++++++++++++++++++ lib/binary.ts | 542 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1152 insertions(+), 4 deletions(-) create mode 100644 lib/binary.test.ts create mode 100644 lib/binary.ts diff --git a/docs/TODO.md b/docs/TODO.md index 43ba18b..4e9a3a6 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -78,10 +78,10 @@ This document tracks the implementation progress of GhostPaste. 
Check off tasks ### Binary Format (`lib/binary.ts`) -- [ ] Implement file encoding to binary format - [#37](https://github.com/nullcoder/ghostpaste/issues/37) -- [ ] Implement binary format decoding - [#37](https://github.com/nullcoder/ghostpaste/issues/37) -- [ ] Add format validation - [#37](https://github.com/nullcoder/ghostpaste/issues/37) -- [ ] Create unit tests for binary operations - [#37](https://github.com/nullcoder/ghostpaste/issues/37) +- [x] Implement file encoding to binary format - [#37](https://github.com/nullcoder/ghostpaste/issues/37) +- [x] Implement binary format decoding - [#37](https://github.com/nullcoder/ghostpaste/issues/37) +- [x] Add format validation - [#37](https://github.com/nullcoder/ghostpaste/issues/37) +- [x] Create unit tests for binary operations - [#37](https://github.com/nullcoder/ghostpaste/issues/37) ### PIN Authentication (`lib/auth.ts`) diff --git a/lib/binary.test.ts b/lib/binary.test.ts new file mode 100644 index 0000000..f7fa47c --- /dev/null +++ b/lib/binary.test.ts @@ -0,0 +1,606 @@ +/** + * Tests for binary.ts - Binary format encoding/decoding utilities + */ + +import { describe, it, expect } from "vitest"; +import { + encodeFiles, + decodeFiles, + validateBinaryFormat, + extractHeader, +} from "./binary"; +import { + MAGIC_NUMBER, + BINARY_FORMAT_VERSION, + type BinarySizeLimits, +} from "@/types/binary"; +import { type File } from "@/types/models"; +import { + InvalidBinaryFormatError, + FileTooLargeError, + TooManyFilesError, + PayloadTooLargeError, + BadRequestError, +} from "./errors"; + +describe("Binary Module", () => { + // Test data + const sampleFiles: File[] = [ + { + name: "test.js", + content: "console.log('Hello, World!');", + language: "javascript", + }, + { + name: "README.md", + content: "# Test Project\n\nThis is a test.", + language: "markdown", + }, + { + name: "empty.txt", + content: "", + }, + ]; + + const singleFile: File[] = [ + { + name: "index.html", + content: "\nTest", + language: "html", + 
}, + ]; + + describe("encodeFiles", () => { + it("should encode a single file correctly", () => { + const encoded = encodeFiles(singleFile); + + expect(encoded).toBeInstanceOf(Uint8Array); + expect(encoded.length).toBeGreaterThan(11); // At least header size + + // Verify magic number + const magic = + encoded[0] | + (encoded[1] << 8) | + (encoded[2] << 16) | + (encoded[3] << 24); + expect(magic).toBe(MAGIC_NUMBER); + + // Verify version + expect(encoded[4]).toBe(BINARY_FORMAT_VERSION); + + // Verify file count + const fileCount = encoded[5] | (encoded[6] << 8); + expect(fileCount).toBe(1); + }); + + it("should encode multiple files correctly", () => { + const encoded = encodeFiles(sampleFiles); + + expect(encoded).toBeInstanceOf(Uint8Array); + + // Extract header + const header = extractHeader(encoded); + expect(header.magic).toBe(MAGIC_NUMBER); + expect(header.version).toBe(BINARY_FORMAT_VERSION); + expect(header.fileCount).toBe(3); + + // Verify we can decode it back + const decoded = decodeFiles(encoded); + expect(decoded).toHaveLength(3); + }); + + it("should handle empty file content", () => { + const filesWithEmpty: File[] = [ + { + name: "empty.txt", + content: "", + }, + ]; + + const encoded = encodeFiles(filesWithEmpty); + const decoded = decodeFiles(encoded); + + expect(decoded).toHaveLength(1); + expect(decoded[0].name).toBe("empty.txt"); + expect(decoded[0].content).toBe(""); + }); + + it("should handle files without language field", () => { + const filesNoLang: File[] = [ + { + name: "test.txt", + content: "Test content", + }, + ]; + + const encoded = encodeFiles(filesNoLang); + const decoded = decodeFiles(encoded); + + expect(decoded).toHaveLength(1); + expect(decoded[0].language).toBeUndefined(); + }); + + it("should handle Unicode filenames and content", () => { + const unicodeFiles: File[] = [ + { + name: "测试文件.txt", + content: "Hello 世界! 
🌍 Привет мир", + language: "text", + }, + { + name: "émojis 😀.js", + content: "const greeting = '👋';", + language: "javascript", + }, + ]; + + const encoded = encodeFiles(unicodeFiles); + const decoded = decodeFiles(encoded); + + expect(decoded).toHaveLength(2); + expect(decoded[0].name).toBe("测试文件.txt"); + expect(decoded[0].content).toBe("Hello 世界! 🌍 Привет мир"); + expect(decoded[1].name).toBe("émojis 😀.js"); + expect(decoded[1].content).toBe("const greeting = '👋';"); + }); + + it("should handle special characters in content", () => { + const specialFiles: File[] = [ + { + name: "special.txt", + content: "Line 1\nLine 2\r\nLine 3\tTabbed\0Null", + }, + ]; + + const encoded = encodeFiles(specialFiles); + const decoded = decodeFiles(encoded); + + expect(decoded[0].content).toBe("Line 1\nLine 2\r\nLine 3\tTabbed\0Null"); + }); + + it("should throw error for empty file array", () => { + expect(() => encodeFiles([])).toThrow(BadRequestError); + expect(() => encodeFiles([])).toThrow("No files provided"); + }); + + it("should throw error for empty filename", () => { + const invalidFiles: File[] = [ + { + name: "", + content: "Test", + }, + ]; + + expect(() => encodeFiles(invalidFiles)).toThrow(BadRequestError); + expect(() => encodeFiles(invalidFiles)).toThrow( + "File name cannot be empty" + ); + }); + + it("should throw error for too many files", () => { + const tooManyFiles: File[] = Array(25).fill({ + name: "test.txt", + content: "Test", + }); + + expect(() => encodeFiles(tooManyFiles)).toThrow(TooManyFilesError); + expect(() => encodeFiles(tooManyFiles)).toThrow("Too many files"); + }); + + it("should throw error for oversized file", () => { + const oversizedFiles: File[] = [ + { + name: "large.txt", + content: "x".repeat(600 * 1024), // 600KB + }, + ]; + + expect(() => encodeFiles(oversizedFiles)).toThrow(FileTooLargeError); + expect(() => encodeFiles(oversizedFiles)).toThrow( + 'File "large.txt" too large' + ); + }); + + it("should throw error for oversized 
total", () => { + // Create files that individually are OK but total exceeds limit + const files: File[] = Array(15).fill({ + name: "file.txt", + content: "x".repeat(400 * 1024), // 400KB each, 6MB total + }); + + expect(() => encodeFiles(files)).toThrow(PayloadTooLargeError); + expect(() => encodeFiles(files)).toThrow("Total size too large"); + }); + + it("should throw error for long filename", () => { + const longNameFiles: File[] = [ + { + name: "x".repeat(300), + content: "Test", + }, + ]; + + expect(() => encodeFiles(longNameFiles)).toThrow(BadRequestError); + expect(() => encodeFiles(longNameFiles)).toThrow("Filename too long"); + }); + + it("should throw error for long language identifier", () => { + const longLangFiles: File[] = [ + { + name: "test.txt", + content: "Test", + language: "x".repeat(100), + }, + ]; + + expect(() => encodeFiles(longLangFiles)).toThrow(BadRequestError); + expect(() => encodeFiles(longLangFiles)).toThrow( + "Language identifier too long" + ); + }); + + it("should respect custom size limits", () => { + const customLimits: BinarySizeLimits = { + maxFileSize: 1024, // 1KB + maxTotalSize: 2048, // 2KB + maxFileCount: 2, + maxFilenameLength: 10, + maxLanguageLength: 5, + }; + + const validFiles: File[] = [ + { + name: "test.txt", + content: "x".repeat(500), + language: "text", + }, + ]; + + // Should succeed with valid files + expect(() => encodeFiles(validFiles, customLimits)).not.toThrow(); + + // Should fail with large file + const largeFiles: File[] = [ + { + name: "test.txt", + content: "x".repeat(2000), + }, + ]; + expect(() => encodeFiles(largeFiles, customLimits)).toThrow( + FileTooLargeError + ); + + // Should fail with too many files + const tooManyFiles: File[] = Array(3).fill({ + name: "test.txt", + content: "Test", + }); + expect(() => encodeFiles(tooManyFiles, customLimits)).toThrow( + TooManyFilesError + ); + }); + }); + + describe("decodeFiles", () => { + it("should decode encoded files correctly", () => { + const 
encoded = encodeFiles(sampleFiles); + const decoded = decodeFiles(encoded); + + expect(decoded).toHaveLength(sampleFiles.length); + + for (let i = 0; i < sampleFiles.length; i++) { + expect(decoded[i].name).toBe(sampleFiles[i].name); + expect(decoded[i].content).toBe(sampleFiles[i].content); + expect(decoded[i].language).toBe(sampleFiles[i].language); + } + }); + + it("should throw error for empty data", () => { + expect(() => decodeFiles(new Uint8Array())).toThrow( + InvalidBinaryFormatError + ); + expect(() => decodeFiles(new Uint8Array())).toThrow("too small"); + }); + + it("should throw error for data too small for header", () => { + const smallData = new Uint8Array(5); + expect(() => decodeFiles(smallData)).toThrow(InvalidBinaryFormatError); + expect(() => decodeFiles(smallData)).toThrow("too small"); + }); + + it("should throw error for invalid magic number", () => { + const invalidData = new Uint8Array(11); + invalidData[0] = 0x12; + invalidData[1] = 0x34; + invalidData[2] = 0x56; + invalidData[3] = 0x78; + + expect(() => decodeFiles(invalidData)).toThrow(InvalidBinaryFormatError); + expect(() => decodeFiles(invalidData)).toThrow("Invalid magic number"); + }); + + it("should throw error for unsupported version", () => { + const data = new Uint8Array(11); + // Set correct magic number + data[0] = MAGIC_NUMBER & 0xff; + data[1] = (MAGIC_NUMBER >> 8) & 0xff; + data[2] = (MAGIC_NUMBER >> 16) & 0xff; + data[3] = (MAGIC_NUMBER >> 24) & 0xff; + // Set wrong version + data[4] = 99; + + expect(() => decodeFiles(data)).toThrow(InvalidBinaryFormatError); + expect(() => decodeFiles(data)).toThrow("Unsupported version"); + }); + + it("should throw error for invalid file count", () => { + const data = new Uint8Array(11); + // Set correct magic number and version + data[0] = MAGIC_NUMBER & 0xff; + data[1] = (MAGIC_NUMBER >> 8) & 0xff; + data[2] = (MAGIC_NUMBER >> 16) & 0xff; + data[3] = (MAGIC_NUMBER >> 24) & 0xff; + data[4] = BINARY_FORMAT_VERSION; + // Set file count to 
0 + data[5] = 0; + data[6] = 0; + + expect(() => decodeFiles(data)).toThrow(InvalidBinaryFormatError); + expect(() => decodeFiles(data)).toThrow("Invalid file count"); + }); + + it("should throw error for truncated data", () => { + const encoded = encodeFiles(sampleFiles); + // Truncate the data + const truncated = encoded.slice(0, encoded.length - 10); + + expect(() => decodeFiles(truncated)).toThrow(InvalidBinaryFormatError); + }); + + it("should throw error for corrupted file data", () => { + const files: File[] = [ + { + name: "test.txt", + content: "Test content", + }, + ]; + + const encoded = encodeFiles(files); + // Find position of content length + // Header: 11 bytes + // Filename length: 2 bytes + // Filename "test.txt": 8 bytes + // Content length position: 11 + 2 + 8 = 21 + // Corrupt the content length to be larger than actual remaining data + // but within the file size limit + encoded[21] = 100; // 100 bytes (more than remaining data but under 500KB limit) + encoded[22] = 0; + encoded[23] = 0; + encoded[24] = 0; + + expect(() => decodeFiles(encoded)).toThrow(InvalidBinaryFormatError); + expect(() => decodeFiles(encoded)).toThrow("Unexpected end of data"); + }); + + it("should handle round-trip with maximum values", () => { + const maxFiles: File[] = [ + { + name: "a".repeat(255), // Max filename length + content: "x".repeat(100), // Some content + language: "l".repeat(50), // Max language length + }, + ]; + + const encoded = encodeFiles(maxFiles); + const decoded = decodeFiles(encoded); + + expect(decoded[0].name).toBe(maxFiles[0].name); + expect(decoded[0].content).toBe(maxFiles[0].content); + expect(decoded[0].language).toBe(maxFiles[0].language); + }); + }); + + describe("validateBinaryFormat", () => { + it("should validate correct binary data", () => { + const encoded = encodeFiles(sampleFiles); + expect(validateBinaryFormat(encoded)).toBe(true); + }); + + it("should throw error for invalid data", () => { + const invalidData = new 
Uint8Array(100); + expect(() => validateBinaryFormat(invalidData)).toThrow( + InvalidBinaryFormatError + ); + }); + + it("should validate without full decode", () => { + const largeFiles: File[] = Array(10).fill({ + name: "file.txt", + content: "x".repeat(10000), + }); + + const encoded = encodeFiles(largeFiles); + // Validation should be fast and not decode content + expect(validateBinaryFormat(encoded)).toBe(true); + }); + + it("should detect extra data after files", () => { + const files: File[] = [ + { + name: "test.txt", + content: "Test", + }, + ]; + + const encoded = encodeFiles(files); + // Add extra data + const withExtra = new Uint8Array(encoded.length + 10); + withExtra.set(encoded); + + expect(() => validateBinaryFormat(withExtra)).toThrow( + InvalidBinaryFormatError + ); + expect(() => validateBinaryFormat(withExtra)).toThrow( + "Extra data after files" + ); + }); + + it("should detect size mismatch", () => { + const files: File[] = [ + { + name: "test.txt", + content: "Test content", + }, + ]; + + const encoded = encodeFiles(files); + // Modify the total size in header to not match actual content + encoded[7] = 0xff; + + expect(() => validateBinaryFormat(encoded)).toThrow( + InvalidBinaryFormatError + ); + expect(() => validateBinaryFormat(encoded)).toThrow( + "Total size mismatch" + ); + }); + }); + + describe("extractHeader", () => { + it("should extract header correctly", () => { + const encoded = encodeFiles(sampleFiles); + const header = extractHeader(encoded); + + expect(header.magic).toBe(MAGIC_NUMBER); + expect(header.version).toBe(BINARY_FORMAT_VERSION); + expect(header.fileCount).toBe(3); + expect(header.totalSize).toBeGreaterThan(0); + }); + + it("should throw error for invalid data", () => { + expect(() => extractHeader(new Uint8Array())).toThrow( + InvalidBinaryFormatError + ); + expect(() => extractHeader(new Uint8Array(5))).toThrow( + InvalidBinaryFormatError + ); + }); + + it("should extract header without validating rest of data", () 
=> { + // Create minimal valid header with no file data + const header = new Uint8Array(11); + // Magic number + header[0] = MAGIC_NUMBER & 0xff; + header[1] = (MAGIC_NUMBER >> 8) & 0xff; + header[2] = (MAGIC_NUMBER >> 16) & 0xff; + header[3] = (MAGIC_NUMBER >> 24) & 0xff; + // Version + header[4] = BINARY_FORMAT_VERSION; + // File count = 1 + header[5] = 1; + header[6] = 0; + // Total size = 100 + header[7] = 100; + header[8] = 0; + header[9] = 0; + header[10] = 0; + + const extracted = extractHeader(header); + expect(extracted.magic).toBe(MAGIC_NUMBER); + expect(extracted.version).toBe(BINARY_FORMAT_VERSION); + expect(extracted.fileCount).toBe(1); + expect(extracted.totalSize).toBe(100); + }); + }); + + describe("Edge cases and stress tests", () => { + it("should handle many small files", () => { + const manyFiles: File[] = Array(20) + .fill(null) + .map((_, i) => ({ + name: `file${i}.txt`, + content: `Content ${i}`, + language: i % 2 === 0 ? "text" : undefined, + })); + + const encoded = encodeFiles(manyFiles); + const decoded = decodeFiles(encoded); + + expect(decoded).toHaveLength(20); + decoded.forEach((file, i) => { + expect(file.name).toBe(`file${i}.txt`); + expect(file.content).toBe(`Content ${i}`); + }); + }); + + it("should handle various character encodings", () => { + const files: File[] = [ + { + name: "ascii.txt", + content: "Simple ASCII text", + }, + { + name: "latin1.txt", + content: "Café, naïve, résumé", + }, + { + name: "cyrillic.txt", + content: "Привет мир", + }, + { + name: "cjk.txt", + content: "你好世界こんにちは世界 안녕하세요", + }, + { + name: "emoji.txt", + content: "🚀 🌍 💻 📝", + }, + ]; + + const encoded = encodeFiles(files); + const decoded = decodeFiles(encoded); + + decoded.forEach((file, i) => { + expect(file.name).toBe(files[i].name); + expect(file.content).toBe(files[i].content); + }); + }); + + it("should maintain data integrity with random content", () => { + // Generate random binary data as string + const randomContent = Array(1000) + 
.fill(null) + .map(() => String.fromCharCode(Math.floor(Math.random() * 128))) + .join(""); + + const files: File[] = [ + { + name: "random.bin", + content: randomContent, + }, + ]; + + const encoded = encodeFiles(files); + const decoded = decodeFiles(encoded); + + expect(decoded[0].content).toBe(randomContent); + }); + + it("should calculate compression ratio correctly", () => { + const files: File[] = [ + { + name: "test.txt", + content: "x".repeat(1000), + }, + ]; + + const encoded = encodeFiles(files); + const header = extractHeader(encoded); + + // Binary size should be close to content size + overhead + const overhead = 11 + 2 + 8 + 4 + 1; // header + name length + "test.txt" + content length + lang length + expect(encoded.length).toBe(1000 + overhead); + expect(header.totalSize).toBe(1000); + }); + }); +}); diff --git a/lib/binary.ts b/lib/binary.ts new file mode 100644 index 0000000..547b95c --- /dev/null +++ b/lib/binary.ts @@ -0,0 +1,542 @@ +/** + * Binary format encoding/decoding utilities for multi-file support + * + * This module provides efficient binary packing/unpacking for multiple files + * in a single blob, reducing storage overhead and maintaining data integrity. 
+ */
+
+import {
+  BINARY_FORMAT_VERSION,
+  MAGIC_NUMBER,
+  DEFAULT_SIZE_LIMITS,
+  type BinaryHeader,
+  type BinarySizeLimits,
+} from "@/types/binary";
+import { type File } from "@/types/models";
+import {
+  InvalidBinaryFormatError,
+  FileTooLargeError,
+  TooManyFilesError,
+  PayloadTooLargeError,
+  BadRequestError,
+} from "./errors";
+import { logger } from "./logger";
+
+/**
+ * Size of the fixed header in bytes:
+ * 4 (magic) + 1 (version) + 2 (file count) + 4 (total content size)
+ */
+const HEADER_SIZE = 11;
+
+/**
+ * Largest values that fit the fixed-width length fields of the format
+ * (file count and filename length are 2 bytes, language length is 1 byte)
+ */
+const MAX_UINT16 = 0xffff;
+const MAX_UINT8 = 0xff;
+
+/**
+ * Text encoder/decoder instances
+ *
+ * The decoder is strict on purpose:
+ * - `fatal: true` makes malformed UTF-8 throw instead of being silently
+ *   replaced with U+FFFD, so corrupted text is reported as a format error.
+ * - `ignoreBOM: true` keeps a leading U+FEFF in the output, so content that
+ *   starts with a byte order mark round-trips unchanged.
+ */
+const textEncoder = new TextEncoder();
+const textDecoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
+
+/**
+ * Convert a 32-bit number to 4 bytes in little-endian format
+ *
+ * Only the low 32 bits of `value` are written.
+ */
+function uint32ToBytes(value: number): Uint8Array {
+  const bytes = new Uint8Array(4);
+  bytes[0] = value & 0xff;
+  bytes[1] = (value >> 8) & 0xff;
+  bytes[2] = (value >> 16) & 0xff;
+  bytes[3] = (value >> 24) & 0xff;
+  return bytes;
+}
+
+/**
+ * Convert 4 bytes in little-endian format to an unsigned 32-bit number
+ *
+ * Bitwise operators produce signed 32-bit results, so without the final
+ * `>>> 0` a value with the top bit set would come back negative and slip
+ * past every `> limit` comparison and bounds check made on it.
+ */
+function bytesToUint32(bytes: Uint8Array, offset: number = 0): number {
+  return (
+    (bytes[offset] |
+      (bytes[offset + 1] << 8) |
+      (bytes[offset + 2] << 16) |
+      (bytes[offset + 3] << 24)) >>>
+    0
+  );
+}
+
+/**
+ * Convert a 16-bit number to 2 bytes in little-endian format
+ *
+ * Only the low 16 bits of `value` are written.
+ */
+function uint16ToBytes(value: number): Uint8Array {
+  const bytes = new Uint8Array(2);
+  bytes[0] = value & 0xff;
+  bytes[1] = (value >> 8) & 0xff;
+  return bytes;
+}
+
+/**
+ * Convert 2 bytes in little-endian format to a 16-bit number
+ */
+function bytesToUint16(bytes: Uint8Array, offset: number = 0): number {
+  return bytes[offset] | (bytes[offset + 1] << 8);
+}
+
+/**
+ * Encode multiple files into a single binary blob
+ *
+ * Binary format (all integers little-endian):
+ * - 4 bytes: Magic number "GPST" (0x47505354)
+ * - 1 byte: Version
+ * - 2 bytes: File count
+ * - 4 bytes: Total size (sum of the content byte lengths)
+ * - For each file:
+ *   - 2 bytes: Filename length
+ *   - N bytes: Filename (UTF-8)
+ *   - 4 bytes: Content length
+ *   - N bytes: Content (UTF-8)
+ *   - 1 byte: Language length
+ *   - N bytes: Language (UTF-8, optional)
+ *
+ * Filename and language limits are measured in UTF-8 bytes, the same unit
+ * the decoder checks, so anything this function accepts can be decoded
+ * again under the same limits.
+ *
+ * @param files - Array of files to encode
+ * @param limits - Size limits to enforce
+ * @returns Encoded binary data
+ * @throws {BadRequestError} If input validation fails
+ * @throws {TooManyFilesError} If too many files
+ * @throws {FileTooLargeError} If individual file too large
+ * @throws {PayloadTooLargeError} If total size too large
+ */
+export function encodeFiles(
+  files: File[],
+  limits: BinarySizeLimits = DEFAULT_SIZE_LIMITS
+): Uint8Array {
+  // Validate inputs
+  if (!files || files.length === 0) {
+    throw new BadRequestError("No files provided for encoding");
+  }
+
+  // The length fields have a fixed width; cap custom limits to what the
+  // fields can hold so an oversized value is rejected instead of truncated.
+  const maxFileCount = Math.min(limits.maxFileCount, MAX_UINT16);
+  const maxFilenameBytes = Math.min(limits.maxFilenameLength, MAX_UINT16);
+  const maxLanguageBytes = Math.min(limits.maxLanguageLength, MAX_UINT8);
+
+  if (files.length > maxFileCount) {
+    throw new TooManyFilesError(
+      `Too many files: ${files.length} exceeds limit of ${maxFileCount}`
+    );
+  }
+
+  // Encode every file up front so all limits are checked before allocating
+  let totalContentSize = 0;
+  const encodedFiles: Array<{
+    nameBytes: Uint8Array;
+    contentBytes: Uint8Array;
+    languageBytes: Uint8Array;
+  }> = [];
+
+  for (const file of files) {
+    // Validate filename
+    if (!file.name) {
+      throw new BadRequestError("File name cannot be empty");
+    }
+
+    // Encode strings to UTF-8
+    const nameBytes = textEncoder.encode(file.name);
+    const contentBytes = textEncoder.encode(file.content || "");
+    const languageBytes = textEncoder.encode(file.language || "");
+
+    // Validate filename length in bytes (not UTF-16 code units)
+    if (nameBytes.length > maxFilenameBytes) {
+      throw new BadRequestError(
+        `Filename too long: ${nameBytes.length} bytes exceeds limit of ${maxFilenameBytes}`
+      );
+    }
+
+    // Validate individual file size
+    if (contentBytes.length > limits.maxFileSize) {
+      throw new FileTooLargeError(
+        `File "${file.name}" too large: ${contentBytes.length} bytes exceeds limit of ${limits.maxFileSize}`
+      );
+    }
+
+    // Validate language length
+    if (languageBytes.length > maxLanguageBytes) {
+      throw new BadRequestError(
+        `Language identifier too long: ${languageBytes.length} exceeds limit of ${maxLanguageBytes}`
+      );
+    }
+
+    totalContentSize += contentBytes.length;
+    encodedFiles.push({ nameBytes, contentBytes, languageBytes });
+  }
+
+  // Validate total size
+  if (totalContentSize > limits.maxTotalSize) {
+    throw new PayloadTooLargeError(
+      `Total size too large: ${totalContentSize} bytes exceeds limit of ${limits.maxTotalSize}`
+    );
+  }
+
+  // Calculate total binary size
+  let totalBinarySize = HEADER_SIZE;
+
+  for (const { nameBytes, contentBytes, languageBytes } of encodedFiles) {
+    // Per file: 2 (name length) + name + 4 (content length) + content + 1 (lang length) + lang
+    totalBinarySize +=
+      2 + nameBytes.length + 4 + contentBytes.length + 1 + languageBytes.length;
+  }
+
+  // Create binary buffer
+  const buffer = new Uint8Array(totalBinarySize);
+  let offset = 0;
+
+  // Write header
+  // Magic number (4 bytes)
+  buffer.set(uint32ToBytes(MAGIC_NUMBER), offset);
+  offset += 4;
+
+  // Version (1 byte)
+  buffer[offset] = BINARY_FORMAT_VERSION;
+  offset += 1;
+
+  // File count (2 bytes)
+  buffer.set(uint16ToBytes(files.length), offset);
+  offset += 2;
+
+  // Total content size (4 bytes)
+  buffer.set(uint32ToBytes(totalContentSize), offset);
+  offset += 4;
+
+  // Write files
+  for (const { nameBytes, contentBytes, languageBytes } of encodedFiles) {
+    // Filename length (2 bytes)
+    buffer.set(uint16ToBytes(nameBytes.length), offset);
+    offset += 2;
+
+    // Filename
+    buffer.set(nameBytes, offset);
+    offset += nameBytes.length;
+
+    // Content length (4 bytes)
+    buffer.set(uint32ToBytes(contentBytes.length), offset);
+    offset += 4;
+
+    // Content
+    buffer.set(contentBytes, offset);
+    offset += contentBytes.length;
+
+    // Language length (1 byte); zero means "no language"
+    buffer[offset] = languageBytes.length;
+    offset += 1;
+
+    // Language (if present)
+    if (languageBytes.length > 0) {
+      buffer.set(languageBytes, offset);
+      offset += languageBytes.length;
+    }
+  }
+
+  logger.debug("Encoded files to binary format", {
+    fileCount: files.length,
+    totalContentSize,
+    totalBinarySize,
+    compressionRatio: (totalBinarySize / totalContentSize).toFixed(2),
+  });
+
+  return buffer;
+}
+
+/**
+ * Decode binary blob back to individual files
+ *
+ * Every length read from the blob is checked against both the given limits
+ * and the remaining data before it is used. Text must be valid UTF-8, the
+ * blob must end exactly after the last file, and the header's total size
+ * must equal the sum of the decoded content lengths.
+ *
+ * @param data - Binary data to decode
+ * @param limits - Size limits to validate against
+ * @returns Array of decoded files; `language` is undefined when the stored
+ *   language length is zero
+ * @throws {InvalidBinaryFormatError} If data is invalid or corrupted
+ */
+export function decodeFiles(
+  data: Uint8Array,
+  limits: BinarySizeLimits = DEFAULT_SIZE_LIMITS
+): File[] {
+  if (!data || data.length < HEADER_SIZE) {
+    throw new InvalidBinaryFormatError(
+      "Binary data too small to contain valid header"
+    );
+  }
+
+  let offset = 0;
+
+  // Read and validate magic number
+  const magic = bytesToUint32(data, offset);
+  if (magic !== MAGIC_NUMBER) {
+    throw new InvalidBinaryFormatError(
+      `Invalid magic number: expected ${MAGIC_NUMBER.toString(16)}, got ${magic.toString(16)}`
+    );
+  }
+  offset += 4;
+
+  // Read and validate version
+  const version = data[offset];
+  if (version !== BINARY_FORMAT_VERSION) {
+    throw new InvalidBinaryFormatError(
+      `Unsupported version: expected ${BINARY_FORMAT_VERSION}, got ${version}`
+    );
+  }
+  offset += 1;
+
+  // Read file count
+  const fileCount = bytesToUint16(data, offset);
+  if (fileCount === 0 || fileCount > limits.maxFileCount) {
+    throw new InvalidBinaryFormatError(
+      `Invalid file count: ${fileCount} (must be 1-${limits.maxFileCount})`
+    );
+  }
+  offset += 2;
+
+  // Read total size
+  const totalSize = bytesToUint32(data, offset);
+  if (totalSize > limits.maxTotalSize) {
+    throw new InvalidBinaryFormatError(
+      `Total size too large: ${totalSize} exceeds limit of ${limits.maxTotalSize}`
+    );
+  }
+  offset += 4;
+
+  // Decode files
+  const files: File[] = [];
+  let decodedSize = 0;
+
+  try {
+    for (let i = 0; i < fileCount; i++) {
+      // Check remaining data
+      if (offset + 2 > data.length) {
+        throw new InvalidBinaryFormatError(
+          `Unexpected end of data while reading file ${i + 1}`
+        );
+      }
+
+      // Read filename length
+      const nameLength = bytesToUint16(data, offset);
+      offset += 2;
+
+      if (nameLength === 0 || nameLength > limits.maxFilenameLength) {
+        throw new InvalidBinaryFormatError(
+          `Invalid filename length: ${nameLength} for file ${i + 1}`
+        );
+      }
+
+      // Read filename
+      if (offset + nameLength > data.length) {
+        throw new InvalidBinaryFormatError(
+          `Unexpected end of data while reading filename for file ${i + 1}`
+        );
+      }
+
+      // subarray creates a view, so the bytes are not copied before decoding
+      const name = textDecoder.decode(
+        data.subarray(offset, offset + nameLength)
+      );
+      offset += nameLength;
+
+      // Read content length
+      if (offset + 4 > data.length) {
+        throw new InvalidBinaryFormatError(
+          `Unexpected end of data while reading content length for file ${i + 1}`
+        );
+      }
+
+      const contentLength = bytesToUint32(data, offset);
+      offset += 4;
+
+      if (contentLength > limits.maxFileSize) {
+        throw new InvalidBinaryFormatError(
+          `File too large: ${contentLength} exceeds limit of ${limits.maxFileSize}`
+        );
+      }
+
+      // Read content
+      if (offset + contentLength > data.length) {
+        throw new InvalidBinaryFormatError(
+          `Unexpected end of data while reading content for file ${i + 1}`
+        );
+      }
+
+      const content = textDecoder.decode(
+        data.subarray(offset, offset + contentLength)
+      );
+      offset += contentLength;
+      decodedSize += contentLength;
+
+      // Read language length
+      if (offset >= data.length) {
+        throw new InvalidBinaryFormatError(
+          `Unexpected end of data while reading language length for file ${i + 1}`
+        );
+      }
+
+      const languageLength = data[offset];
+      offset += 1;
+
+      if (languageLength > limits.maxLanguageLength) {
+        throw new InvalidBinaryFormatError(
+          `Language identifier too long: ${languageLength} exceeds limit of ${limits.maxLanguageLength}`
+        );
+      }
+
+      // Read language (optional)
+      let language: string | undefined;
+      if (languageLength > 0) {
+        if (offset + languageLength > data.length) {
+          throw new InvalidBinaryFormatError(
+            `Unexpected end of data while reading language for file ${i + 1}`
+          );
+        }
+
+        language = textDecoder.decode(
+          data.subarray(offset, offset + languageLength)
+        );
+        offset += languageLength;
+      }
+
+      files.push({ name, content, language });
+    }
+  } catch (error) {
+    if (error instanceof InvalidBinaryFormatError) {
+      throw error;
+    }
+    // Handle TextDecoder errors (the decoder is fatal, so malformed UTF-8 throws)
+    throw new InvalidBinaryFormatError(
+      "Failed to decode text data: invalid UTF-8",
+      {
+        originalError: error,
+      }
+    );
+  }
+
+  // Reject trailing bytes, matching validateBinaryFormat
+  if (offset !== data.length) {
+    throw new InvalidBinaryFormatError("Extra data after files");
+  }
+
+  // Verify total size matches
+  if (decodedSize !== totalSize) {
+    throw new InvalidBinaryFormatError(
+      `Size mismatch: decoded ${decodedSize} bytes, expected ${totalSize}`
+    );
+  }
+
+  logger.debug("Decoded binary format to files", {
+    fileCount: files.length,
+    totalSize,
+    dataSize: data.length,
+  });
+
+  return files;
+}
+
+/**
+ * Validate binary format without fully decoding
+ *
+ * This is a lighter-weight validation that walks the length fields and
+ * checks the structure without decoding any text, useful for quick
+ * validation checks. It does not verify that the text is valid UTF-8.
+ *
+ * @param data - Binary data to validate
+ * @param limits - Size limits to validate against
+ * @returns True if format is valid
+ * @throws {InvalidBinaryFormatError} If format is invalid
+ */
+export function validateBinaryFormat(
+  data: Uint8Array,
+  limits: BinarySizeLimits = DEFAULT_SIZE_LIMITS
+): boolean {
+  if (!data || data.length < HEADER_SIZE) {
+    throw new InvalidBinaryFormatError(
+      "Binary data too small to contain valid header"
+    );
+  }
+
+  // Validate magic number
+  const magic = bytesToUint32(data, 0);
+  if (magic !== MAGIC_NUMBER) {
+    throw new InvalidBinaryFormatError("Invalid magic number");
+  }
+
+  // Validate version
+  const version = data[4];
+  if (version !== BINARY_FORMAT_VERSION) {
+    throw new InvalidBinaryFormatError("Unsupported version");
+  }
+
+  // Validate file count
+  const fileCount = bytesToUint16(data, 5);
+  if (fileCount === 0 || fileCount > limits.maxFileCount) {
+    throw new InvalidBinaryFormatError("Invalid file count");
+  }
+
+  // Validate total size
+  const totalSize = bytesToUint32(data, 7);
+  if (totalSize > limits.maxTotalSize) {
+    throw new InvalidBinaryFormatError("Total size exceeds limit");
+  }
+
+  // Quick structural validation without full decode
+  let offset = HEADER_SIZE;
+  let calculatedSize = 0;
+
+  for (let i = 0; i < fileCount; i++) {
+    // Check filename length
+    if (offset + 2 > data.length) {
+      throw new InvalidBinaryFormatError(
+        "Truncated data: missing filename length"
+      );
+    }
+
+    const nameLength = bytesToUint16(data, offset);
+    if (nameLength === 0 || nameLength > limits.maxFilenameLength) {
+      throw new InvalidBinaryFormatError("Invalid filename length");
+    }
+    offset += 2 + nameLength;
+
+    // Check content length (also catches a filename that overruns the data)
+    if (offset + 4 > data.length) {
+      throw new InvalidBinaryFormatError(
+        "Truncated data: missing content length"
+      );
+    }
+
+    const contentLength = bytesToUint32(data, offset);
+    if (contentLength > limits.maxFileSize) {
+      throw new InvalidBinaryFormatError("File size exceeds limit");
+    }
+    calculatedSize += contentLength;
+    offset += 4 + contentLength;
+
+    // Check language length (also catches content that overruns the data)
+    if (offset >= data.length) {
+      throw new InvalidBinaryFormatError(
+        "Truncated data: missing language length"
+      );
+    }
+
+    const languageLength = data[offset];
+    if (languageLength > limits.maxLanguageLength) {
+      throw new InvalidBinaryFormatError("Language identifier too long");
+    }
+    offset += 1 + languageLength;
+
+    // A language field that runs past the end is truncation, not extra data
+    if (offset > data.length) {
+      throw new InvalidBinaryFormatError("Truncated data: missing language");
+    }
+  }
+
+  // Verify we consumed exactly the right amount of data
+  if (offset !== data.length) {
+    throw new InvalidBinaryFormatError("Extra data after files");
+  }
+
+  // Verify total size matches
+  if (calculatedSize !== totalSize) {
+    throw new InvalidBinaryFormatError("Total size mismatch");
+  }
+
+  return true;
+}
+
+/**
+ * Extract header information from binary data without full decode
+ *
+ * Only the length of the data is checked; the magic number, version and
+ * counts are returned as stored, without being validated.
+ *
+ * @param data - Binary data to read header from
+ * @returns Binary header information
+ * @throws {InvalidBinaryFormatError} If data is too small to hold a header
+ */
+export function extractHeader(data: Uint8Array): BinaryHeader {
+  if (!data || data.length < HEADER_SIZE) {
+    throw new InvalidBinaryFormatError(
+      "Binary data too small to contain valid header"
+    );
+  }
+
+  const magic = bytesToUint32(data, 0);
+  const version = data[4];
+  const fileCount = bytesToUint16(data, 5);
+  const totalSize = bytesToUint32(data, 7);
+
+  return {
+    magic,
+    version,
+    fileCount,
+    totalSize,
+  };
+}