Skip to content

Commit 3656a90

Browse files
committed
Add TextData unit & tests to DUnitX tests
1 parent f5b7d56 commit 3656a90

File tree

4 files changed

+992
-1
lines changed

4 files changed

+992
-1
lines changed

cupola/src/CSLE.TextData.pas

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
{
2+
This Source Code Form is subject to the terms of the Mozilla Public License,
3+
v. 2.0. If a copy of the MPL was not distributed with this file, You can
4+
obtain one at https://mozilla.org/MPL/2.0/
5+
6+
Copyright (C) 2024, Peter Johnson (gravatar.com/delphidabbler).
7+
8+
Data types that encapsulate text data in different encodings.
9+
}
10+
11+
unit CSLE.TextData;
12+
13+
{$SCOPEDENUMS ON}
14+
15+
interface
16+
17+
uses
18+
System.SysUtils,
19+
System.Classes;
20+
21+
type
22+
// Distinct AnsiString type tagged with code page 20127, the Windows code page
// identifier for 7-bit US-ASCII.
ASCIIString = type AnsiString(20127);
23+
24+
/// <summary>Identifies how the bytes stored in a <c>TTextData</c> record are
/// encoded. The ordinal values are assigned explicitly.</summary>
TTextDataType = (
25+
ASCII = 0, // data bytes represent an ASCII string
26+
ANSI = 1, // data bytes use the default ANSI encoding of the local system
27+
UTF8 = 2 // data bytes represent a UTF-8 string
28+
);
29+
30+
/// <summary>Managed record that stores text as raw bytes together with a tag
/// naming the encoding of those bytes.</summary>
TTextData = record
30+
strict private
31+
var
32+
// Encoded bytes of the text
fData: TBytes;
33+
// Encoding used by the bytes in fData
fDataType: TTextDataType;
34+
class var
35+
// Maps each data type to its TEncoding; filled in by the class constructor
fEncodingMap: array[TTextDataType] of TEncoding;
36+
// Returns a new array holding a copy of the bytes in ABytes
class function CopyBytes(const ABytes: TBytes): TBytes; static;
37+
// Copies ABytes into a byte string that is tagged with code page CP
class function BytesToRawByteString(const ABytes: TBytes; const CP: UInt16):
38+
RawByteString; static;
39+
// Copies the bytes of AStr into a new byte array
class function RawByteStringToBytes(const AStr: RawByteString): TBytes;
40+
static;
41+
// Returns the text as a byte string in the encoding named by AWantedType,
// re-encoding the data when it is stored as a different type
function ToRawByteString(const AWantedType: TTextDataType): RawByteString;
42+
public
43+
// Populates the data type -> TEncoding map
class constructor Create;
44+
// Creates a record from a copy of the bytes in AData, which are taken to
// be encoded as ADataType
constructor Create(const AData: TBytes; const ADataType: TTextDataType);
45+
overload;
46+
// Creates a record by encoding AStr with the encoding for ADataType
constructor Create(const AStr: string; const ADataType: TTextDataType);
47+
overload;
48+
// Creates a record from the bytes of AStr. The data type is taken from the
// string's code page (ASCII, UTF-8 or system ANSI); any other code page
// raises an exception. An empty string gives empty UTF-8 data.
constructor Create(const AStr: RawByteString); overload;
49+
// Creates a record from bytes read from AStream, starting at its current
// position. At most ACount bytes are read; if ACount is zero or negative,
// or exceeds the bytes remaining, the whole remainder of the stream is read.
constructor Create(const AStream: TStream;
51+
const ADataType: TTextDataType; const ACount: Int64 = 0); overload;
52+
/// <summary>Initialises new record instance to empty data of type UTF-8.
/// </summary>
53+
class operator Initialize(out Dest: TTextData);
54+
/// <summary>Assigns a copy of the value of record <c>Src</c> to
55+
/// <c>Dest</c>.</summary>
56+
class operator Assign(var Dest: TTextData;
57+
const [ref] Src: TTextData);
58+
59+
// Number of bytes of data
function DataLength: NativeUInt; inline;
60+
// TEncoding instance that matches the record's data type
function Encoding: TEncoding; inline;
61+
// Text decoded from the data bytes using the record's encoding
function ToString: string; inline;
62+
// Text as a byte string in the system ANSI encoding
function ToANSIString: AnsiString;
63+
// Text as a byte string in the ASCII encoding
function ToASCIIString: ASCIIString;
64+
// Text as a byte string in the UTF-8 encoding
function ToUTF8String: UTF8String;
65+
66+
// True if AStr is unchanged after being encoded to, and decoded from, the
// encoding for ADataType
class function SupportsString(const ADataType: TTextDataType;
67+
const AStr: string): Boolean; static;
68+
69+
// The data bytes. The property reads the field directly, so the returned
// array is the record's own array, not a copy.
property Data: TBytes read fData;
70+
// Encoding of the data bytes
property DataType: TTextDataType read fDataType;
71+
72+
/// <summary>Compares two text data records for equality.</summary>
73+
class operator Equal(const Left, Right: TTextData): Boolean;
74+
/// <summary>Compares two text data records for inequality.</summary>
75+
class operator NotEqual(const Left, Right: TTextData): Boolean; inline;
76+
77+
end;
79+
80+
implementation
81+
82+
{ TTextData }
83+
84+
// Makes Dest an independent copy of Src: the encoding tag is copied and the
// data bytes are duplicated into a new array rather than shared.
class operator TTextData.Assign(var Dest: TTextData;
  const [ref] Src: TTextData);
begin
  // The fields are copied one by one on purpose. The seemingly simpler
  //   Dest := TTextData.Create(Src.fData, Src.fDataType);
  // overflows the stack, presumably because the record assignment re-enters
  // this operator.
  Dest.fDataType := Src.fDataType;
  Dest.fData := CopyBytes(Src.fData);
end;
92+
93+
// Builds a byte string from the bytes in ABytes and tags it with code page CP.
//   ABytes - bytes to copy; may be empty (an empty dynamic array is nil).
//   CP     - code page to associate with the resulting string.
// Returns the bytes as a string. If the last byte is zero it is dropped. An
// empty array yields an empty string.
class function TTextData.BytesToRawByteString(const ABytes: TBytes;
  const CP: UInt16): RawByteString;
begin
  // Deliberately no Assert(Assigned(ABytes)): a zero-length dynamic array is
  // nil, so that check would reject perfectly valid empty text data.
  var StrLen := System.Length(ABytes);
  SetLength(Result, StrLen);
  if StrLen > 0 then
  begin
    Move(ABytes[0], Result[1], StrLen);
    // Strip a single trailing #0 so it doesn't become part of the text
    if Result[StrLen] = #0 then
      SetLength(Result, StrLen - 1);
  end;
  // Tag the bytes with the code page without converting them
  SetCodePage(Result, CP, False);
end;
108+
109+
// Returns a new array containing the same bytes as ABytes. An empty input
// gives an empty result.
class function TTextData.CopyBytes(const ABytes: TBytes): TBytes;
begin
  var Count := System.Length(ABytes);
  if Count <= 0 then
  begin
    System.SetLength(Result, 0);
    Exit;
  end;
  Result := Copy(ABytes, 0, Count);
end;
116+
117+
// Fills the lookup table that maps each text data type to the TEncoding
// object used to encode and decode it.
class constructor TTextData.Create;
begin
  fEncodingMap[TTextDataType.UTF8] := TEncoding.UTF8;
  fEncodingMap[TTextDataType.ANSI] := TEncoding.ANSI;
  fEncodingMap[TTextDataType.ASCII] := TEncoding.ASCII;
end;
123+
124+
// Creates a record of type ADataType whose data is a private copy of the
// bytes in AData.
constructor TTextData.Create(const AData: TBytes;
  const ADataType: TTextDataType);
begin
  fDataType := ADataType;
  // Copy so that later changes to the caller's array don't affect the record
  fData := CopyBytes(AData);
end;
130+
131+
// Creates a record of type ADataType by encoding AStr with the encoding that
// is mapped to that data type.
constructor TTextData.Create(const AStr: string;
  const ADataType: TTextDataType);
begin
  fDataType := ADataType;
  // GetBytes returns a newly allocated array that nothing else references, so
  // it can be stored directly without taking a further copy.
  fData := fEncodingMap[ADataType].GetBytes(AStr);
end;
137+
138+
// Creates a record of type ADataType from bytes read from AStream, starting at
// the stream's current position.
//   ACount - maximum number of bytes to read. If zero or negative, or larger
//            than the number of bytes remaining, the remainder of the stream
//            is read.
// The record holds only the bytes the stream actually delivered.
constructor TTextData.Create(const AStream: TStream;
  const ADataType: TTextDataType; const ACount: Int64);
begin
  fDataType := ADataType;
  // Assume reading all of stream from current position to end
  var BytesToRead := AStream.Size - AStream.Position;
  // Position can lie beyond Size: there is nothing to read in that case
  if BytesToRead < 0 then
    BytesToRead := 0;
  if (ACount > 0) and (ACount < BytesToRead) then
    // Adjust number of bytes to read down to ACount
    BytesToRead := ACount;
  SetLength(fData, BytesToRead);
  var BytesRead := AStream.Read(fData, BytesToRead);
  if BytesRead < 0 then
    BytesRead := 0;
  // A stream may deliver fewer bytes than requested: discard the unread tail
  // of the buffer so it isn't mistaken for (zero valued) text data.
  if BytesRead < BytesToRead then
    SetLength(fData, BytesRead);
end;
150+
151+
// Creates a record from the bytes of AStr. The data type is chosen from the
// string's code page, tested in the order ASCII, UTF-8, system ANSI. A string
// with any other code page raises Exception. An empty string produces empty
// data of type UTF-8.
constructor TTextData.Create(const AStr: RawByteString);
begin
  if AStr = '' then
  begin
    // Nothing to inspect: fall back to the same state as a new record
    SetLength(fData, 0);
    fDataType := TTextDataType.UTF8;
    Exit;
  end;

  fData := RawByteStringToBytes(AStr);
  var StrCodePage := StringCodePage(AStr);
  if StrCodePage = TEncoding.ASCII.CodePage then
    fDataType := TTextDataType.ASCII
  else if StrCodePage = TEncoding.UTF8.CodePage then
    fDataType := TTextDataType.UTF8
  else if StrCodePage = TEncoding.ANSI.CodePage then
    fDataType := TTextDataType.ANSI
  else
    raise Exception.CreateFmt('Unsupported code page for string "%s"', [AStr]);
end;
172+
173+
// Returns the number of bytes of data held by the record.
function TTextData.DataLength: NativeUInt;
begin
  Result := NativeUInt(System.Length(fData));
end;
177+
178+
// Returns the TEncoding object mapped to the record's data type.
function TTextData.Encoding: TEncoding;
begin
  Result := fEncodingMap[DataType];
end;
182+
183+
// Two records are equal when they have the same data type, the same number of
// data bytes and identical byte content.
class operator TTextData.Equal(const Left, Right: TTextData): Boolean;
begin
  var ByteCount := System.Length(Left.fData);
  // Pointers are compared rather than indexing element 0 so that two empty
  // (nil) arrays compare as equal without touching any memory.
  Result := (Left.fDataType = Right.fDataType)
    and (ByteCount = System.Length(Right.fData))
    and CompareMem(Pointer(Left.fData), Pointer(Right.fData), ByteCount);
end;
195+
196+
// Sets up a newly created record: no data bytes, data type UTF-8.
class operator TTextData.Initialize(out Dest: TTextData);
begin
  Dest.fDataType := TTextDataType.UTF8;
  SetLength(Dest.fData, 0);
end;
201+
202+
// Inequality is defined as the negation of the equality operator.
class operator TTextData.NotEqual(const Left, Right: TTextData): Boolean;
begin
  if Left = Right then
    Result := False
  else
    Result := True;
end;
206+
207+
// Returns a new byte array containing the bytes of AStr, unchanged. An empty
// string gives an empty array.
class function TTextData.RawByteStringToBytes(
  const AStr: RawByteString): TBytes;
begin
  var ByteCount := System.Length(AStr);
  SetLength(Result, ByteCount);
  if ByteCount <= 0 then
    Exit;
  // Strings are indexed from 1, byte arrays from 0
  Move(AStr[1], Result[0], ByteCount);
end;
215+
216+
// Returns True if AStr is unchanged after being encoded with, and then decoded
// from, the encoding mapped to ADataType.
class function TTextData.SupportsString(const ADataType: TTextDataType;
  const AStr: string): Boolean;
begin
  var Enc := fEncodingMap[ADataType];
  var RoundTripped := Enc.GetString(Enc.GetBytes(AStr));
  Result := RoundTripped = AStr;
end;
223+
224+
// Returns the text as a byte string in the system ANSI encoding, re-encoding
// the data if the record stores it as a different type.
function TTextData.ToANSIString: AnsiString;
begin
  Result := ToRawByteString(TTextDataType.ANSI);

  // An empty string carries no code page of its own: StringCodePage then
  // reports the system default. The check only makes sense for non-empty text.
  Assert(
    (System.Length(Result) = 0)
    or (StringCodePage(Result) = fEncodingMap[TTextDataType.ANSI].CodePage)
  );
end;
230+
231+
// Returns the text as a byte string in the ASCII encoding, re-encoding the
// data if the record stores it as a different type.
function TTextData.ToASCIIString: ASCIIString;
begin
  Result := ToRawByteString(TTextDataType.ASCII);

  // An empty string carries no code page of its own: StringCodePage then
  // reports the system default, which is not normally the ASCII code page.
  // The check only makes sense for non-empty text.
  Assert(
    (System.Length(Result) = 0)
    or (StringCodePage(Result) = fEncodingMap[TTextDataType.ASCII].CodePage)
  );
end;
237+
238+
// Returns the text as a byte string tagged with the code page of the encoding
// for AWantedType. Data already stored as that type is used as is; otherwise
// the text is decoded and re-encoded in the wanted encoding.
function TTextData.ToRawByteString(const AWantedType: TTextDataType):
  RawByteString;
begin
  var WantedEncoding := fEncodingMap[AWantedType];
  if AWantedType <> fDataType then
    // Different encoding: go via the decoded text
    Result := BytesToRawByteString(
      WantedEncoding.GetBytes(ToString), WantedEncoding.CodePage
    )
  else
    Result := BytesToRawByteString(fData, WantedEncoding.CodePage);
end;
248+
249+
// Returns the text obtained by decoding the data bytes with the encoding
// mapped to the record's data type.
function TTextData.ToString: string;
begin
  Result := Encoding.GetString(fData);
end;
253+
254+
// Returns the text as a byte string in the UTF-8 encoding, re-encoding the
// data if the record stores it as a different type.
function TTextData.ToUTF8String: UTF8String;
begin
  Result := ToRawByteString(TTextDataType.UTF8);

  // An empty string carries no code page of its own: StringCodePage then
  // reports the system default, which is not normally UTF-8. The check only
  // makes sense for non-empty text.
  Assert(
    (System.Length(Result) = 0)
    or (StringCodePage(Result) = fEncodingMap[TTextDataType.UTF8].CodePage)
  );
end;
260+
261+
end.
262+

cupola/tests/CodeSnip.Cupola.Tests.dpr

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ uses
1616
CSLE.Utils.Conversions in '..\src\CSLE.Utils.Conversions.pas',
1717
Test.Utils.Conversions in 'Test.Utils.Conversions.pas',
1818
Test.Utils.Dates in 'Test.Utils.Dates.pas',
19-
CSLE.Utils.Dates in '..\src\CSLE.Utils.Dates.pas';
19+
CSLE.Utils.Dates in '..\src\CSLE.Utils.Dates.pas',
20+
Test.TextData in 'Test.TextData.pas',
21+
CSLE.TextData in '..\src\CSLE.TextData.pas';
2022

2123
{$IFNDEF TESTINSIGHT}
2224
var

cupola/tests/CodeSnip.Cupola.Tests.dproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@
7373
<DCCReference Include="Test.Utils.Conversions.pas"/>
7474
<DCCReference Include="Test.Utils.Dates.pas"/>
7575
<DCCReference Include="..\src\CSLE.Utils.Dates.pas"/>
76+
<DCCReference Include="Test.TextData.pas"/>
77+
<DCCReference Include="..\src\CSLE.TextData.pas"/>
7678
<BuildConfiguration Include="Base">
7779
<Key>Base</Key>
7880
</BuildConfiguration>

0 commit comments

Comments
 (0)