|
| 1 | +{ |
| 2 | + This Source Code Form is subject to the terms of the Mozilla Public License, |
| 3 | + v. 2.0. If a copy of the MPL was not distributed with this file, You can |
| 4 | + obtain one at https://mozilla.org/MPL/2.0/ |
| 5 | +
|
| 6 | + Copyright (C) 2024, Peter Johnson (gravatar.com/delphidabbler). |
| 7 | +
|
| 8 | + Data types that encapsulate text data in different encodings. |
| 9 | +} |
| 10 | + |
| 11 | +unit CSLE.TextData; |
| 12 | + |
| 13 | +{$SCOPEDENUMS ON} |
| 14 | + |
| 15 | +interface |
| 16 | + |
| 17 | +uses |
| 18 | + System.SysUtils, |
| 19 | + System.Classes; |
| 20 | + |
| 21 | +type |
/// <summary>ANSI string type whose characters are tagged with code page
/// 20127.</summary>
ASCIIString = type AnsiString(20127);
| 23 | + |
/// <summary>Identifies the encoding of the bytes held by a text data
/// record.</summary>
TTextDataType = (
  /// <summary>Data bytes represent an ASCII string.</summary>
  ASCII = 0,
  /// <summary>Data bytes use the default ANSI encoding of the local
  /// system.</summary>
  ANSI = 1,
  /// <summary>Data bytes represent a UTF-8 string.</summary>
  UTF8 = 2
);
| 29 | + |
/// <summary>Encapsulates text held as an array of bytes together with the
/// type of encoding (ASCII, ANSI or UTF-8) that applies to those bytes.
/// </summary>
TTextData = record
strict private
  var
    /// <summary>Encoded bytes of the text.</summary>
    fData: TBytes;
    /// <summary>Type of encoding that applies to <c>fData</c>.</summary>
    fDataType: TTextDataType;
  class var
    /// <summary>Maps each data type onto the <c>TEncoding</c> object used
    /// to convert it. Populated by the class constructor.</summary>
    fEncodingMap: array[TTextDataType] of TEncoding;
  /// <summary>Returns a new array containing a copy of the bytes of
  /// <c>ABytes</c>.</summary>
  class function CopyBytes(const ABytes: TBytes): TBytes; static;
  /// <summary>Copies <c>ABytes</c> into a raw byte string, drops a single
  /// trailing zero byte if present and tags the string with code page
  /// <c>CP</c> without converting its content.</summary>
  class function BytesToRawByteString(const ABytes: TBytes; const CP: UInt16):
    RawByteString; static;
  /// <summary>Returns the bytes of <c>AStr</c> as a byte array.</summary>
  class function RawByteStringToBytes(const AStr: RawByteString): TBytes;
    static;
  /// <summary>Returns the text as a raw byte string encoded as
  /// <c>AWantedType</c>, converting via a Unicode string when the wanted
  /// type differs from the record's own data type.</summary>
  function ToRawByteString(const AWantedType: TTextDataType): RawByteString;
public
  /// <summary>Initialises the map of data types to encodings.</summary>
  class constructor Create;
  /// <summary>Creates a record from a copy of bytes <c>AData</c> that are
  /// encoded according to <c>ADataType</c>.</summary>
  constructor Create(const AData: TBytes; const ADataType: TTextDataType);
    overload;
  /// <summary>Creates a record by encoding Unicode string <c>AStr</c>
  /// according to <c>ADataType</c>.</summary>
  constructor Create(const AStr: string; const ADataType: TTextDataType);
    overload;
  /// <summary>Creates a record from the bytes of <c>AStr</c>, deducing the
  /// data type from the string's code page.</summary>
  /// <remarks>An empty string results in empty UTF-8 data. An exception is
  /// raised if a non-empty string has a code page other than those of the
  /// ASCII, UTF-8 or ANSI encodings.</remarks>
  constructor Create(const AStr: RawByteString); overload;
  /// <summary>Creates a record from bytes read from <c>AStream</c>,
  /// starting at the current stream position, that are encoded according
  /// to <c>ADataType</c>.</summary>
  /// <remarks>If <c>ACount</c> &lt;= 0, or is not less than the number of
  /// bytes remaining, then the whole of the remainder of the stream is
  /// read.</remarks>
  constructor Create(const AStream: TStream;
    const ADataType: TTextDataType; const ACount: Int64 = 0); overload;
  /// <summary>Initialises new record instance to empty UTF-8 text data.
  /// </summary>
  class operator Initialize(out Dest: TTextData);
  /// <summary>Assigns a copy of the value of record <c>Src</c> to
  /// <c>Dest</c>.</summary>
  class operator Assign(var Dest: TTextData;
    const [ref] Src: TTextData);

  /// <summary>Returns the number of bytes of data.</summary>
  function DataLength: NativeUInt; inline;
  /// <summary>Returns the encoding object that matches the record's data
  /// type.</summary>
  function Encoding: TEncoding; inline;
  /// <summary>Returns the text decoded to a Unicode string.</summary>
  function ToString: string; inline;
  /// <summary>Returns the text as a string in the ANSI encoding.</summary>
  function ToANSIString: AnsiString;
  /// <summary>Returns the text as a string in the ASCII encoding.</summary>
  function ToASCIIString: ASCIIString;
  /// <summary>Returns the text as a string in the UTF-8 encoding.</summary>
  function ToUTF8String: UTF8String;

  /// <summary>Checks whether <c>AStr</c> is unchanged by a round trip
  /// through the encoding associated with <c>ADataType</c>.</summary>
  class function SupportsString(const ADataType: TTextDataType;
    const AStr: string): Boolean; static;

  /// <summary>Bytes of the encoded text.</summary>
  property Data: TBytes read fData;
  /// <summary>Type of encoding that applies to <c>Data</c>.</summary>
  property DataType: TTextDataType read fDataType;

  /// <summary>Compares two text data records for equality.</summary>
  /// <remarks>Records are equal only if they have the same data type and
  /// identical bytes.</remarks>
  class operator Equal(const Left, Right: TTextData): Boolean;
  /// <summary>Compares two text data records for inequality.</summary>
  class operator NotEqual(const Left, Right: TTextData): Boolean; inline;

end;
| 79 | + |
| 80 | +implementation |
| 81 | + |
| 82 | +{ TTextData } |
| 83 | + |
// Copies the content of Src into Dest, giving Dest its own byte array.
class operator TTextData.Assign(var Dest: TTextData;
  const [ref] Src: TTextData);
begin
  // Fields are copied individually on purpose. Writing
  //   Dest := TTextData.Create(Src.fData, Src.fDataType);
  // overflows the stack, presumably because assigning to Dest re-enters
  // this operator.
  Dest.fDataType := Src.fDataType;
  Dest.fData := CopyBytes(Src.fData);
end;
| 92 | + |
// Copies the bytes of ABytes into a raw byte string that is tagged with code
// page CP. The bytes themselves are not converted. A single trailing zero
// byte, if present, is not included in the string. An empty array yields an
// empty string.
class function TTextData.BytesToRawByteString(const ABytes: TBytes;
  const CP: UInt16): RawByteString;
begin
  // NOTE: no Assert(Assigned(ABytes)) here. An empty dynamic array is nil, so
  // such an assertion rejects perfectly valid empty text data.
  var StrLen := System.Length(ABytes);
  SetLength(Result, StrLen);
  if StrLen > 0 then
  begin
    Move(ABytes[0], Result[1], StrLen);
    // Strip a terminating zero byte
    if Result[StrLen] = #0 then
      SetLength(Result, StrLen - 1);
  end;
  // Tag the string with the required code page, leaving the bytes untouched
  SetCodePage(Result, CP, False);
end;
| 108 | + |
// Returns a new byte array with the same content as ABytes. An empty array
// is returned when ABytes is empty.
class function TTextData.CopyBytes(const ABytes: TBytes): TBytes;
begin
  var Count := System.Length(ABytes);
  if Count = 0 then
  begin
    Result := nil;
    Exit;
  end;
  Result := Copy(ABytes, 0, Count);
end;
| 116 | + |
// Fills the lookup table that maps each supported data type onto the
// TEncoding object used to encode and decode it.
class constructor TTextData.Create;
begin
  fEncodingMap[TTextDataType.UTF8] := TEncoding.UTF8;
  fEncodingMap[TTextDataType.ANSI] := TEncoding.ANSI;
  fEncodingMap[TTextDataType.ASCII] := TEncoding.ASCII;
end;
| 123 | + |
// Creates a record for bytes AData encoded as ADataType. The record stores
// its own copy of the bytes rather than a reference to the caller's array.
constructor TTextData.Create(const AData: TBytes;
  const ADataType: TTextDataType);
begin
  fDataType := ADataType;
  fData := CopyBytes(AData);
end;
| 130 | + |
// Creates a record by encoding Unicode string AStr using the encoding
// associated with ADataType.
constructor TTextData.Create(const AStr: string;
  const ADataType: TTextDataType);
begin
  fDataType := ADataType;
  // GetBytes returns a newly allocated array, so it can be stored directly:
  // a further defensive copy would only duplicate the allocation.
  fData := fEncodingMap[ADataType].GetBytes(AStr);
end;
| 137 | + |
// Creates a record from bytes read from AStream, starting at the current
// stream position. The bytes are taken to be encoded as ADataType.
// If ACount <= 0, or ACount is not less than the number of bytes remaining,
// the whole of the remainder of the stream is read. Otherwise ACount bytes
// are read. Should the stream deliver fewer bytes than requested, only the
// bytes actually read are stored.
constructor TTextData.Create(const AStream: TStream;
  const ADataType: TTextDataType; const ACount: Int64);
begin
  fDataType := ADataType;
  // Assume reading all of stream from current position to end
  var BytesToRead := AStream.Size - AStream.Position;
  if BytesToRead < 0 then
    // Position is beyond end of stream: there is nothing to read
    BytesToRead := 0;
  if (ACount > 0) and (ACount < BytesToRead) then
    // Adjust number of bytes to read down to ACount
    BytesToRead := ACount;
  SetLength(fData, BytesToRead);
  if BytesToRead = 0 then
    Exit;
  var BytesRead: Int64 := AStream.Read(fData, BytesToRead);
  if BytesRead < 0 then
    BytesRead := 0;
  if BytesRead < BytesToRead then
    // Short read: discard the part of the buffer that was never filled
    SetLength(fData, BytesRead);
end;
| 150 | + |
// Creates a record from the bytes of AStr. The data type is deduced from the
// code page of the string. An empty string produces empty UTF-8 data. An
// exception is raised for a non-empty string whose code page is not that of
// the ASCII, UTF-8 or ANSI encodings.
constructor TTextData.Create(const AStr: RawByteString);
begin
  if AStr = '' then
  begin
    // Nothing to inspect: fall back to empty UTF-8 data
    fData := nil;
    fDataType := TTextDataType.UTF8;
    Exit;
  end;

  fData := RawByteStringToBytes(AStr);

  // Map the string's code page onto a data type. Tests are made in the order
  // ASCII, UTF-8, ANSI.
  var StrCP := StringCodePage(AStr);
  if StrCP = TEncoding.ASCII.CodePage then
  begin
    fDataType := TTextDataType.ASCII;
    Exit;
  end;
  if StrCP = TEncoding.UTF8.CodePage then
  begin
    fDataType := TTextDataType.UTF8;
    Exit;
  end;
  if StrCP = TEncoding.ANSI.CodePage then
  begin
    fDataType := TTextDataType.ANSI;
    Exit;
  end;
  raise Exception.CreateFmt('Unsupported code page for string "%s"', [AStr]);
end;
| 172 | + |
// Returns the size of the text data in bytes (not characters).
function TTextData.DataLength: NativeUInt;
begin
  Exit(System.Length(fData));
end;
| 177 | + |
// Returns the encoding object registered for this record's data type.
function TTextData.Encoding: TEncoding;
begin
  Exit(fEncodingMap[fDataType]);
end;
| 182 | + |
// Two records are equal when they have the same data type and hold exactly
// the same bytes. No conversion between encodings is attempted.
class operator TTextData.Equal(const Left, Right: TTextData): Boolean;
begin
  // Differing encodings or sizes rule out equality straight away
  if (Left.fDataType <> Right.fDataType)
    or (Left.DataLength <> Right.DataLength) then
    Exit(False);
  // Same size: compare byte by byte, stopping at the first mismatch
  var Idx := 0;
  while Idx < System.Length(Left.fData) do
  begin
    if Left.fData[Idx] <> Right.fData[Idx] then
      Exit(False);
    Inc(Idx);
  end;
  Result := True;
end;
| 195 | + |
// Gives a newly created record its default value: no data bytes and the
// UTF-8 data type.
class operator TTextData.Initialize(out Dest: TTextData);
begin
  Dest.fDataType := TTextDataType.UTF8;
  Dest.fData := nil;
end;
| 201 | + |
// Inequality is defined as the negation of the equality operator.
class operator TTextData.NotEqual(const Left, Right: TTextData): Boolean;
begin
  if Left = Right then
    Result := False
  else
    Result := True;
end;
| 206 | + |
// Returns a byte array containing the bytes of AStr, unchanged. An empty
// string yields an empty array.
class function TTextData.RawByteStringToBytes(
  const AStr: RawByteString): TBytes;
begin
  var ByteCount := System.Length(AStr);
  SetLength(Result, ByteCount);
  if ByteCount = 0 then
    // Nothing to copy: AStr[1] must not be accessed for an empty string
    Exit;
  Move(AStr[1], Result[0], ByteCount);
end;
| 215 | + |
// Reports whether AStr can be stored as ADataType without change. The string
// is encoded to bytes and decoded again using the encoding for ADataType:
// the result is True only if the decoded text matches the original.
class function TTextData.SupportsString(const ADataType: TTextDataType;
  const AStr: string): Boolean;
begin
  var Enc := fEncodingMap[ADataType];
  var RoundTripped := Enc.GetString(Enc.GetBytes(AStr));
  Result := AStr = RoundTripped;
end;
| 223 | + |
// Returns the text as a string in the ANSI encoding, converting from the
// record's own encoding if necessary.
function TTextData.ToANSIString: AnsiString;
begin
  Result := ToRawByteString(TTextDataType.ANSI);

  // Post-condition: a non-empty result must carry the ANSI code page. An
  // empty string has no code page of its own (StringCodePage reports the
  // system default for ''), so it is excluded from the check.
  Assert(
    (System.Length(Result) = 0)
    or (StringCodePage(Result) = fEncodingMap[TTextDataType.ANSI].CodePage)
  );
end;
| 230 | + |
// Returns the text as a string in the ASCII encoding, converting from the
// record's own encoding if necessary.
function TTextData.ToASCIIString: ASCIIString;
begin
  Result := ToRawByteString(TTextDataType.ASCII);

  // Post-condition: a non-empty result must carry the ASCII code page. An
  // empty string has no code page of its own (StringCodePage reports the
  // system default for ''), so checking it would fail for empty text even
  // though the result is correct.
  Assert(
    (System.Length(Result) = 0)
    or (StringCodePage(Result) = fEncodingMap[TTextDataType.ASCII].CodePage)
  );
end;
| 237 | + |
// Returns the text as a raw byte string encoded as AWantedType. When the
// wanted type is the record's own data type the stored bytes are used as
// they are. Otherwise the text is decoded to a Unicode string and encoded
// again using the encoding for AWantedType.
function TTextData.ToRawByteString(const AWantedType: TTextDataType):
  RawByteString;
begin
  var TargetEncoding := fEncodingMap[AWantedType];
  var Bytes: TBytes;
  if AWantedType <> fDataType then
    Bytes := TargetEncoding.GetBytes(ToString)
  else
    Bytes := fData;
  Result := BytesToRawByteString(Bytes, TargetEncoding.CodePage);
end;
| 248 | + |
// Decodes the stored bytes to a Unicode string using the encoding that
// matches the record's data type.
function TTextData.ToString: string;
begin
  Exit(fEncodingMap[fDataType].GetString(fData));
end;
| 253 | + |
// Returns the text as a string in the UTF-8 encoding, converting from the
// record's own encoding if necessary.
function TTextData.ToUTF8String: UTF8String;
begin
  Result := ToRawByteString(TTextDataType.UTF8);

  // Post-condition: a non-empty result must carry the UTF-8 code page. An
  // empty string has no code page of its own (StringCodePage reports the
  // system default for ''), so checking it would fail for empty text even
  // though the result is correct.
  Assert(
    (System.Length(Result) = 0)
    or (StringCodePage(Result) = fEncodingMap[TTextDataType.UTF8].CodePage)
  );
end;
| 260 | + |
| 261 | +end. |
| 262 | + |
0 commit comments