DEADSOFTWARE

sfs and wad code refactoring: part 1
[d2df-sdl.git] / src / shared / utils.pas
1 {$MODE DELPHI}
2 unit utils;
4 interface
6 // does filename have one of ".wad", ".pk3", ".zip" extensions?
7 function hasWadExtension (fn: AnsiString): Boolean;
9 // does filepath have ".XXX:\" in it?
10 function isWadPath (fn: AnsiString): Boolean;
12 // adds ".wad" extension if filename doesn't have one of ".wad", ".pk3", ".zip"
13 function addWadExtension (fn: AnsiString): AnsiString;
15 // convert number to string with nice commas (thousands separators)
16 function Int64ToStrComma (i: Int64): AnsiString;
18 function UpCase1251 (ch: Char): Char;
20 // `true` if strings are equal; ignoring case for cp1251
21 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
23 function utf8Valid (const s: AnsiString): Boolean;
25 function utf8to1251 (s: AnsiString): AnsiString;
27 // `pathname` will be modified if path is valid
28 // `lastIsDir` should be `true` if we are searching for directory
29 // Windows gets no special treatment: the same code path is used on every platform
30 function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
33 implementation
35 uses
36 SysUtils;
// Reports whether the extension of `fn` (as returned by ExtractFileExt) is
// ".wad", ".pk3" or ".zip"; the comparison ignores case (see StrEquCI1251).
function hasWadExtension (fn: AnsiString): Boolean;
var
  ext: AnsiString;
begin
  ext := ExtractFileExt(fn);
  if StrEquCI1251(ext, '.wad') then result := true
  else if StrEquCI1251(ext, '.pk3') then result := true
  else result := StrEquCI1251(ext, '.zip');
end;
// Returns `fn` unchanged when it already ends in one of the archive
// extensions recognised by hasWadExtension; otherwise appends ".wad".
function addWadExtension (fn: AnsiString): AnsiString;
begin
  if hasWadExtension(fn) then result := fn
  else result := fn+'.wad';
end;
// Reports whether `fn` contains an archive reference of the form
// "<name>.wad:\", "<name>.pk3:/" and so on: a colon that is directly preceded
// by one of the three archive extensions and directly followed by a slash.
// The extension comparison ignores case.
function isWadPath (fn: AnsiString): Boolean;
var
  segStart, colon: Integer;
  ext: AnsiString;
begin
  result := false;
  // `segStart` is the first character after the previously examined colon;
  // the ".ext" candidate must lie strictly after it, with at least one more
  // character in front of the dot.
  segStart := 1;
  // a colon in the very last position cannot be followed by a slash,
  // so the scan stops one character early
  for colon := 1 to length(fn)-1 do
  begin
    if fn[colon] <> ':' then continue;
    if (colon-4 > segStart) and (fn[colon-4] = '.') and
       ((fn[colon+1] = '\') or (fn[colon+1] = '/')) then
    begin
      ext := Copy(fn, colon-4, 4);
      if StrEquCI1251(ext, '.wad') or StrEquCI1251(ext, '.pk3') or StrEquCI1251(ext, '.zip') then
      begin
        result := true;
        exit;
      end;
    end;
    segStart := colon+1;
  end;
end;
// Converts `i` to its decimal representation with a comma inserted between
// every group of three digits, e.g. 1234567 -> '1,234,567'.
// Negative numbers keep their leading minus sign, and the sign is never
// counted as a digit: -123 -> '-123', -1234 -> '-1,234'.
function Int64ToStrComma (i: Int64): AnsiString;
var
  f, firstDigit: Integer;
begin
  Str(i, result);
  // index of the first digit: skip the '-' that Str emits for negatives
  if i < 0 then firstDigit := 2 else firstDigit := 1;
  // `f` walks from just past the last digit towards the front in steps of
  // three; a comma is inserted only while more than three digits remain
  // to the left of the insertion point
  f := Length(result)+1;
  while f-firstDigit > 3 do
  begin
    Dec(f, 3);
    Insert(',', result, f);
  end;
end;
// Upper-cases a single character in the Windows-1251 (cp1251) code page.
// Handles ASCII 'a'..'z', the main Cyrillic block (#224..#255 -> #192..#223)
// and every other cp1251 lowercase letter that has an uppercase counterpart.
// Characters without an uppercase form are returned unchanged.
function UpCase1251 (ch: Char): Char;
begin
  case ch of
    // ASCII letters and the contiguous Cyrillic a..ya block
    'a'..'z', #224..#255: Dec(ch, 32);
    // U+0452, U+0459, U+045A, U+045C, U+045B, U+045F, U+0451, U+0454, U+0457
    #144, #154, #156..#159, #184, #186, #191: Dec(ch, 16);
    // U+045E, U+0456, U+0455: the uppercase form is the preceding code
    #162, #179, #190: Dec(ch);
    // U+0453 (#131) -> U+0403 (#129)
    #131: Dec(ch, 2);
    // U+0491 (#180) -> U+0490 (#165)
    #180: Dec(ch, 15);
    // U+0458 (#188) -> U+0408 (#163)
    #188: Dec(ch, 25);
  end;
  result := ch;
end;
// Case-insensitive equality test for two cp1251 strings.
// Strings of different length are never equal; otherwise the characters are
// compared pairwise after folding each one with UpCase1251.
function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
var
  idx, total: Integer;
begin
  total := length(s0);
  result := (total = length(s1));
  idx := 1;
  // stop at the first mismatching pair
  while result and (idx <= total) do
  begin
    result := (UpCase1251(s0[idx]) = UpCase1251(s1[idx]));
    Inc(idx);
  end;
end;
125 // ////////////////////////////////////////////////////////////////////////// //
126 // utils
// Returns the total length in bytes (1..6) of the UTF-8 sequence that is
// introduced by lead byte `ch`, or -1 when `ch` cannot start a sequence
// (a continuation byte 10xxxxxx, or $FE/$FF).
function utf8CodeLen (ch: Word): Integer;
begin
  if ch < $80 then result := 1                   // 0xxxxxxx
  else if (ch and $E0) = $C0 then result := 2    // 110xxxxx
  else if (ch and $F0) = $E0 then result := 3    // 1110xxxx
  else if (ch and $F8) = $F0 then result := 4    // 11110xxx
  else if (ch and $FC) = $F8 then result := 5    // 111110xx
  else if (ch and $FE) = $FC then result := 6    // 1111110x
  else result := -1;                             // not a valid lead byte
end;
// Checks that `s` is structurally well-formed UTF-8: every sequence starts
// with a valid lead byte (see utf8CodeLen), fits inside the string, and is
// followed by the right number of 10xxxxxx continuation bytes.
// An empty string is considered valid.
function utf8Valid (const s: AnsiString): Boolean;
var
  idx, total, seqLen, seqEnd, k: Integer;
begin
  result := false;
  total := length(s);
  idx := 1;
  while idx <= total do
  begin
    seqLen := utf8CodeLen(Byte(s[idx]));
    if seqLen < 1 then exit; // invalid lead byte
    seqEnd := idx+seqLen-1;
    if seqEnd > total then exit; // sequence runs past the end of the string
    // every byte after the lead byte must be a continuation byte
    for k := idx+1 to seqEnd do
    begin
      if (Byte(s[k]) and $C0) <> $80 then exit;
    end;
    idx := seqEnd+1;
  end;
  result := true;
end;
166 // ////////////////////////////////////////////////////////////////////////// //
const
  // Unicode code point for each cp1251 byte in the range 128..255
  // (index = cp1251 byte value). Index 152 holds $003F ('?').
  uni2wint: array [128..255] of Word = (
    $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021, // 128..135
    $20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, // 136..143
    $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014, // 144..151
    $003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, // 152..159
    $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7, // 160..167
    $0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, // 168..175
    $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7, // 176..183
    $0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, // 184..191
    $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417, // 192..199
    $0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, // 200..207
    $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427, // 208..215
    $0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, // 216..223
    $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437, // 224..231
    $0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, // 232..239
    $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447, // 240..247
    $0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F  // 248..255
  );
// Decodes one UTF-8 sequence from `s` starting at index `pos` and returns
// the matching cp1251 character.
//   s:   source string holding UTF-8 text
//   pos: 1-based index of the sequence's lead byte; on return it points just
//        past the bytes that were consumed
// Returns '?' when `pos` is outside the string, when the sequence is
// malformed (bad lead byte, missing continuation bytes, overlong encoding
// of an ASCII character) or when the code point has no cp1251 equivalent.
function decodeUtf8Char (s: AnsiString; var pos: Integer): char;
var
  b, c, more: Integer;
begin
  (* Lead byte patterns and the number of continuation bytes they announce:
   *   0xxxxxxx                                               (0)
   *   110xxxxx 10xxxxxx                                      (1)
   *   1110xxxx 10xxxxxx 10xxxxxx                             (2)
   *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx                    (3)
   *   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx           (4)
   *   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx  (5)
   * The 5- and 6-byte forms are decoded structurally, but their code points
   * never appear in the cp1251 table, so they always end up as '?'.
   *)
  result := '?';
  if (pos < 1) or (pos > length(s)) then exit;

  b := Byte(s[pos]);
  Inc(pos);
  if b < $80 then begin result := char(b); exit; end;

  // keep the payload bits of the lead byte and remember how many
  // continuation bytes it announces
  if (b and $FE) = $FC then begin b := b and $01; more := 5; end
  else if (b and $FC) = $F8 then begin b := b and $03; more := 4; end
  else if (b and $F8) = $F0 then begin b := b and $07; more := 3; end
  else if (b and $F0) = $E0 then begin b := b and $0F; more := 2; end
  else if (b and $E0) = $C0 then begin b := b and $1F; more := 1; end
  else exit; // stray continuation byte or $FE/$FF

  // consume exactly the announced number of continuation bytes; a sequence
  // that ends early is malformed, and `pos` is left on the offending byte
  while more > 0 do
  begin
    if pos > length(s) then exit;
    c := Byte(s[pos]);
    if (c and $C0) <> $80 then exit;
    b := (b shl 6) or (c and $3F);
    Inc(pos);
    Dec(more);
  end;

  // a multi-byte sequence that decodes to an ASCII value is an overlong
  // encoding; reject it so it cannot match the '?' placeholder in the table
  if b < $80 then exit;

  // look the code point up in the cp1251 table
  for c := 128 to 255 do
  begin
    if uni2wint[c] = b then begin result := char(c); exit; end;
  end;
  // no cp1251 equivalent: result stays '?'
end;
// Converts UTF-8 text to cp1251.
// The input is returned untouched when it is not valid UTF-8 (see utf8Valid)
// or when it contains no byte >= $80 (nothing to convert). Otherwise every
// sequence is decoded with decodeUtf8Char, which substitutes '?' for
// characters it cannot map.
function utf8to1251 (s: AnsiString): AnsiString;
var
  idx: Integer;
  hasHighByte: Boolean;
begin
  result := s;
  if not utf8Valid(s) then exit;
  // pure ASCII needs no conversion
  hasHighByte := false;
  for idx := 1 to length(s) do
  begin
    if Byte(s[idx]) >= $80 then begin hasHighByte := true; break; end;
  end;
  if not hasHighByte then exit;
  // decode sequence by sequence; decodeUtf8Char advances `idx`
  result := '';
  idx := 1;
  while idx <= length(s) do result := result+decodeUtf8Char(s, idx);
end;
242 // ////////////////////////////////////////////////////////////////////////// //
// Resolves `pathname` on disk, matching each path component
// case-insensitively (cp1251 rules, see StrEquCI1251).
//   pathname:  path to look up; both '/' and '\' are accepted as separators.
//              On success it is replaced with the path as found on disk,
//              rebuilt with '/' separators (directories get a trailing '/').
//              On failure it is left untouched.
//   lastIsDir: pass `true` when the final component must be a directory.
// Returns `true` when every component was found; an empty `pathname`
// yields `false`.
// The same code path is used on all platforms.
function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
var
  info: TSearchRec;
  rest: AnsiString;   // part of the path that still has to be resolved
  built: AnsiString;  // part of the path that has been resolved so far
  part: AnsiString;   // component currently being resolved
  needDir, matched: Boolean;
  fattr: LongInt;
  cut: Integer;

  function isSlash (c: Char): Boolean;
  begin
    result := (c = '/') or (c = '\');
  end;

  // drops all leading separators from `rest`
  procedure skipSlashes;
  var
    n: Integer;
  begin
    n := 0;
    while (n < length(rest)) and isSlash(rest[n+1]) do Inc(n);
    Delete(rest, 1, n);
  end;

begin
  rest := pathname;
  built := '';
  result := (rest <> '');
  // keep the root marker of an absolute path
  if result and isSlash(rest[1]) then built := '/';
  while rest <> '' do
  begin
    skipSlashes;
    if rest = '' then break;
    // split the next component off the front of `rest`
    cut := 1;
    while (cut <= length(rest)) and not isSlash(rest[cut]) do Inc(cut);
    part := Copy(rest, 1, cut-1);
    Delete(rest, 1, cut-1);
    skipSlashes;
    // anything that is followed by more components must be a directory
    needDir := lastIsDir or (rest <> '');
    // cheap attempt first: the name exactly as given
    fattr := FileGetAttr(built+part);
    matched := (fattr <> -1) and (needDir = ((fattr and faDirectory) <> 0));
    if not matched then
    begin
      // not found as spelled (or wrong kind of entry): scan the directory
      // for an entry of the wanted kind whose name matches ignoring case
      try
        if FindFirst(built+'*', faAnyFile, info) = 0 then
        repeat
          if (needDir = ((info.attr and faDirectory) <> 0)) and StrEquCI1251(info.name, part) then
          begin
            part := info.name; // use the on-disk spelling
            matched := true;
            break;
          end;
        until FindNext(info) <> 0;
      finally
        FindClose(info);
      end;
    end;
    if not matched then
    begin
      built := '';
      result := false;
      break;
    end;
    built := built+part;
    if needDir then built := built+'/';
  end;
  if result then pathname := built;
end;
311 end.