DEADSOFTWARE

changed license to GPLv3 only; sorry, no trust to FSF anymore
[d2df-editor.git] / src / shared / utils.pas
1 (* Copyright (C) DooM 2D:Forever Developers
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, version 3 of the License ONLY.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14 *)
15 {$INCLUDE a_modes.inc}
16 unit utils;
18 interface
20 uses
21 SysUtils, Classes;
24 // ////////////////////////////////////////////////////////////////////////// //
25 type
26 TUtf8DecoderFast = packed record
27 public
28 const Replacement = $FFFD; // replacement char for invalid unicode
29 const Accept = 0;
30 const Reject = 12;
32 private
33 state: LongWord;
35 public
36 codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state)
38 public
39 constructor Create (v: Boolean{fuck you, fpc});
41 procedure reset (); inline;
43 function complete (): Boolean; inline; // is current character complete? take `codepoint` then
44 function invalid (): Boolean; inline;
45 function completeOrInvalid (): Boolean; inline;
47 // process one byte, return `true` if codepoint is ready
48 function decode (b: Byte): Boolean; inline; overload;
49 function decode (c: AnsiChar): Boolean; inline; overload;
50 end;
53 // ////////////////////////////////////////////////////////////////////////// //
54 // does filename have one of ".wad", ".pk3", ".zip" extensions?
55 function hasWadExtension (fn: AnsiString): Boolean;
57 // does filepath have ".XXX:\" in it?
58 function isWadPath (fn: AnsiString): Boolean;
60 // adds ".wad" extension if filename doesn't have one of ".wad", ".pk3", ".zip"
61 function addWadExtension (fn: AnsiString): AnsiString;
63 // convert number to strig with nice commas
64 function Int64ToStrComma (i: Int64): AnsiString;
66 function UpCase1251 (ch: AnsiChar): AnsiChar; inline;
67 function LoCase1251 (ch: AnsiChar): AnsiChar; inline;
69 function toLowerCase1251 (const s: AnsiString): AnsiString;
71 // `true` if strings are equal; ignoring case for cp1251
72 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
74 function utf8Valid (const s: AnsiString): Boolean;
76 function utf8to1251 (s: AnsiString): AnsiString;
78 // `pathname` will be modified if path is valid
79 // `lastIsDir` should be `true` if we are searching for directory
80 // nobody cares about shitdoze, so i'll use the same code path for it
81 function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
82 function findFileCIStr (pathname: AnsiString): AnsiString;
84 // they throws
85 function openDiskFileRO (pathname: AnsiString): TStream;
86 function createDiskFile (pathname: AnsiString): TStream;
88 // little endian
89 procedure writeInt (st: TStream; v: Byte); overload;
90 procedure writeInt (st: TStream; v: ShortInt); overload;
91 procedure writeInt (st: TStream; v: Word); overload;
92 procedure writeInt (st: TStream; v: SmallInt); overload;
93 procedure writeInt (st: TStream; v: LongWord); overload;
94 procedure writeInt (st: TStream; v: LongInt); overload;
95 procedure writeInt (st: TStream; v: Int64); overload;
96 procedure writeInt (st: TStream; v: UInt64); overload;
98 function readByte (st: TStream): Byte;
99 function readShortInt (st: TStream): ShortInt;
100 function readWord (st: TStream): Word;
101 function readSmallInt (st: TStream): SmallInt;
102 function readLongWord (st: TStream): LongWord;
103 function readLongInt (st: TStream): LongInt;
104 function readInt64 (st: TStream): Int64;
105 function readUInt64 (st: TStream): UInt64;
107 // big endian
108 procedure writeIntBE (st: TStream; v: Byte); overload;
109 procedure writeIntBE (st: TStream; v: ShortInt); overload;
110 procedure writeIntBE (st: TStream; v: Word); overload;
111 procedure writeIntBE (st: TStream; v: SmallInt); overload;
112 procedure writeIntBE (st: TStream; v: LongWord); overload;
113 procedure writeIntBE (st: TStream; v: LongInt); overload;
114 procedure writeIntBE (st: TStream; v: Int64); overload;
115 procedure writeIntBE (st: TStream; v: UInt64); overload;
117 function readByteBE (st: TStream): Byte;
118 function readShortIntBE (st: TStream): ShortInt;
119 function readWordBE (st: TStream): Word;
120 function readSmallIntBE (st: TStream): SmallInt;
121 function readLongWordBE (st: TStream): LongWord;
122 function readLongIntBE (st: TStream): LongInt;
123 function readInt64BE (st: TStream): Int64;
124 function readUInt64BE (st: TStream): UInt64;
127 type
128 TFormatStrFCallback = procedure (constref buf; len: SizeUInt);
130 function wchar2win (wc: WideChar): AnsiChar; inline;
131 function utf2win (const s: AnsiString): AnsiString;
132 function win2utf (const s: AnsiString): AnsiString;
133 function digitInBase (ch: AnsiChar; base: Integer): Integer;
135 // returns string in single or double quotes
136 // single quotes supports only pascal-style '' for single quote char
137 // double quotes supports c-style escapes
138 // function will select quote mode automatically
139 function quoteStr (const s: AnsiString): AnsiString;
142 // ////////////////////////////////////////////////////////////////////////// //
143 var
144 wc2shitmap: array[0..65535] of AnsiChar;
145 wc2shitmapInited: Boolean = false;
148 // ////////////////////////////////////////////////////////////////////////// //
149 const
150 cp1251: array[0..127] of Word = (
151 $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
152 $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
153 $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
154 $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
155 $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
156 $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
157 $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
158 $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
159 );
161 implementation
163 procedure initShitMap ();
164 var
165 f: Integer;
166 begin
167 for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?';
168 for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f);
169 for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128);
170 wc2shitmapInited := true;
171 end;
174 // ////////////////////////////////////////////////////////////////////////// //
175 // fast state-machine based UTF-8 decoder; using 8 bytes of memory
176 // code points from invalid range will never be valid, this is the property of the state machine
177 const
178 // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
179 utf8dfa: array[0..$16c-1] of Byte = (
180 // maps bytes to character classes
181 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f
182 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f
183 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f
184 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f
185 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f
186 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f
187 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f
188 $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f
189 $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f
190 $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f
191 $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af
192 $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf
193 $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf
194 $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df
195 $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef
196 $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff
197 // maps a combination of a state of the automaton and a character class to a state
198 $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f
199 $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f
200 $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f
201 $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f
202 $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f
203 $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f
204 $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c);
207 // ////////////////////////////////////////////////////////////////////////// //
208 constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end;
210 procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end;
212 function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end;
213 function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end;
214 function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end;
216 function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end;
218 function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload;
219 var
220 tp: LongWord;
221 begin
222 if (state = Reject) then begin state := Accept; codepoint := 0; end;
223 tp := utf8dfa[b];
224 if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b;
225 state := utf8dfa[256+state+tp];
226 if (state = Reject) then begin codepoint := Replacement; state := Accept; end;
227 result := (state = Accept);
228 end;
231 // ////////////////////////////////////////////////////////////////////////// //
232 function wchar2win (wc: WideChar): AnsiChar; inline;
233 begin
234 if not wc2shitmapInited then initShitMap();
235 if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)];
236 end;
239 // ////////////////////////////////////////////////////////////////////////// //
240 function utf2win (const s: AnsiString): AnsiString;
241 var
242 f, c: Integer;
243 ud: TUtf8DecoderFast;
244 begin
245 for f := 1 to Length(s) do
246 begin
247 if (Byte(s[f]) > 127) then
248 begin
249 ud := TUtf8DecoderFast.Create(true);
250 result := '';
251 for c := 1 to Length(s) do
252 begin
253 if ud.decode(s[c]) then result += wchar2win(WideChar(ud.codepoint));
254 end;
255 exit;
256 end;
257 end;
258 result := s;
259 end;
262 function win2utf (const s: AnsiString): AnsiString;
263 var
264 f, c: Integer;
266 function utf8Encode (code: Integer): AnsiString;
267 begin
268 if (code < 0) or (code > $10FFFF) then begin result := '?'; exit; end;
269 if (code <= $7f) then
270 begin
271 result := Char(code and $ff);
272 end
273 else if (code <= $7FF) then
274 begin
275 result := Char($C0 or (code shr 6));
276 result += Char($80 or (code and $3F));
277 end
278 else if (code <= $FFFF) then
279 begin
280 result := Char($E0 or (code shr 12));
281 result += Char($80 or ((code shr 6) and $3F));
282 result += Char($80 or (code and $3F));
283 end
284 else if (code <= $10FFFF) then
285 begin
286 result := Char($F0 or (code shr 18));
287 result += Char($80 or ((code shr 12) and $3F));
288 result += Char($80 or ((code shr 6) and $3F));
289 result += Char($80 or (code and $3F));
290 end
291 else
292 begin
293 result := '?';
294 end;
295 end;
297 begin
298 for f := 1 to Length(s) do
299 begin
300 if (Byte(s[f]) > 127) then
301 begin
302 result := '';
303 for c := 1 to Length(s) do
304 begin
305 if (Byte(s[c]) < 128) then
306 begin
307 result += s[c];
308 end
309 else
310 begin
311 result += utf8Encode(cp1251[Byte(s[c])-128])
312 end;
313 end;
314 exit;
315 end;
316 end;
317 result := s;
318 end;
321 // ////////////////////////////////////////////////////////////////////////// //
322 function digitInBase (ch: AnsiChar; base: Integer): Integer;
323 begin
324 result := -1;
325 if (base < 1) or (base > 36) then exit;
326 if (ch < '0') then exit;
327 if (base <= 10) then
328 begin
329 if (Integer(ch) >= 48+base) then exit;
330 result := Integer(ch)-48;
331 end
332 else
333 begin
334 if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end;
335 if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower()
336 if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit;
337 result := Integer(ch)-65+10;
338 end;
339 end;
342 // ////////////////////////////////////////////////////////////////////////// //
343 function quoteStr (const s: AnsiString): AnsiString;
345 function squote (const s: AnsiString): AnsiString;
346 var
347 f: Integer;
348 begin
349 result := '''';
350 for f := 1 to Length(s) do
351 begin
352 if (s[f] = '''') then result += '''';
353 result += s[f];
354 end;
355 result += '''';
356 end;
358 function dquote (const s: AnsiString): AnsiString;
359 var
360 f: Integer;
361 ch: AnsiChar;
362 begin
363 result := '"';
364 for f := 1 to Length(s) do
365 begin
366 ch := s[f];
367 if (ch = #0) then result += '\z'
368 else if (ch = #9) then result += '\t'
369 else if (ch = #10) then result += '\n'
370 else if (ch = #13) then result += '\r'
371 else if (ch = #27) then result += '\e'
372 else if (ch < ' ') or (ch = #127) then
373 begin
374 result += '\x';
375 result += LowerCase(IntToHex(Integer(ch), 2));
376 end
377 else if (ch = '"') or (ch = '\') then
378 begin
379 result += '\';
380 result += ch;
381 end
382 else
383 begin
384 result += ch;
385 end;
386 end;
387 result += '"';
388 end;
390 var
391 needSingle: Boolean = false;
392 f: Integer;
393 begin
394 for f := 1 to Length(s) do
395 begin
396 if (s[f] = '''') then begin needSingle := true; continue; end;
397 if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end;
398 end;
399 if needSingle then result := squote(s) else result := ''''+s+'''';
400 end;
403 // ////////////////////////////////////////////////////////////////////////// //
404 function hasWadExtension (fn: AnsiString): Boolean;
405 begin
406 fn := ExtractFileExt(fn);
407 result := StrEquCI1251(fn, '.wad') or StrEquCI1251(fn, '.pk3') or StrEquCI1251(fn, '.zip');
408 end;
411 function addWadExtension (fn: AnsiString): AnsiString;
412 begin
413 result := fn;
414 if not hasWadExtension(result) then result := result+'.wad';
415 end;
418 function isWadPath (fn: AnsiString): Boolean;
419 var
420 p: Integer;
421 s: AnsiString;
422 begin
423 result := false;
424 while true do
425 begin
426 p := Pos(':', fn);
427 if (p = 0) or (length(fn)-p < 1) then break;
428 if (p-4 > 1) and (fn[p-4] = '.') and ((fn[p+1] = '\') or (fn[p+1] = '/')) then
429 begin
430 s := Copy(fn, p-4, 4);
431 if StrEquCI1251(s, '.wad') or StrEquCI1251(s, '.pk3') or StrEquCI1251(s, '.zip') then
432 begin
433 result := true;
434 exit;
435 end;
436 end;
437 Delete(fn, 1, p);
438 end;
439 end;
442 function Int64ToStrComma (i: Int64): AnsiString;
443 var
444 f: Integer;
445 begin
446 Str(i, result);
447 f := Length(result)+1;
448 while f > 4 do
449 begin
450 Dec(f, 3); Insert(',', result, f);
451 end;
452 end;
455 function UpCase1251 (ch: AnsiChar): AnsiChar; inline;
456 begin
457 if ch < #128 then
458 begin
459 if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32);
460 end
461 else
462 begin
463 if (ch >= #224) and (ch <= #255) then
464 begin
465 Dec(ch, 32);
466 end
467 else
468 begin
469 case ch of
470 #184, #186, #191: Dec(ch, 16);
471 #162, #179: Dec(ch);
472 end;
473 end;
474 end;
475 result := ch;
476 end;
479 function LoCase1251 (ch: AnsiChar): AnsiChar; inline;
480 begin
481 if ch < #128 then
482 begin
483 if (ch >= 'A') and (ch <= 'Z') then Inc(ch, 32);
484 end
485 else
486 begin
487 if (ch >= #192) and (ch <= #223) then
488 begin
489 Inc(ch, 32);
490 end
491 else
492 begin
493 case ch of
494 #168, #170, #175: Inc(ch, 16);
495 #161, #178: Inc(ch);
496 end;
497 end;
498 end;
499 result := ch;
500 end;
503 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
504 var
505 i: Integer;
506 begin
507 result := false;
508 if length(s0) <> length(s1) then exit;
509 for i := 1 to length(s0) do if UpCase1251(s0[i]) <> UpCase1251(s1[i]) then exit;
510 result := true;
511 end;
514 function toLowerCase1251 (const s: AnsiString): AnsiString;
515 var
516 f: Integer;
517 ch: AnsiChar;
518 begin
519 for ch in s do
520 begin
521 if (ch <> LoCase1251(ch)) then
522 begin
523 result := '';
524 SetLength(result, Length(s));
525 for f := 1 to Length(s) do result[f] := LoCase1251(s[f]);
526 exit;
527 end;
528 end;
529 // nothing to do
530 result := s;
531 end;
534 // ////////////////////////////////////////////////////////////////////////// //
535 // utils
536 // `ch`: utf8 start
537 // -1: invalid utf8
538 function utf8CodeLen (ch: Word): Integer;
539 begin
540 if ch < $80 then begin result := 1; exit; end;
541 if (ch and $FE) = $FC then begin result := 6; exit; end;
542 if (ch and $FC) = $F8 then begin result := 5; exit; end;
543 if (ch and $F8) = $F0 then begin result := 4; exit; end;
544 if (ch and $F0) = $E0 then begin result := 3; exit; end;
545 if (ch and $E0) = $C0 then begin result := 2; exit; end;
546 result := -1; // invalid
547 end;
550 function utf8Valid (const s: AnsiString): Boolean;
551 var
552 pos, len: Integer;
553 begin
554 result := false;
555 pos := 1;
556 while pos <= length(s) do
557 begin
558 len := utf8CodeLen(Byte(s[pos]));
559 if len < 1 then exit; // invalid sequence start
560 if pos+len-1 > length(s) then exit; // out of chars in string
561 Dec(len);
562 Inc(pos);
563 // check other sequence bytes
564 while len > 0 do
565 begin
566 if (Byte(s[pos]) and $C0) <> $80 then exit;
567 Dec(len);
568 Inc(pos);
569 end;
570 end;
571 result := true;
572 end;
575 // ////////////////////////////////////////////////////////////////////////// //
576 const
577 uni2wint: array [128..255] of Word = (
578 $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
579 $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
580 $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
581 $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
582 $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
583 $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
584 $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
585 $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
586 );
589 function decodeUtf8Char (s: AnsiString; var pos: Integer): char;
590 var
591 b, c: Integer;
592 begin
593 (* The following encodings are valid, except for the 5 and 6 byte
594 * combinations:
595 * 0xxxxxxx
596 * 110xxxxx 10xxxxxx
597 * 1110xxxx 10xxxxxx 10xxxxxx
598 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
599 * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
600 * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
601 *)
602 result := '?';
603 if pos > length(s) then exit;
605 b := Byte(s[pos]);
606 Inc(pos);
607 if b < $80 then begin result := char(b); exit; end;
609 // mask out unused bits
610 if (b and $FE) = $FC then b := b and $01
611 else if (b and $FC) = $F8 then b := b and $03
612 else if (b and $F8) = $F0 then b := b and $07
613 else if (b and $F0) = $E0 then b := b and $0F
614 else if (b and $E0) = $C0 then b := b and $1F
615 else exit; // invalid utf8
617 // now continue
618 while pos <= length(s) do
619 begin
620 c := Byte(s[pos]);
621 if (c and $C0) <> $80 then break; // no more
622 b := b shl 6;
623 b := b or (c and $3F);
624 Inc(pos);
625 end;
627 // done, try 1251
628 for c := 128 to 255 do if uni2wint[c] = b then begin result := char(c and $FF); exit; end;
629 // alas
630 end;
633 function utf8to1251 (s: AnsiString): AnsiString;
634 var
635 pos: Integer;
636 begin
637 if not utf8Valid(s) then begin result := s; exit; end;
638 pos := 1;
639 while pos <= length(s) do
640 begin
641 if Byte(s[pos]) >= $80 then break;
642 Inc(pos);
643 end;
644 if pos > length(s) then begin result := s; exit; end; // nothing to do here
645 result := '';
646 pos := 1;
647 while pos <= length(s) do result := result+decodeUtf8Char(s, pos);
648 end;
651 // ////////////////////////////////////////////////////////////////////////// //
652 // `pathname` will be modified if path is valid
653 // `lastIsDir` should be `true` if we are searching for directory
654 // nobody cares about shitdoze, so i'll use the same code path for it
655 function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
656 var
657 sr: TSearchRec;
658 npt: AnsiString;
659 newname: AnsiString = '';
660 curname: AnsiString;
661 wantdir: Boolean;
662 attr: LongInt;
663 foundher: Boolean;
664 begin
665 npt := pathname;
666 result := (length(npt) > 0);
667 if (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) then newname := '/';
668 while length(npt) > 0 do
669 begin
670 // remove trailing slashes
671 while (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) do Delete(npt, 1, 1);
672 if length(npt) = 0 then break;
673 // extract name
674 curname := '';
675 while (length(npt) > 0) and (npt[1] <> '/') and (npt[1] <> '\') do
676 begin
677 curname := curname+npt[1];
678 Delete(npt, 1, 1);
679 end;
680 // remove trailing slashes again
681 while (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) do Delete(npt, 1, 1);
682 wantdir := lastIsDir or (length(npt) > 0); // do we want directory here?
683 //writeln(Format('npt=[%s]; newname=[%s]; curname=[%s]; wantdir=%d', [npt, newname, curname, Integer(wantdir)]));
684 // try the easiest case first
685 attr := FileGetAttr(newname+curname);
686 if attr <> -1 then
687 begin
688 if wantdir = ((attr and faDirectory) <> 0) then
689 begin
690 // i found her!
691 newname := newname+curname;
692 if wantdir then newname := newname+'/';
693 continue;
694 end;
695 end;
696 //writeln(Format('npt=[%s]; newname=[%s]; curname=[%s]; wantdir=%d', [npt, newname, curname, Integer(wantdir)]));
697 // alas, either not found, or invalid attributes
698 foundher := false;
699 try
700 if FindFirst(newname+'*', faAnyFile, sr) = 0 then
701 repeat
702 if (wantdir = ((sr.attr and faDirectory) <> 0)) and StrEquCI1251(sr.name, curname) then
703 begin
704 // i found her!
705 newname := newname+sr.name;
706 if wantdir then newname := newname+'/';
707 foundher := true;
708 break;
709 end;
710 until FindNext(sr) <> 0;
711 finally
712 FindClose(sr);
713 end;
714 if not foundher then begin newname := ''; result := false; break; end;
715 end;
716 if result then pathname := newname;
717 end;
719 function findFileCIStr (pathname: AnsiString): AnsiString;
720 begin
721 result := pathname;
722 findFileCI(result);
723 end;
725 function openDiskFileRO (pathname: AnsiString): TStream;
726 begin
727 if not findFileCI(pathname) then raise Exception.Create('can''t open file "'+pathname+'"');
728 result := TFileStream.Create(pathname, fmOpenRead or {fmShareDenyWrite}fmShareDenyNone);
729 end;
731 function createDiskFile (pathname: AnsiString): TStream;
732 var
733 path: AnsiString;
734 begin
735 path := ExtractFilePath(pathname);
736 if length(path) > 0 then
737 begin
738 if not findFileCI(path, true) then raise Exception.Create('can''t create file "'+pathname+'"');
739 end;
740 result := TFileStream.Create(path+ExtractFileName(pathname), fmCreate);
741 end;
744 procedure writeIntegerLE (st: TStream; vp: Pointer; size: Integer);
745 {$IFDEF ENDIAN_LITTLE}
746 begin
747 st.writeBuffer(vp^, size);
748 end;
749 {$ELSE}
750 var
751 p: PByte;
752 begin
753 p := PByte(vp)+size-1;
754 while size > 0 do
755 begin
756 st.writeBuffer(p^, 1);
757 Dec(size);
758 Dec(p);
759 end;
760 end;
761 {$ENDIF}
763 procedure writeIntegerBE (st: TStream; vp: Pointer; size: Integer);
764 {$IFDEF ENDIAN_LITTLE}
765 var
766 p: PByte;
767 begin
768 p := PByte(vp)+size-1;
769 while size > 0 do
770 begin
771 st.writeBuffer(p^, 1);
772 Dec(size);
773 Dec(p);
774 end;
775 end;
776 {$ELSE}
777 begin
778 st.writeBuffer(vp^, size);
779 end;
780 {$ENDIF}
782 procedure writeInt (st: TStream; v: Byte); overload; begin writeIntegerLE(st, @v, 1); end;
783 procedure writeInt (st: TStream; v: ShortInt); overload; begin writeIntegerLE(st, @v, 1); end;
784 procedure writeInt (st: TStream; v: Word); overload; begin writeIntegerLE(st, @v, 2); end;
785 procedure writeInt (st: TStream; v: SmallInt); overload; begin writeIntegerLE(st, @v, 2); end;
786 procedure writeInt (st: TStream; v: LongWord); overload; begin writeIntegerLE(st, @v, 4); end;
787 procedure writeInt (st: TStream; v: LongInt); overload; begin writeIntegerLE(st, @v, 4); end;
788 procedure writeInt (st: TStream; v: Int64); overload; begin writeIntegerLE(st, @v, 8); end;
789 procedure writeInt (st: TStream; v: UInt64); overload; begin writeIntegerLE(st, @v, 8); end;
791 procedure writeIntBE (st: TStream; v: Byte); overload; begin writeIntegerBE(st, @v, 1); end;
792 procedure writeIntBE (st: TStream; v: ShortInt); overload; begin writeIntegerBE(st, @v, 1); end;
793 procedure writeIntBE (st: TStream; v: Word); overload; begin writeIntegerBE(st, @v, 2); end;
794 procedure writeIntBE (st: TStream; v: SmallInt); overload; begin writeIntegerBE(st, @v, 2); end;
795 procedure writeIntBE (st: TStream; v: LongWord); overload; begin writeIntegerBE(st, @v, 4); end;
796 procedure writeIntBE (st: TStream; v: LongInt); overload; begin writeIntegerBE(st, @v, 4); end;
797 procedure writeIntBE (st: TStream; v: Int64); overload; begin writeIntegerBE(st, @v, 8); end;
798 procedure writeIntBE (st: TStream; v: UInt64); overload; begin writeIntegerBE(st, @v, 8); end;
801 procedure readIntegerLE (st: TStream; vp: Pointer; size: Integer);
802 {$IFDEF ENDIAN_LITTLE}
803 begin
804 st.readBuffer(vp^, size);
805 end;
806 {$ELSE}
807 var
808 p: PByte;
809 begin
810 p := PByte(vp)+size-1;
811 while size > 0 do
812 begin
813 st.readBuffer(p^, 1);
814 Dec(size);
815 Dec(p);
816 end;
817 end;
818 {$ENDIF}
820 procedure readIntegerBE (st: TStream; vp: Pointer; size: Integer);
821 {$IFDEF ENDIAN_LITTLE}
822 var
823 p: PByte;
824 begin
825 p := PByte(vp)+size-1;
826 while size > 0 do
827 begin
828 st.readBuffer(p^, 1);
829 Dec(size);
830 Dec(p);
831 end;
832 end;
833 {$ELSE}
834 begin
835 st.readBuffer(vp^, size);
836 end;
837 {$ENDIF}
839 function readByte (st: TStream): Byte; begin readIntegerLE(st, @result, 1); end;
840 function readShortInt (st: TStream): ShortInt; begin readIntegerLE(st, @result, 1); end;
841 function readWord (st: TStream): Word; begin readIntegerLE(st, @result, 2); end;
842 function readSmallInt (st: TStream): SmallInt; begin readIntegerLE(st, @result, 2); end;
843 function readLongWord (st: TStream): LongWord; begin readIntegerLE(st, @result, 4); end;
844 function readLongInt (st: TStream): LongInt; begin readIntegerLE(st, @result, 4); end;
845 function readInt64 (st: TStream): Int64; begin readIntegerLE(st, @result, 8); end;
846 function readUInt64 (st: TStream): UInt64; begin readIntegerLE(st, @result, 8); end;
848 function readByteBE (st: TStream): Byte; begin readIntegerBE(st, @result, 1); end;
849 function readShortIntBE (st: TStream): ShortInt; begin readIntegerBE(st, @result, 1); end;
850 function readWordBE (st: TStream): Word; begin readIntegerBE(st, @result, 2); end;
851 function readSmallIntBE (st: TStream): SmallInt; begin readIntegerBE(st, @result, 2); end;
852 function readLongWordBE (st: TStream): LongWord; begin readIntegerBE(st, @result, 4); end;
853 function readLongIntBE (st: TStream): LongInt; begin readIntegerBE(st, @result, 4); end;
854 function readInt64BE (st: TStream): Int64; begin readIntegerBE(st, @result, 8); end;
855 function readUInt64BE (st: TStream): UInt64; begin readIntegerBE(st, @result, 8); end;
857 end.