DEADSOFTWARE

Shared: Import toLowerCase1251 function and use it
[d2df-editor.git] / src / shared / utils.pas
1 (* Copyright (C) DooM 2D:Forever Developers
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *)
16 {$INCLUDE a_modes.inc}
17 unit utils;
19 interface
21 uses
22 SysUtils, Classes;
25 // ////////////////////////////////////////////////////////////////////////// //
26 type
// Incremental UTF-8 decoder: feed it one byte at a time through `decode()`
// and read `codepoint` whenever `decode()` returns `true`.
TUtf8DecoderFast = packed record
public
  const Replacement = $FFFD; // replacement char for invalid unicode
  const Accept = 0; // state value: a whole codepoint has been decoded
  const Reject = 12; // state value: the byte sequence is invalid

private
  state: LongWord; // current automaton state (`Accept`, `Reject`, or an intermediate value)

public
  codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state)

public
  // `v` is a dummy argument; the implementation ignores it
  constructor Create (v: Boolean{dummy, ignored});

  // return to the initial state and clear `codepoint`
  procedure reset (); inline;

  function complete (): Boolean; inline; // is current character complete? take `codepoint` then
  function invalid (): Boolean; inline; // is decoder in `Reject` state?
  function completeOrInvalid (): Boolean; inline; // `complete()` or `invalid()`

  // process one byte, return `true` if codepoint is ready
  function decode (b: Byte): Boolean; inline; overload;
  function decode (c: AnsiChar): Boolean; inline; overload;
end;
54 // ////////////////////////////////////////////////////////////////////////// //
// does filename have one of ".wad", ".pk3", ".zip" extensions? (case is ignored)
function hasWadExtension (fn: AnsiString): Boolean;

// does filepath contain ".wad", ".pk3" or ".zip" immediately followed by ":\" or ":/"?
function isWadPath (fn: AnsiString): Boolean;

// adds ".wad" extension if filename doesn't have one of ".wad", ".pk3", ".zip"
function addWadExtension (fn: AnsiString): AnsiString;

// convert number to string with nice commas
function Int64ToStrComma (i: Int64): AnsiString;

// single-character case conversion for cp1251-encoded text
function UpCase1251 (ch: AnsiChar): AnsiChar; inline;
function LoCase1251 (ch: AnsiChar): AnsiChar; inline;

// returns `s` with every character converted by `LoCase1251`
function toLowerCase1251 (const s: AnsiString): AnsiString;

// `true` if strings are equal; ignoring case for cp1251
function StrEquCI1251 (const s0, s1: AnsiString): Boolean;

// structural check only: every lead byte must be followed by the right number of continuation bytes
function utf8Valid (const s: AnsiString): Boolean;

// converts UTF-8 to cp1251; `s` is returned unchanged when it fails `utf8Valid` or is pure ASCII
function utf8to1251 (s: AnsiString): AnsiString;

// `pathname` will be modified if path is valid
// `lastIsDir` should be `true` if we are searching for directory
// the same code path is used on every platform, Windows included
function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
// like `findFileCI`, but returns the resolved name (or `pathname` unchanged when nothing was found)
function findFileCIStr (pathname: AnsiString): AnsiString;

// these raise `Exception` when the path cannot be resolved
function openDiskFileRO (pathname: AnsiString): TStream;
function createDiskFile (pathname: AnsiString): TStream;
// little endian: the value is stored in the stream least significant byte first
procedure writeInt (st: TStream; v: Byte); overload;
procedure writeInt (st: TStream; v: ShortInt); overload;
procedure writeInt (st: TStream; v: Word); overload;
procedure writeInt (st: TStream; v: SmallInt); overload;
procedure writeInt (st: TStream; v: LongWord); overload;
procedure writeInt (st: TStream; v: LongInt); overload;
procedure writeInt (st: TStream; v: Int64); overload;
procedure writeInt (st: TStream; v: UInt64); overload;

// little endian readers, one per integer type
function readByte (st: TStream): Byte;
function readShortInt (st: TStream): ShortInt;
function readWord (st: TStream): Word;
function readSmallInt (st: TStream): SmallInt;
function readLongWord (st: TStream): LongWord;
function readLongInt (st: TStream): LongInt;
function readInt64 (st: TStream): Int64;
function readUInt64 (st: TStream): UInt64;

// big endian: the value is stored in the stream most significant byte first
procedure writeIntBE (st: TStream; v: Byte); overload;
procedure writeIntBE (st: TStream; v: ShortInt); overload;
procedure writeIntBE (st: TStream; v: Word); overload;
procedure writeIntBE (st: TStream; v: SmallInt); overload;
procedure writeIntBE (st: TStream; v: LongWord); overload;
procedure writeIntBE (st: TStream; v: LongInt); overload;
procedure writeIntBE (st: TStream; v: Int64); overload;
procedure writeIntBE (st: TStream; v: UInt64); overload;

// big endian readers, one per integer type
function readByteBE (st: TStream): Byte;
function readShortIntBE (st: TStream): ShortInt;
function readWordBE (st: TStream): Word;
function readSmallIntBE (st: TStream): SmallInt;
function readLongWordBE (st: TStream): LongWord;
function readLongIntBE (st: TStream): LongInt;
function readInt64BE (st: TStream): Int64;
function readUInt64BE (st: TStream): UInt64;
type
  // callback taking an untyped buffer and its length
  // NOTE(review): nothing in this unit references this type -- confirm it is still needed
  TFormatStrFCallback = procedure (constref buf; len: SizeUInt);

// maps one 16-bit character to its cp1251 byte; '?' when there is no mapping
function wchar2win (wc: WideChar): AnsiChar; inline;
// UTF-8 -> cp1251; pure ASCII input is returned as is
function utf2win (const s: AnsiString): AnsiString;
// cp1251 -> UTF-8; pure ASCII input is returned as is
function win2utf (const s: AnsiString): AnsiString;
// value of digit `ch` in the given base (1..36, letters in either case), or -1 if it is not a valid digit
function digitInBase (ch: AnsiChar; base: Integer): Integer;

// returns string in single or double quotes
// single quotes supports only pascal-style '' for single quote char
// double quotes supports c-style escapes
// function will select quote mode automatically
function quoteStr (const s: AnsiString): AnsiString;
143 // ////////////////////////////////////////////////////////////////////////// //
var
  // lookup table: 16-bit character code -> cp1251 byte; filled by `initShitMap` on first use
  wc2shitmap: array[0..65535] of AnsiChar;
  // set to `true` once `wc2shitmap` has been filled
  wc2shitmapInited: Boolean = false;
149 // ////////////////////////////////////////////////////////////////////////// //
const
  // Unicode code point for each cp1251 byte in the upper half:
  // entry `i` describes byte `i+128` (see `win2utf` and `initShitMap`)
  cp1251: array[0..127] of Word = (
    $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
    $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
    $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
    $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
    $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
    $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
    $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
    $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
  );
162 implementation
// Fills `wc2shitmap`, the 16-bit-character -> cp1251 lookup table used by
// `wchar2win`, and marks it as initialised.
//   * characters without a cp1251 counterpart map to '?'
//   * ASCII (0..127) maps to itself
//   * every code point listed in `cp1251` maps to its byte (index+128)
procedure initShitMap ();
var
  f: Integer;
begin
  // default: no mapping
  for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?';
  // ASCII is identical in both encodings
  for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f);
  // upper half of cp1251
  for f := 0 to 127 do
  begin
    // `cp1251` lists $003F ('?') for a byte that has no code point of its own;
    // such ASCII-valued entries must not overwrite the identity mapping above,
    // otherwise a plain '?' would be converted to #152
    if (cp1251[f] >= 128) then wc2shitmap[cp1251[f]] := AnsiChar(f+128);
  end;
  wc2shitmapInited := true;
end;
175 // ////////////////////////////////////////////////////////////////////////// //
// fast state-machine based UTF-8 decoder; using 8 bytes of memory
// code points from invalid range will never be valid, this is the property of the state machine
const
  // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
  // layout, as used by `TUtf8DecoderFast.decode`:
  //   entries $000..$0ff: character class of each byte value
  //   entries $100..$16b: next state, indexed by 256+state+class
  //                       (state 0 is `Accept`, state 12 is `Reject`)
  utf8dfa: array[0..$16c-1] of Byte = (
    // maps bytes to character classes
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f
    $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f
    $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f
    $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af
    $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf
    $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf
    $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df
    $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef
    $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff
    // maps a combination of a state of the automaton and a character class to a state
    $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f
    $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f
    $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f
    $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f
    $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f
    $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f
    $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c);
208 // ////////////////////////////////////////////////////////////////////////// //
// Creates a decoder in its initial state; the dummy argument `v` is ignored.
constructor TUtf8DecoderFast.Create (v: Boolean{dummy, ignored});
begin
  state := Accept;
  codepoint := 0;
end;

// Puts the decoder back into its initial state.
procedure TUtf8DecoderFast.reset (); inline;
begin
  state := Accept;
  codepoint := 0;
end;

// `true` when a whole codepoint has been decoded.
function TUtf8DecoderFast.complete (): Boolean; inline;
begin
  result := (state = Accept);
end;

// `true` when the decoder sits in the `Reject` state.
function TUtf8DecoderFast.invalid (): Boolean; inline;
begin
  result := (state = Reject);
end;

// `true` when the decoder is in either terminal state.
function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline;
begin
  result := (state = Accept) or (state = Reject);
end;

// Character flavour of `decode`: forwards the byte value of `c`.
function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload;
begin
  result := decode(Byte(c));
end;

// Feeds one byte to the automaton. Returns `true` when `codepoint` holds a
// finished value; an invalid sequence yields `Replacement` (and also returns
// `true`), after which decoding restarts from the initial state.
function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload;
var
  cls: LongWord;
begin
  // recover from an earlier rejection before looking at the new byte
  if (state = Reject) then
  begin
    state := Accept;
    codepoint := 0;
  end;
  cls := utf8dfa[b];
  if (state = Accept) then
  begin
    // lead byte: the character class tells how many high bits to drop
    codepoint := b and ($ff shr cls);
  end
  else
  begin
    // continuation byte: append its six payload bits
    codepoint := (codepoint shl 6) or (b and $3f);
  end;
  state := utf8dfa[256+state+cls];
  if (state = Reject) then
  begin
    state := Accept;
    codepoint := Replacement;
  end;
  result := (state = Accept);
end;
232 // ////////////////////////////////////////////////////////////////////////// //
// Looks up the cp1251 byte for a 16-bit character; the lookup table is built
// on first use. Codes outside the table yield '?'.
function wchar2win (wc: WideChar): AnsiChar; inline;
var
  code: LongWord;
begin
  if not wc2shitmapInited then initShitMap();
  code := LongWord(wc);
  if (code <= 65535) then result := wc2shitmap[code] else result := '?';
end;
240 // ////////////////////////////////////////////////////////////////////////// //
// Converts UTF-8 text to cp1251.
//   * a string without bytes above 127 is returned as is
//   * otherwise every decoded code point is mapped through `wchar2win`;
//     code points with no cp1251 counterpart become '?'
//   * an unfinished sequence at the very end of `s` produces no output
function utf2win (const s: AnsiString): AnsiString;
var
  f, c: Integer;
  ud: TUtf8DecoderFast;
begin
  for f := 1 to Length(s) do
  begin
    if (Byte(s[f]) > 127) then
    begin
      // non-ASCII byte found: decode the whole string from the start
      ud := TUtf8DecoderFast.Create(true);
      result := '';
      for c := 1 to Length(s) do
      begin
        if ud.decode(s[c]) then
        begin
          // code points above $FFFF do not fit into a WideChar; casting them
          // would silently drop the high bits and select an unrelated
          // character, so they are reported as unmappable instead
          if (ud.codepoint > $FFFF) then result += '?'
          else result += wchar2win(WideChar(ud.codepoint));
        end;
      end;
      exit;
    end;
  end;
  // pure ASCII: nothing to convert
  result := s;
end;
// Converts cp1251 text to UTF-8. A string without bytes above 127 is returned
// as is; otherwise each upper-half byte is replaced by the UTF-8 encoding of
// the code point listed for it in `cp1251`.
function win2utf (const s: AnsiString): AnsiString;

  // UTF-8 encoding of one code point; values outside 0..$10FFFF become '?'
  function utf8Encode (code: Integer): AnsiString;
  begin
    if (code < 0) or (code > $10FFFF) then
    begin
      result := '?';
    end
    else if (code < $80) then
    begin
      // one byte
      result := Char(code and $ff);
    end
    else if (code < $800) then
    begin
      // two bytes
      result := Char($C0 or (code shr 6));
      result += Char($80 or (code and $3F));
    end
    else if (code < $10000) then
    begin
      // three bytes
      result := Char($E0 or (code shr 12));
      result += Char($80 or ((code shr 6) and $3F));
      result += Char($80 or (code and $3F));
    end
    else
    begin
      // four bytes
      result := Char($F0 or (code shr 18));
      result += Char($80 or ((code shr 12) and $3F));
      result += Char($80 or ((code shr 6) and $3F));
      result += Char($80 or (code and $3F));
    end;
  end;

var
  idx: Integer;
  needsConversion: Boolean;
begin
  // is there anything outside ASCII?
  needsConversion := false;
  for idx := 1 to Length(s) do
  begin
    if (Byte(s[idx]) > 127) then
    begin
      needsConversion := true;
      break;
    end;
  end;
  if not needsConversion then
  begin
    result := s;
    exit;
  end;
  result := '';
  for idx := 1 to Length(s) do
  begin
    if (Byte(s[idx]) >= 128) then result += utf8Encode(cp1251[Byte(s[idx])-128])
    else result += s[idx];
  end;
end;
322 // ////////////////////////////////////////////////////////////////////////// //
// Returns the numeric value of digit `ch` in the given `base`, or -1 when
// `base` is outside 1..36 or `ch` is not a digit of that base. Letters are
// accepted in either case for bases above 10.
function digitInBase (ch: AnsiChar; base: Integer): Integer;
var
  code, value: Integer;
begin
  result := -1;
  if (base < 1) or (base > 36) then exit;
  code := Ord(ch);
  if (code < Ord('0')) then exit;
  if (base <= 10) then
  begin
    // decimal digits only
    if (code < Ord('0')+base) then result := code-Ord('0');
    exit;
  end;
  case ch of
    '0'..'9': value := code-Ord('0');
    'a'..'z': value := code-Ord('a')+10;
    'A'..'Z': value := code-Ord('A')+10;
    else exit;
  end;
  if (value < base) then result := value;
end;
343 // ////////////////////////////////////////////////////////////////////////// //
// Returns `s` wrapped in quotes. Strings containing a control character
// (below space, or #127) are double-quoted with C-style escapes; everything
// else is single-quoted, with every apostrophe doubled Pascal-style.
function quoteStr (const s: AnsiString): AnsiString;

  // single-quoted form: apostrophes are doubled, nothing else is touched
  function squote (const s: AnsiString): AnsiString;
  var
    ch: AnsiChar;
  begin
    result := '''';
    for ch in s do
    begin
      if (ch = '''') then result += '''';
      result += ch;
    end;
    result += '''';
  end;

  // double-quoted form with C-style escapes
  function dquote (const s: AnsiString): AnsiString;
  var
    ch: AnsiChar;
  begin
    result := '"';
    for ch in s do
    begin
      case ch of
        #0: result += '\z';
        #9: result += '\t';
        #10: result += '\n';
        #13: result += '\r';
        #27: result += '\e';
        #1..#8, #11, #12, #14..#26, #28..#31, #127:
          begin
            // remaining control characters: two-digit lower-case hex escape
            result += '\x';
            result += LowerCase(IntToHex(Integer(ch), 2));
          end;
        '"', '\':
          begin
            result += '\';
            result += ch;
          end;
        else
          result += ch;
      end;
    end;
    result += '"';
  end;

var
  hasApostrophe: Boolean = false;
  ch: AnsiChar;
begin
  for ch in s do
  begin
    if (ch = '''') then
    begin
      hasApostrophe := true;
    end
    else if (ch < ' ') or (ch = #127) then
    begin
      // a control character forces the escaped double-quoted form
      result := dquote(s);
      exit;
    end;
  end;
  if hasApostrophe then result := squote(s) else result := ''''+s+'''';
end;
404 // ////////////////////////////////////////////////////////////////////////// //
// `true` when the extension of `fn` is ".wad", ".pk3" or ".zip"
// (compared without regard to case).
function hasWadExtension (fn: AnsiString): Boolean;
var
  ext: AnsiString;
begin
  ext := ExtractFileExt(fn);
  if StrEquCI1251(ext, '.wad') then result := true
  else if StrEquCI1251(ext, '.pk3') then result := true
  else result := StrEquCI1251(ext, '.zip');
end;
// Returns `fn`, with ".wad" appended unless it already carries one of the
// extensions recognised by `hasWadExtension`.
function addWadExtension (fn: AnsiString): AnsiString;
begin
  if hasWadExtension(fn) then result := fn else result := fn+'.wad';
end;
// `true` when `fn` contains ".wad", ".pk3" or ".zip" (any case) directly
// followed by ":\" or ":/". The extension's dot must not be the very first
// character of the text still being examined.
function isWadPath (fn: AnsiString): Boolean;
var
  colon: Integer;
  ext: AnsiString;
begin
  result := false;
  colon := Pos(':', fn);
  // stop when there is no colon left, or nothing follows it
  while (colon <> 0) and (Length(fn)-colon >= 1) do
  begin
    if (colon-4 > 1) and (fn[colon-4] = '.') and ((fn[colon+1] = '\') or (fn[colon+1] = '/')) then
    begin
      ext := Copy(fn, colon-4, 4);
      if StrEquCI1251(ext, '.wad') or StrEquCI1251(ext, '.pk3') or StrEquCI1251(ext, '.zip') then
      begin
        result := true;
        exit;
      end;
    end;
    // drop everything up to and including this colon and look for the next one
    Delete(fn, 1, colon);
    colon := Pos(':', fn);
  end;
end;
// Converts `i` to decimal text with a comma between every group of three
// digits, e.g. 1234567 -> '1,234,567' and -1234 -> '-1,234'.
function Int64ToStrComma (i: Int64): AnsiString;
var
  f, first: Integer;
begin
  Str(i, result);
  // position of the first digit: a leading '-' is not part of any group,
  // so no comma may be placed right after it
  if (Length(result) > 0) and (result[1] = '-') then first := 2 else first := 1;
  f := Length(result)+1;
  // walk from the right, inserting a comma before every complete group of
  // three digits that still has digits in front of it
  while f-3 > first do
  begin
    Dec(f, 3);
    Insert(',', result, f);
  end;
end;
// Upper-cases one cp1251 character: ASCII 'a'..'z', #224..#255, and the
// individually listed characters #184, #186, #191, #162 and #179. Any other
// character is returned unchanged.
function UpCase1251 (ch: AnsiChar): AnsiChar; inline;
begin
  case ch of
    'a'..'z', #224..#255: result := AnsiChar(Ord(ch)-32);
    #184, #186, #191: result := AnsiChar(Ord(ch)-16);
    #162, #179: result := AnsiChar(Ord(ch)-1);
    else result := ch;
  end;
end;
// Lower-cases one cp1251 character: ASCII 'A'..'Z', #192..#223, and the
// individually listed characters #168, #170, #175, #161 and #178. Any other
// character is returned unchanged.
function LoCase1251 (ch: AnsiChar): AnsiChar; inline;
begin
  case ch of
    'A'..'Z', #192..#223: result := AnsiChar(Ord(ch)+32);
    #168, #170, #175: result := AnsiChar(Ord(ch)+16);
    #161, #178: result := AnsiChar(Ord(ch)+1);
    else result := ch;
  end;
end;
// `true` when both strings have the same length and match character by
// character after `UpCase1251` conversion.
function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
var
  idx, len: Integer;
begin
  len := Length(s0);
  result := (len = Length(s1));
  if not result then exit;
  for idx := 1 to len do
  begin
    if (UpCase1251(s0[idx]) <> UpCase1251(s1[idx])) then
    begin
      result := false;
      exit;
    end;
  end;
end;
// Returns `s` lower-cased with `LoCase1251`. When no character would change,
// `s` itself is returned and no new string is built.
function toLowerCase1251 (const s: AnsiString): AnsiString;
var
  idx: Integer;
  mustConvert: Boolean;
begin
  // look for the first character that is affected by the conversion
  mustConvert := false;
  for idx := 1 to Length(s) do
  begin
    if (LoCase1251(s[idx]) <> s[idx]) then
    begin
      mustConvert := true;
      break;
    end;
  end;
  if not mustConvert then
  begin
    // nothing to do
    result := s;
    exit;
  end;
  result := '';
  SetLength(result, Length(s));
  for idx := 1 to Length(s) do result[idx] := LoCase1251(s[idx]);
end;
535 // ////////////////////////////////////////////////////////////////////////// //
536 // utils
// `ch`: first byte of a UTF-8 sequence
// returns the sequence length (1..6) announced by that byte,
// or -1 when the byte cannot start a sequence
function utf8CodeLen (ch: Word): Integer;
begin
  if (ch < $80) then result := 1
  else if ((ch and $FE) = $FC) then result := 6
  else if ((ch and $FC) = $F8) then result := 5
  else if ((ch and $F8) = $F0) then result := 4
  else if ((ch and $F0) = $E0) then result := 3
  else if ((ch and $E0) = $C0) then result := 2
  else result := -1; // invalid
end;
// Structural UTF-8 check: every sequence must start with a byte accepted by
// `utf8CodeLen`, fit inside the string, and consist of continuation bytes
// (10xxxxxx) after the first one. The empty string is valid.
function utf8Valid (const s: AnsiString): Boolean;
var
  idx, total, seqLen, k: Integer;
begin
  result := false;
  total := Length(s);
  idx := 1;
  while (idx <= total) do
  begin
    seqLen := utf8CodeLen(Byte(s[idx]));
    if (seqLen < 1) then exit; // invalid sequence start
    if (idx+seqLen-1 > total) then exit; // out of chars in string
    // check other sequence bytes
    for k := 1 to seqLen-1 do
    begin
      if ((Byte(s[idx+k]) and $C0) <> $80) then exit;
    end;
    Inc(idx, seqLen);
  end;
  result := true;
end;
576 // ////////////////////////////////////////////////////////////////////////// //
const
  // Unicode code point of every cp1251 byte 128..255, indexed by the byte itself;
  // searched by `decodeUtf8Char` to map a decoded code point back to cp1251
  uni2wint: array [128..255] of Word = (
    $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
    $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
    $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
    $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
    $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
    $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
    $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
    $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
  );
// Decodes the UTF-8 sequence that starts at `s[pos]` and returns it as a
// cp1251 character, advancing `pos` past the bytes consumed.
//   * ASCII bytes are returned as they are
//   * a byte that cannot start a sequence is skipped and yields '?'
//   * code points absent from `uni2wint` yield '?'
//   * with `pos` beyond the end of `s`, '?' is returned and `pos` is untouched
// Lead bytes announcing 2 to 6 bytes are recognised; all continuation bytes
// that follow the lead byte are consumed.
function decodeUtf8Char (s: AnsiString; var pos: Integer): char;
var
  code, next, idx: Integer;
begin
  result := '?';
  if (pos > Length(s)) then exit;

  code := Byte(s[pos]);
  Inc(pos);
  if (code < $80) then
  begin
    result := char(code);
    exit;
  end;

  // keep only the payload bits of the lead byte
  case code of
    $C0..$DF: code := code and $1F;
    $E0..$EF: code := code and $0F;
    $F0..$F7: code := code and $07;
    $F8..$FB: code := code and $03;
    $FC..$FD: code := code and $01;
    else exit; // invalid utf8
  end;

  // gather continuation bytes, six bits each
  while (pos <= Length(s)) do
  begin
    next := Byte(s[pos]);
    if ((next and $C0) <> $80) then break; // no more
    code := (code shl 6) or (next and $3F);
    Inc(pos);
  end;

  // done, try 1251
  for idx := Low(uni2wint) to High(uni2wint) do
  begin
    if (uni2wint[idx] = code) then
    begin
      result := char(idx and $FF);
      exit;
    end;
  end;
  // no cp1251 counterpart: '?' stays
end;
// Converts UTF-8 text to cp1251 with `decodeUtf8Char`. The input is returned
// unchanged when it does not pass `utf8Valid` or contains ASCII only.
function utf8to1251 (s: AnsiString): AnsiString;
var
  cur: Integer;
  asciiOnly: Boolean;
begin
  result := s;
  if not utf8Valid(s) then exit;
  // anything to convert at all?
  asciiOnly := true;
  for cur := 1 to Length(s) do
  begin
    if (Byte(s[cur]) >= $80) then
    begin
      asciiOnly := false;
      break;
    end;
  end;
  if asciiOnly then exit; // nothing to do here
  result := '';
  cur := 1;
  while (cur <= Length(s)) do result := result+decodeUtf8Char(s, cur);
end;
652 // ////////////////////////////////////////////////////////////////////////// //
// Case-insensitive path lookup.
// `pathname` is split at '/' and '\'; each component is first tried verbatim
// and, failing that, searched for in its parent directory with
// `StrEquCI1251`. On success `pathname` receives the name as found on disk
// (components joined with '/', directories followed by '/') and the result is
// `true`. On failure `pathname` is left alone and the result is `false`.
// `lastIsDir` should be `true` if the last component must be a directory.
// The same code path is used on every platform.
function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;

  function isSlash (ch: AnsiChar): Boolean;
  begin
    result := (ch = '/') or (ch = '\');
  end;

var
  info: TSearchRec;
  source: AnsiString;
  built: AnsiString = '';
  part: AnsiString;
  needDir, matched: Boolean;
  attrs: LongInt;
  cur, start, total: Integer;
begin
  source := pathname;
  total := Length(source);
  result := (total > 0);
  // an absolute path stays absolute
  if (total > 0) and isSlash(source[1]) then built := '/';
  cur := 1;
  while (cur <= total) do
  begin
    // skip the slashes in front of the component
    while (cur <= total) and isSlash(source[cur]) do Inc(cur);
    if (cur > total) then break;
    // extract name
    start := cur;
    while (cur <= total) and not isSlash(source[cur]) do Inc(cur);
    part := Copy(source, start, cur-start);
    // skip the slashes behind the component
    while (cur <= total) and isSlash(source[cur]) do Inc(cur);
    // every component but the last has to be a directory
    needDir := lastIsDir or (cur <= total);
    // try the easiest case first: the name exists exactly as written
    attrs := FileGetAttr(built+part);
    if (attrs <> -1) and (needDir = ((attrs and faDirectory) <> 0)) then
    begin
      built := built+part;
      if needDir then built := built+'/';
      continue;
    end;
    // either not found, or of the wrong kind: scan the parent directory
    matched := false;
    try
      if (FindFirst(built+'*', faAnyFile, info) = 0) then
      begin
        repeat
          if (needDir = ((info.attr and faDirectory) <> 0)) and StrEquCI1251(info.name, part) then
          begin
            built := built+info.name;
            if needDir then built := built+'/';
            matched := true;
            break;
          end;
        until (FindNext(info) <> 0);
      end;
    finally
      FindClose(info);
    end;
    if not matched then
    begin
      built := '';
      result := false;
      break;
    end;
  end;
  if result then pathname := built;
end;
// Value-returning wrapper around `findFileCI`: yields the name as resolved on
// disk, or `pathname` unchanged when the lookup fails.
function findFileCIStr (pathname: AnsiString): AnsiString;
var
  resolved: AnsiString;
begin
  resolved := pathname;
  findFileCI(resolved);
  result := resolved;
end;
// Opens an existing file for reading (located with `findFileCI`, no sharing
// restrictions). Raises `Exception` when the file cannot be located.
function openDiskFileRO (pathname: AnsiString): TStream;
begin
  if findFileCI(pathname) then
  begin
    result := TFileStream.Create(pathname, fmOpenRead or {fmShareDenyWrite}fmShareDenyNone);
  end
  else
  begin
    raise Exception.Create('can''t open file "'+pathname+'"');
  end;
end;
// Creates (or overwrites) a file. The directory part of `pathname`, if any,
// is resolved with `findFileCI`; `Exception` is raised when that directory
// cannot be located. The file name itself is used as given.
function createDiskFile (pathname: AnsiString): TStream;
var
  dir: AnsiString;
begin
  dir := ExtractFilePath(pathname);
  if (Length(dir) > 0) then
  begin
    if not findFileCI(dir, true) then
    begin
      raise Exception.Create('can''t create file "'+pathname+'"');
    end;
  end;
  result := TFileStream.Create(dir+ExtractFileName(pathname), fmCreate);
end;
// Writes the `size` bytes of the integer at `vp` to `st`, least significant
// byte first.
procedure writeIntegerLE (st: TStream; vp: Pointer; size: Integer);
{$IFDEF ENDIAN_LITTLE}
begin
  // memory order already is stream order
  st.writeBuffer(vp^, size);
end;
{$ELSE}
var
  idx: Integer;
begin
  // big-endian host: emit the bytes from the last one down to the first
  for idx := size-1 downto 0 do st.writeBuffer((PByte(vp)+idx)^, 1);
end;
{$ENDIF}
// Writes the `size` bytes of the integer at `vp` to `st`, most significant
// byte first.
procedure writeIntegerBE (st: TStream; vp: Pointer; size: Integer);
{$IFDEF ENDIAN_LITTLE}
var
  idx: Integer;
begin
  // little-endian host: emit the bytes from the last one down to the first
  for idx := size-1 downto 0 do st.writeBuffer((PByte(vp)+idx)^, 1);
end;
{$ELSE}
begin
  // memory order already is stream order
  st.writeBuffer(vp^, size);
end;
{$ENDIF}
// little-endian writers: one overload per integer type, each forwarding the
// value and its size to `writeIntegerLE`
procedure writeInt (st: TStream; v: Byte); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: ShortInt); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: Word); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: SmallInt); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: LongWord); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: LongInt); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: Int64); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;

procedure writeInt (st: TStream; v: UInt64); overload;
begin
  writeIntegerLE(st, @v, SizeOf(v));
end;
// big-endian writers: one overload per integer type, each forwarding the
// value and its size to `writeIntegerBE`
procedure writeIntBE (st: TStream; v: Byte); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: ShortInt); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: Word); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: SmallInt); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: LongWord); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: LongInt); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: Int64); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;

procedure writeIntBE (st: TStream; v: UInt64); overload;
begin
  writeIntegerBE(st, @v, SizeOf(v));
end;
// Reads a `size`-byte integer stored least significant byte first from `st`
// into the memory at `vp`.
procedure readIntegerLE (st: TStream; vp: Pointer; size: Integer);
{$IFDEF ENDIAN_LITTLE}
begin
  // stream order already is memory order
  st.readBuffer(vp^, size);
end;
{$ELSE}
var
  idx: Integer;
begin
  // big-endian host: fill the bytes from the last one down to the first
  for idx := size-1 downto 0 do st.readBuffer((PByte(vp)+idx)^, 1);
end;
{$ENDIF}
// Reads a `size`-byte integer stored most significant byte first from `st`
// into the memory at `vp`.
procedure readIntegerBE (st: TStream; vp: Pointer; size: Integer);
{$IFDEF ENDIAN_LITTLE}
var
  idx: Integer;
begin
  // little-endian host: fill the bytes from the last one down to the first
  for idx := size-1 downto 0 do st.readBuffer((PByte(vp)+idx)^, 1);
end;
{$ELSE}
begin
  // stream order already is memory order
  st.readBuffer(vp^, size);
end;
{$ENDIF}
// little-endian readers: one function per integer type, each letting
// `readIntegerLE` fill the result
function readByte (st: TStream): Byte;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readShortInt (st: TStream): ShortInt;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readWord (st: TStream): Word;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readSmallInt (st: TStream): SmallInt;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readLongWord (st: TStream): LongWord;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readLongInt (st: TStream): LongInt;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readInt64 (st: TStream): Int64;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;

function readUInt64 (st: TStream): UInt64;
begin
  readIntegerLE(st, @result, SizeOf(result));
end;
// big-endian readers: one function per integer type, each letting
// `readIntegerBE` fill the result
function readByteBE (st: TStream): Byte;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readShortIntBE (st: TStream): ShortInt;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readWordBE (st: TStream): Word;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readSmallIntBE (st: TStream): SmallInt;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readLongWordBE (st: TStream): LongWord;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readLongIntBE (st: TStream): LongInt;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readInt64BE (st: TStream): Int64;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;

function readUInt64BE (st: TStream): UInt64;
begin
  readIntegerBE(st, @result, SizeOf(result));
end;
858 end.