src/shared/utils.pas

   1 (* Copyright (C)  DooM 2D:Forever Developers
   2  *
   3  * This program is free software: you can redistribute it and/or modify
   4  * it under the terms of the GNU General Public License as published by
   5  * the Free Software Foundation, version 3 of the License ONLY.
   6  *
   7  * This program is distributed in the hope that it will be useful,
   8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10  * GNU General Public License for more details.
  11  *
  12  * You should have received a copy of the GNU General Public License
  13  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  14  *)
  15 {$INCLUDE a_modes.inc}
  16 unit utils;
  17
  18 interface
  19
  20 uses
  21   SysUtils, Classes;
  22
  23
  24 // ////////////////////////////////////////////////////////////////////////// //
  25 type
  26   TUtf8DecoderFast = packed record
  27   public
  28     const Replacement = $FFFD; // replacement char for invalid unicode
  29     const Accept = 0;
  30     const Reject = 12;
  31
  32   private
  33     state: LongWord;
  34
  35   public
  36     codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state)
  37
  38   public
  39     constructor Create (v: Boolean{fuck you, fpc});
  40
  41     procedure reset (); inline;
  42
  43     function complete (): Boolean; inline; // is current character complete? take `codepoint` then
  44     function invalid (): Boolean; inline;
  45     function completeOrInvalid (): Boolean; inline;
  46
  47     // process one byte, return `true` if codepoint is ready
  48     function decode (b: Byte): Boolean; inline; overload;
  49     function decode (c: AnsiChar): Boolean; inline; overload;
  50   end;
  51
  52
  53 // ////////////////////////////////////////////////////////////////////////// //
  54 // does filename have one of ".wad", ".pk3", ".zip" extensions?
  55 function hasWadExtension (fn: AnsiString): Boolean;
  56
  57 // does filepath have ".XXX:\" in it?
  58 function isWadPath (fn: AnsiString): Boolean;
  59
  60 // adds ".wad" extension if filename doesn't have one of ".wad", ".pk3", ".zip"
  61 function addWadExtension (fn: AnsiString): AnsiString;
  62
  63 // convert number to strig with nice commas
  64 function Int64ToStrComma (i: Int64): AnsiString;
  65
  66 function UpCase1251 (ch: AnsiChar): AnsiChar; inline;
  67 function LoCase1251 (ch: AnsiChar): AnsiChar; inline;
  68
  69 function toLowerCase1251 (const s: AnsiString): AnsiString;
  70
  71 // `true` if strings are equal; ignoring case for cp1251
  72 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
  73
  74 function utf8Valid (const s: AnsiString): Boolean;
  75
  76 function utf8to1251 (s: AnsiString): AnsiString;
  77
  78 // `pathname` will be modified if path is valid
  79 // `lastIsDir` should be `true` if we are searching for directory
  80 // nobody cares about shitdoze, so i'll use the same code path for it
  81 function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
  82 function findFileCIStr (pathname: AnsiString): AnsiString;
  83
  84 // they throws
  85 function openDiskFileRO (pathname: AnsiString): TStream;
  86 function createDiskFile (pathname: AnsiString): TStream;
  87
  88 // little endian
  89 procedure writeInt (st: TStream; v: Byte); overload;
  90 procedure writeInt (st: TStream; v: ShortInt); overload;
  91 procedure writeInt (st: TStream; v: Word); overload;
  92 procedure writeInt (st: TStream; v: SmallInt); overload;
  93 procedure writeInt (st: TStream; v: LongWord); overload;
  94 procedure writeInt (st: TStream; v: LongInt); overload;
  95 procedure writeInt (st: TStream; v: Int64); overload;
  96 procedure writeInt (st: TStream; v: UInt64); overload;
  97
  98 function readByte (st: TStream): Byte;
  99 function readShortInt (st: TStream): ShortInt;
 100 function readWord (st: TStream): Word;
 101 function readSmallInt (st: TStream): SmallInt;
 102 function readLongWord (st: TStream): LongWord;
 103 function readLongInt (st: TStream): LongInt;
 104 function readInt64 (st: TStream): Int64;
 105 function readUInt64 (st: TStream): UInt64;
 106
 107 // big endian
 108 procedure writeIntBE (st: TStream; v: Byte); overload;
 109 procedure writeIntBE (st: TStream; v: ShortInt); overload;
 110 procedure writeIntBE (st: TStream; v: Word); overload;
 111 procedure writeIntBE (st: TStream; v: SmallInt); overload;
 112 procedure writeIntBE (st: TStream; v: LongWord); overload;
 113 procedure writeIntBE (st: TStream; v: LongInt); overload;
 114 procedure writeIntBE (st: TStream; v: Int64); overload;
 115 procedure writeIntBE (st: TStream; v: UInt64); overload;
 116
 117 function readByteBE (st: TStream): Byte;
 118 function readShortIntBE (st: TStream): ShortInt;
 119 function readWordBE (st: TStream): Word;
 120 function readSmallIntBE (st: TStream): SmallInt;
 121 function readLongWordBE (st: TStream): LongWord;
 122 function readLongIntBE (st: TStream): LongInt;
 123 function readInt64BE (st: TStream): Int64;
 124 function readUInt64BE (st: TStream): UInt64;
 125
 126
 127 type
 128   TFormatStrFCallback = procedure (constref buf; len: SizeUInt);
 129
 130 function wchar2win (wc: WideChar): AnsiChar; inline;
 131 function utf2win (const s: AnsiString): AnsiString;
 132 function win2utf (const s: AnsiString): AnsiString;
 133 function digitInBase (ch: AnsiChar; base: Integer): Integer;
 134
 135 // returns string in single or double quotes
 136 // single quotes supports only pascal-style '' for single quote char
 137 // double quotes supports c-style escapes
 138 // function will select quote mode automatically
 139 function quoteStr (const s: AnsiString): AnsiString;
 140
 141
 142 // ////////////////////////////////////////////////////////////////////////// //
 143 var
 144   wc2shitmap: array[0..65535] of AnsiChar;
 145   wc2shitmapInited: Boolean = false;
 146
 147
 148 // ////////////////////////////////////////////////////////////////////////// //
 149 const
 150   cp1251: array[0..127] of Word = (
 151     $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
 152     $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
 153     $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
 154     $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
 155     $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
 156     $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
 157     $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
 158     $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
 159   );
 160
 161 implementation
 162
 163 procedure initShitMap ();
 164 var
 165   f: Integer;
 166 begin
 167   for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?';
 168   for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f);
 169   for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128);
 170   wc2shitmapInited := true;
 171 end;
 172
 173
 174 // ////////////////////////////////////////////////////////////////////////// //
 175 // fast state-machine based UTF-8 decoder; using 8 bytes of memory
 176 // code points from invalid range will never be valid, this is the property of the state machine
 177 const
 178   // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
 179   utf8dfa: array[0..$16c-1] of Byte = (
 180     // maps bytes to character classes
 181     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f
 182     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f
 183     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f
 184     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f
 185     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f
 186     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f
 187     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f
 188     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f
 189     $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f
 190     $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f
 191     $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af
 192     $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf
 193     $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf
 194     $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df
 195     $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef
 196     $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff
 197     // maps a combination of a state of the automaton and a character class to a state
 198     $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f
 199     $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f
 200     $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f
 201     $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f
 202     $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f
 203     $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f
 204     $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c);
 205
 206
 207 // ////////////////////////////////////////////////////////////////////////// //
 208 constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end;
 209
 210 procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end;
 211
 212 function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end;
 213 function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end;
 214 function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end;
 215
 216 function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end;
 217
 218 function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload;
 219 var
 220   tp: LongWord;
 221 begin
 222   if (state = Reject) then begin state := Accept; codepoint := 0; end;
 223   tp := utf8dfa[b];
 224   if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b;
 225   state := utf8dfa[256+state+tp];
 226   if (state = Reject) then begin codepoint := Replacement; state := Accept; end;
 227   result := (state = Accept);
 228 end;
 229
 230
 231 // ////////////////////////////////////////////////////////////////////////// //
 232 function wchar2win (wc: WideChar): AnsiChar; inline;
 233 begin
 234   if not wc2shitmapInited then initShitMap();
 235   if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)];
 236 end;
 237
 238
 239 // ////////////////////////////////////////////////////////////////////////// //
 240 function utf2win (const s: AnsiString): AnsiString;
 241 var
 242   f, c: Integer;
 243   ud: TUtf8DecoderFast;
 244 begin
 245   for f := 1 to Length(s) do
 246   begin
 247     if (Byte(s[f]) > 127) then
 248     begin
 249       ud := TUtf8DecoderFast.Create(true);
 250       result := '';
 251       for c := 1 to Length(s) do
 252       begin
 253         if ud.decode(s[c]) then result += wchar2win(WideChar(ud.codepoint));
 254       end;
 255       exit;
 256     end;
 257   end;
 258   result := s;
 259 end;
 260
 261
 262 function win2utf (const s: AnsiString): AnsiString;
 263 var
 264   f, c: Integer;
 265
 266   function utf8Encode (code: Integer): AnsiString;
 267   begin
 268     if (code < 0) or (code > $10FFFF) then begin result := '?'; exit; end;
 269     if (code <= $7f) then
 270     begin
 271       result := Char(code and $ff);
 272     end
 273     else if (code <= $7FF) then
 274     begin
 275       result := Char($C0 or (code shr 6));
 276       result += Char($80 or (code and $3F));
 277     end
 278     else if (code <= $FFFF) then
 279     begin
 280       result := Char($E0 or (code shr 12));
 281       result += Char($80 or ((code shr 6) and $3F));
 282       result += Char($80 or (code and $3F));
 283     end
 284     else if (code <= $10FFFF) then
 285     begin
 286       result := Char($F0 or (code shr 18));
 287       result += Char($80 or ((code shr 12) and $3F));
 288       result += Char($80 or ((code shr 6) and $3F));
 289       result += Char($80 or (code and $3F));
 290     end
 291     else
 292     begin
 293       result := '?';
 294     end;
 295   end;
 296
 297 begin
 298   for f := 1 to Length(s) do
 299   begin
 300     if (Byte(s[f]) > 127) then
 301     begin
 302       result := '';
 303       for c := 1 to Length(s) do
 304       begin
 305         if (Byte(s[c]) < 128) then
 306         begin
 307           result += s[c];
 308         end
 309         else
 310         begin
 311           result += utf8Encode(cp1251[Byte(s[c])-128])
 312         end;
 313       end;
 314       exit;
 315     end;
 316   end;
 317   result := s;
 318 end;
 319
 320
 321 // ////////////////////////////////////////////////////////////////////////// //
 322 function digitInBase (ch: AnsiChar; base: Integer): Integer;
 323 begin
 324   result := -1;
 325   if (base < 1) or (base > 36) then exit;
 326   if (ch < '0') then exit;
 327   if (base <= 10) then
 328   begin
 329     if (Integer(ch) >= 48+base) then exit;
 330     result := Integer(ch)-48;
 331   end
 332   else
 333   begin
 334     if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end;
 335     if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower()
 336     if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit;
 337     result := Integer(ch)-65+10;
 338   end;
 339 end;
 340
 341
 342 // ////////////////////////////////////////////////////////////////////////// //
 343 function quoteStr (const s: AnsiString): AnsiString;
 344
 345   function squote (const s: AnsiString): AnsiString;
 346   var
 347     f: Integer;
 348   begin
 349     result := '''';
 350     for f := 1 to Length(s) do
 351     begin
 352       if (s[f] = '''') then result += '''';
 353       result += s[f];
 354     end;
 355     result += '''';
 356   end;
 357
 358   function dquote (const s: AnsiString): AnsiString;
 359   var
 360     f: Integer;
 361     ch: AnsiChar;
 362   begin
 363     result := '"';
 364     for f := 1 to Length(s) do
 365     begin
 366       ch := s[f];
 367            if (ch = #0) then result += '\z'
 368       else if (ch = #9) then result += '\t'
 369       else if (ch = #10) then result += '\n'
 370       else if (ch = #13) then result += '\r'
 371       else if (ch = #27) then result += '\e'
 372       else if (ch < ' ') or (ch = #127) then
 373       begin
 374         result += '\x';
 375         result += LowerCase(IntToHex(Integer(ch), 2));
 376       end
 377       else if (ch = '"') or (ch = '\') then
 378       begin
 379         result += '\';
 380         result += ch;
 381       end
 382       else
 383       begin
 384         result += ch;
 385       end;
 386     end;
 387     result += '"';
 388   end;
 389
 390 var
 391   needSingle: Boolean = false;
 392   f: Integer;
 393 begin
 394   for f := 1 to Length(s) do
 395   begin
 396     if (s[f] = '''') then begin needSingle := true; continue; end;
 397     if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end;
 398   end;
 399   if needSingle then result := squote(s) else result := ''''+s+'''';
 400 end;
 401
 402
 403 // ////////////////////////////////////////////////////////////////////////// //
 404 function hasWadExtension (fn: AnsiString): Boolean;
 405 begin
 406   fn := ExtractFileExt(fn);
 407   result := StrEquCI1251(fn, '.wad') or StrEquCI1251(fn, '.pk3') or StrEquCI1251(fn, '.zip');
 408 end;
 409
 410
 411 function addWadExtension (fn: AnsiString): AnsiString;
 412 begin
 413   result := fn;
 414   if not hasWadExtension(result) then result := result+'.wad';
 415 end;
 416
 417
 418 function isWadPath (fn: AnsiString): Boolean;
 419 var
 420   p: Integer;
 421   s: AnsiString;
 422 begin
 423   result := false;
 424   while true do
 425   begin
 426     p := Pos(':', fn);
 427     if (p = 0) or (length(fn)-p < 1) then break;
 428     if (p-4 > 1) and (fn[p-4] = '.') and ((fn[p+1] = '\') or (fn[p+1] = '/')) then
 429     begin
 430       s := Copy(fn, p-4, 4);
 431       if StrEquCI1251(s, '.wad') or StrEquCI1251(s, '.pk3') or StrEquCI1251(s, '.zip') then
 432       begin
 433         result := true;
 434         exit;
 435       end;
 436     end;
 437     Delete(fn, 1, p);
 438   end;
 439 end;
 440
 441
 442 function Int64ToStrComma (i: Int64): AnsiString;
 443 var
 444   f: Integer;
 445 begin
 446   Str(i, result);
 447   f := Length(result)+1;
 448   while f > 4 do
 449   begin
 450     Dec(f, 3); Insert(',', result, f);
 451   end;
 452 end;
 453
 454
 455 function UpCase1251 (ch: AnsiChar): AnsiChar; inline;
 456 begin
 457   if ch < #128 then
 458   begin
 459     if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32);
 460   end
 461   else
 462   begin
 463     if (ch >= #224) and (ch <= #255) then
 464     begin
 465       Dec(ch, 32);
 466     end
 467     else
 468     begin
 469       case ch of
 470         #184, #186, #191: Dec(ch, 16);
 471         #162, #179: Dec(ch);
 472       end;
 473     end;
 474   end;
 475   result := ch;
 476 end;
 477
 478
 479 function LoCase1251 (ch: AnsiChar): AnsiChar; inline;
 480 begin
 481   if ch < #128 then
 482   begin
 483     if (ch >= 'A') and (ch <= 'Z') then Inc(ch, 32);
 484   end
 485   else
 486   begin
 487     if (ch >= #192) and (ch <= #223) then
 488     begin
 489       Inc(ch, 32);
 490     end
 491     else
 492     begin
 493       case ch of
 494         #168, #170, #175: Inc(ch, 16);
 495         #161, #178: Inc(ch);
 496       end;
 497     end;
 498   end;
 499   result := ch;
 500 end;
 501
 502
 503 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
 504 var
 505   i: Integer;
 506 begin
 507   result := false;
 508   if length(s0) <> length(s1) then exit;
 509   for i := 1 to length(s0) do if UpCase1251(s0[i]) <> UpCase1251(s1[i]) then exit;
 510   result := true;
 511 end;
 512
 513
 514 function toLowerCase1251 (const s: AnsiString): AnsiString;
 515 var
 516   f: Integer;
 517   ch: AnsiChar;
 518 begin
 519   for ch in s do
 520   begin
 521     if (ch <> LoCase1251(ch)) then
 522     begin
 523       result := '';
 524       SetLength(result, Length(s));
 525       for f := 1 to Length(s) do result[f] := LoCase1251(s[f]);
 526       exit;
 527     end;
 528   end;
 529   // nothing to do
 530   result := s;
 531 end;
 532
 533
 534 // ////////////////////////////////////////////////////////////////////////// //
 535 // utils
 536 // `ch`: utf8 start
 537 // -1: invalid utf8
 538 function utf8CodeLen (ch: Word): Integer;
 539 begin
 540   if ch < $80 then begin result := 1; exit; end;
 541   if (ch and $FE) = $FC then begin result := 6; exit; end;
 542   if (ch and $FC) = $F8 then begin result := 5; exit; end;
 543   if (ch and $F8) = $F0 then begin result := 4; exit; end;
 544   if (ch and $F0) = $E0 then begin result := 3; exit; end;
 545   if (ch and $E0) = $C0 then begin result := 2; exit; end;
 546   result := -1; // invalid
 547 end;
 548
 549
 550 function utf8Valid (const s: AnsiString): Boolean;
 551 var
 552   pos, len: Integer;
 553 begin
 554   result := false;
 555   pos := 1;
 556   while pos <= length(s) do
 557   begin
 558     len := utf8CodeLen(Byte(s[pos]));
 559     if len < 1 then exit; // invalid sequence start
 560     if pos+len-1 > length(s) then exit; // out of chars in string
 561     Dec(len);
 562     Inc(pos);
 563     // check other sequence bytes
 564     while len > 0 do
 565     begin
 566       if (Byte(s[pos]) and $C0) <> $80 then exit;
 567       Dec(len);
 568       Inc(pos);
 569     end;
 570   end;
 571   result := true;
 572 end;
 573
 574
 575 // ////////////////////////////////////////////////////////////////////////// //
 576 const
 577   uni2wint: array [128..255] of Word = (
 578     $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
 579     $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
 580     $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
 581     $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
 582     $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
 583     $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
 584     $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
 585     $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
 586   );
 587
 588
 589 function decodeUtf8Char (s: AnsiString; var pos: Integer): char;
 590 var
 591   b, c: Integer;
 592 begin
 593   (* The following encodings are valid, except for the 5 and 6 byte
 594    * combinations:
 595    *  0xxxxxxx
 596    *  110xxxxx 10xxxxxx
 597    *  1110xxxx 10xxxxxx 10xxxxxx
 598    *  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 599    *  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 600    *  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 601    *)
 602   result := '?';
 603   if pos > length(s) then exit;
 604
 605   b := Byte(s[pos]);
 606   Inc(pos);
 607   if b < $80 then begin result := char(b); exit; end;
 608
 609   // mask out unused bits
 610        if (b and $FE) = $FC then b := b and $01
 611   else if (b and $FC) = $F8 then b := b and $03
 612   else if (b and $F8) = $F0 then b := b and $07
 613   else if (b and $F0) = $E0 then b := b and $0F
 614   else if (b and $E0) = $C0 then b := b and $1F
 615   else exit; // invalid utf8
 616
 617   // now continue
 618   while pos <= length(s) do
 619   begin
 620     c := Byte(s[pos]);
 621     if (c and $C0) <> $80 then break; // no more
 622     b := b shl 6;
 623     b := b or (c and $3F);
 624     Inc(pos);
 625   end;
 626
 627   // done, try 1251
 628   for c := 128 to 255 do if uni2wint[c] = b then begin result := char(c and $FF); exit; end;
 629   // alas
 630 end;
 631
 632
 633 function utf8to1251 (s: AnsiString): AnsiString;
 634 var
 635   pos: Integer;
 636 begin
 637   if not utf8Valid(s) then begin result := s; exit; end;
 638   pos := 1;
 639   while pos <= length(s) do
 640   begin
 641     if Byte(s[pos]) >= $80 then break;
 642     Inc(pos);
 643   end;
 644   if pos > length(s) then begin result := s; exit; end; // nothing to do here
 645   result := '';
 646   pos := 1;
 647   while pos <= length(s) do result := result+decodeUtf8Char(s, pos);
 648 end;
 649
 650
 651 // ////////////////////////////////////////////////////////////////////////// //
 652 // `pathname` will be modified if path is valid
 653 // `lastIsDir` should be `true` if we are searching for directory
 654 // nobody cares about shitdoze, so i'll use the same code path for it
 655 function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
 656 var
 657   sr: TSearchRec;
 658   npt: AnsiString;
 659   newname: AnsiString = '';
 660   curname: AnsiString;
 661   wantdir: Boolean;
 662   attr: LongInt;
 663   foundher: Boolean;
 664 begin
 665   npt := pathname;
 666   result := (length(npt) > 0);
 667   if (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) then newname := '/';
 668   while length(npt) > 0 do
 669   begin
 670     // remove trailing slashes
 671     while (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) do Delete(npt, 1, 1);
 672     if length(npt) = 0 then break;
 673     // extract name
 674     curname := '';
 675     while (length(npt) > 0) and (npt[1] <> '/') and (npt[1] <> '\') do
 676     begin
 677       curname := curname+npt[1];
 678       Delete(npt, 1, 1);
 679     end;
 680     // remove trailing slashes again
 681     while (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) do Delete(npt, 1, 1);
 682     wantdir := lastIsDir or (length(npt) > 0); // do we want directory here?
 683     //writeln(Format('npt=[%s]; newname=[%s]; curname=[%s]; wantdir=%d', [npt, newname, curname, Integer(wantdir)]));
 684     // try the easiest case first
 685     attr := FileGetAttr(newname+curname);
 686     if attr <> -1 then
 687     begin
 688       if wantdir = ((attr and faDirectory) <> 0) then
 689       begin
 690         // i found her!
 691         newname := newname+curname;
 692         if wantdir then newname := newname+'/';
 693         continue;
 694       end;
 695     end;
 696     //writeln(Format('npt=[%s]; newname=[%s]; curname=[%s]; wantdir=%d', [npt, newname, curname, Integer(wantdir)]));
 697     // alas, either not found, or invalid attributes
 698     foundher := false;
 699     try
 700       if FindFirst(newname+'*', faAnyFile, sr) = 0 then
 701       repeat
 702         if (wantdir = ((sr.attr and faDirectory) <> 0)) and StrEquCI1251(sr.name, curname) then
 703         begin
 704           // i found her!
 705           newname := newname+sr.name;
 706           if wantdir then newname := newname+'/';
 707           foundher := true;
 708           break;
 709         end;
 710       until FindNext(sr) <> 0;
 711     finally
 712       FindClose(sr);
 713     end;
 714     if not foundher then begin newname := ''; result := false; break; end;
 715   end;
 716   if result then pathname := newname;
 717 end;
 718
 719 function findFileCIStr (pathname: AnsiString): AnsiString;
 720 begin
 721   result := pathname;
 722   findFileCI(result);
 723 end;
 724
 725 function openDiskFileRO (pathname: AnsiString): TStream;
 726 begin
 727   if not findFileCI(pathname) then raise Exception.Create('can''t open file "'+pathname+'"');
 728   result := TFileStream.Create(pathname, fmOpenRead or {fmShareDenyWrite}fmShareDenyNone);
 729 end;
 730
 731 function createDiskFile (pathname: AnsiString): TStream;
 732 var
 733   path: AnsiString;
 734 begin
 735   path := ExtractFilePath(pathname);
 736   if length(path) > 0 then
 737   begin
 738     if not findFileCI(path, true) then raise Exception.Create('can''t create file "'+pathname+'"');
 739   end;
 740   result := TFileStream.Create(path+ExtractFileName(pathname), fmCreate);
 741 end;
 742
 743
 744 procedure writeIntegerLE (st: TStream; vp: Pointer; size: Integer);
 745 {$IFDEF ENDIAN_LITTLE}
 746 begin
 747   st.writeBuffer(vp^, size);
 748 end;
 749 {$ELSE}
 750 var
 751   p: PByte;
 752 begin
 753   p := PByte(vp)+size-1;
 754   while size > 0 do
 755   begin
 756     st.writeBuffer(p^, 1);
 757     Dec(size);
 758     Dec(p);
 759   end;
 760 end;
 761 {$ENDIF}
 762
 763 procedure writeIntegerBE (st: TStream; vp: Pointer; size: Integer);
 764 {$IFDEF ENDIAN_LITTLE}
 765 var
 766   p: PByte;
 767 begin
 768   p := PByte(vp)+size-1;
 769   while size > 0 do
 770   begin
 771     st.writeBuffer(p^, 1);
 772     Dec(size);
 773     Dec(p);
 774   end;
 775 end;
 776 {$ELSE}
 777 begin
 778   st.writeBuffer(vp^, size);
 779 end;
 780 {$ENDIF}
 781
 782 procedure writeInt (st: TStream; v: Byte); overload; begin writeIntegerLE(st, @v, 1); end;
 783 procedure writeInt (st: TStream; v: ShortInt); overload; begin writeIntegerLE(st, @v, 1); end;
 784 procedure writeInt (st: TStream; v: Word); overload; begin writeIntegerLE(st, @v, 2); end;
 785 procedure writeInt (st: TStream; v: SmallInt); overload; begin writeIntegerLE(st, @v, 2); end;
 786 procedure writeInt (st: TStream; v: LongWord); overload; begin writeIntegerLE(st, @v, 4); end;
 787 procedure writeInt (st: TStream; v: LongInt); overload; begin writeIntegerLE(st, @v, 4); end;
 788 procedure writeInt (st: TStream; v: Int64); overload; begin writeIntegerLE(st, @v, 8); end;
 789 procedure writeInt (st: TStream; v: UInt64); overload; begin writeIntegerLE(st, @v, 8); end;
 790
 791 procedure writeIntBE (st: TStream; v: Byte); overload; begin writeIntegerBE(st, @v, 1); end;
 792 procedure writeIntBE (st: TStream; v: ShortInt); overload; begin writeIntegerBE(st, @v, 1); end;
 793 procedure writeIntBE (st: TStream; v: Word); overload; begin writeIntegerBE(st, @v, 2); end;
 794 procedure writeIntBE (st: TStream; v: SmallInt); overload; begin writeIntegerBE(st, @v, 2); end;
 795 procedure writeIntBE (st: TStream; v: LongWord); overload; begin writeIntegerBE(st, @v, 4); end;
 796 procedure writeIntBE (st: TStream; v: LongInt); overload; begin writeIntegerBE(st, @v, 4); end;
 797 procedure writeIntBE (st: TStream; v: Int64); overload; begin writeIntegerBE(st, @v, 8); end;
 798 procedure writeIntBE (st: TStream; v: UInt64); overload; begin writeIntegerBE(st, @v, 8); end;
 799
 800
 801 procedure readIntegerLE (st: TStream; vp: Pointer; size: Integer);
 802 {$IFDEF ENDIAN_LITTLE}
 803 begin
 804   st.readBuffer(vp^, size);
 805 end;
 806 {$ELSE}
 807 var
 808   p: PByte;
 809 begin
 810   p := PByte(vp)+size-1;
 811   while size > 0 do
 812   begin
 813     st.readBuffer(p^, 1);
 814     Dec(size);
 815     Dec(p);
 816   end;
 817 end;
 818 {$ENDIF}
 819
 820 procedure readIntegerBE (st: TStream; vp: Pointer; size: Integer);
 821 {$IFDEF ENDIAN_LITTLE}
 822 var
 823   p: PByte;
 824 begin
 825   p := PByte(vp)+size-1;
 826   while size > 0 do
 827   begin
 828     st.readBuffer(p^, 1);
 829     Dec(size);
 830     Dec(p);
 831   end;
 832 end;
 833 {$ELSE}
 834 begin
 835   st.readBuffer(vp^, size);
 836 end;
 837 {$ENDIF}
 838
 839 function readByte (st: TStream): Byte; begin readIntegerLE(st, @result, 1); end;
 840 function readShortInt (st: TStream): ShortInt; begin readIntegerLE(st, @result, 1); end;
 841 function readWord (st: TStream): Word; begin readIntegerLE(st, @result, 2); end;
 842 function readSmallInt (st: TStream): SmallInt; begin readIntegerLE(st, @result, 2); end;
 843 function readLongWord (st: TStream): LongWord; begin readIntegerLE(st, @result, 4); end;
 844 function readLongInt (st: TStream): LongInt; begin readIntegerLE(st, @result, 4); end;
 845 function readInt64 (st: TStream): Int64; begin readIntegerLE(st, @result, 8); end;
 846 function readUInt64 (st: TStream): UInt64; begin readIntegerLE(st, @result, 8); end;
 847
 848 function readByteBE (st: TStream): Byte; begin readIntegerBE(st, @result, 1); end;
 849 function readShortIntBE (st: TStream): ShortInt; begin readIntegerBE(st, @result, 1); end;
 850 function readWordBE (st: TStream): Word; begin readIntegerBE(st, @result, 2); end;
 851 function readSmallIntBE (st: TStream): SmallInt; begin readIntegerBE(st, @result, 2); end;
 852 function readLongWordBE (st: TStream): LongWord; begin readIntegerBE(st, @result, 4); end;
 853 function readLongIntBE (st: TStream): LongInt; begin readIntegerBE(st, @result, 4); end;
 854 function readInt64BE (st: TStream): Int64; begin readIntegerBE(st, @result, 8); end;
 855 function readUInt64BE (st: TStream): UInt64; begin readIntegerBE(st, @result, 8); end;
 856
 857 end.
 858