src/shared/utils.pas

   1 (* Copyright (C)  DooM 2D:Forever Developers
   2  *
   3  * This program is free software: you can redistribute it and/or modify
   4  * it under the terms of the GNU General Public License as published by
   5  * the Free Software Foundation, either version 3 of the License, or
   6  * (at your option) any later version.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11  * GNU General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public License
  14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15  *)
  16 {$INCLUDE a_modes.inc}
  17 unit utils;
  18
  19 interface
  20
  21 uses
  22   SysUtils, Classes;
  23
  24
  25 // ////////////////////////////////////////////////////////////////////////// //
type
  // Incremental UTF-8 decoder driven by the `utf8dfa` state table.
  // Feed it one byte at a time with `decode`; whenever `decode` returns `true`,
  // `codepoint` holds either a decoded code point or `Replacement`.
  TUtf8DecoderFast = packed record
  public
    const Replacement = $FFFD; // replacement char for invalid unicode
    const Accept = 0; // DFA state: a whole code point has been decoded
    const Reject = 12; // DFA state: the byte sequence is not valid UTF-8

  private
    state: LongWord; // current DFA state (offset into the transition part of `utf8dfa`)

  public
    codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state)

  public
    // `v` is ignored; presumably it exists only because FPC rejects
    // parameterless record constructors -- TODO confirm
    constructor Create (v: Boolean{unused});

    // return the decoder to its initial state (Accept, codepoint 0)
    procedure reset (); inline;

    function complete (): Boolean; inline; // is current character complete? take `codepoint` then
    // is the decoder in the Reject state?
    // NOTE(review): `decode` converts Reject back to Accept before returning,
    // so this looks like it is always `false` after a `decode` call -- confirm
    function invalid (): Boolean; inline;
    // `true` when the decoder is in either the Accept or the Reject state
    function completeOrInvalid (): Boolean; inline;

    // process one byte, return `true` if codepoint is ready
    // (an invalid sequence also returns `true`, with `codepoint` set to `Replacement`)
    function decode (b: Byte): Boolean; inline; overload;
    // same as above, for a character instead of a raw byte
    function decode (c: AnsiChar): Boolean; inline; overload;
  end;
  52
  53
  54 // ////////////////////////////////////////////////////////////////////////// //
  55 // does filename have one of ".wad", ".pk3", ".zip" extensions?
  56 function hasWadExtension (fn: AnsiString): Boolean;
  57
  58 // does filepath have ".XXX:\" in it?
  59 function isWadPath (fn: AnsiString): Boolean;
  60
  61 // adds ".wad" extension if filename doesn't have one of ".wad", ".pk3", ".zip"
  62 function addWadExtension (fn: AnsiString): AnsiString;
  63
   64 // convert number to string with nice commas
  65 function Int64ToStrComma (i: Int64): AnsiString;
  66
  67 function UpCase1251 (ch: Char): Char;
  68 function LoCase1251 (ch: Char): Char;
  69
  70 // `true` if strings are equal; ignoring case for cp1251
  71 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
  72
  73 function utf8Valid (const s: AnsiString): Boolean;
  74
  75 function utf8to1251 (s: AnsiString): AnsiString;
  76
  77 // `pathname` will be modified if path is valid
  78 // `lastIsDir` should be `true` if we are searching for directory
  79 // nobody cares about shitdoze, so i'll use the same code path for it
  80 function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
  81 function findFileCIStr (pathname: AnsiString): AnsiString;
  82
   83 // these raise an exception on failure
  84 function openDiskFileRO (pathname: AnsiString): TStream;
  85 function createDiskFile (pathname: AnsiString): TStream;
  86
  87 // little endian
  88 procedure writeInt (st: TStream; v: Byte); overload;
  89 procedure writeInt (st: TStream; v: ShortInt); overload;
  90 procedure writeInt (st: TStream; v: Word); overload;
  91 procedure writeInt (st: TStream; v: SmallInt); overload;
  92 procedure writeInt (st: TStream; v: LongWord); overload;
  93 procedure writeInt (st: TStream; v: LongInt); overload;
  94 procedure writeInt (st: TStream; v: Int64); overload;
  95 procedure writeInt (st: TStream; v: UInt64); overload;
  96
  97 function readByte (st: TStream): Byte;
  98 function readShortInt (st: TStream): ShortInt;
  99 function readWord (st: TStream): Word;
 100 function readSmallInt (st: TStream): SmallInt;
 101 function readLongWord (st: TStream): LongWord;
 102 function readLongInt (st: TStream): LongInt;
 103 function readInt64 (st: TStream): Int64;
 104 function readUInt64 (st: TStream): UInt64;
 105
 106 // big endian
 107 procedure writeIntBE (st: TStream; v: Byte); overload;
 108 procedure writeIntBE (st: TStream; v: ShortInt); overload;
 109 procedure writeIntBE (st: TStream; v: Word); overload;
 110 procedure writeIntBE (st: TStream; v: SmallInt); overload;
 111 procedure writeIntBE (st: TStream; v: LongWord); overload;
 112 procedure writeIntBE (st: TStream; v: LongInt); overload;
 113 procedure writeIntBE (st: TStream; v: Int64); overload;
 114 procedure writeIntBE (st: TStream; v: UInt64); overload;
 115
 116 function readByteBE (st: TStream): Byte;
 117 function readShortIntBE (st: TStream): ShortInt;
 118 function readWordBE (st: TStream): Word;
 119 function readSmallIntBE (st: TStream): SmallInt;
 120 function readLongWordBE (st: TStream): LongWord;
 121 function readLongIntBE (st: TStream): LongInt;
 122 function readInt64BE (st: TStream): Int64;
 123 function readUInt64BE (st: TStream): UInt64;
 124
 125
 126 type
 127   TFormatStrFCallback = procedure (constref buf; len: SizeUInt);
 128
 129 function wchar2win (wc: WideChar): AnsiChar; inline;
 130 function utf2win (const s: AnsiString): AnsiString;
 131 function win2utf (const s: AnsiString): AnsiString;
 132 function digitInBase (ch: AnsiChar; base: Integer): Integer;
 133
 134 // returns string in single or double quotes
 135 // single quotes supports only pascal-style '' for single quote char
 136 // double quotes supports c-style escapes
 137 // function will select quote mode automatically
 138 function quoteStr (const s: AnsiString): AnsiString;
 139
 140
 141 // ////////////////////////////////////////////////////////////////////////// //
var
  // lookup table indexed by a 16-bit character code, yielding the cp1251 byte
  // for it ('?' when there is none); filled on demand by `initShitMap`
  wc2shitmap: array[0..65535] of AnsiChar;
  // becomes `true` once `wc2shitmap` has been filled
  wc2shitmapInited: Boolean = false;
 145
 146
 147 // ////////////////////////////////////////////////////////////////////////// //
const
  // Unicode code points for the upper half of cp1251: entry `i` describes
  // byte `i+128` (this is how `win2utf` and `initShitMap` index it).
  // Entry 24 (byte $98) holds $003F, i.e. '?'; presumably because that byte
  // is unassigned in cp1251 -- TODO confirm.
  cp1251: array[0..127] of Word = (
    $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
    $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
    $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
    $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
    $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
    $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
    $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
    $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
  );
 159
 160 implementation
 161
 162 procedure initShitMap ();
 163 var
 164   f: Integer;
 165 begin
 166   for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?';
 167   for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f);
 168   for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128);
 169   wc2shitmapInited := true;
 170 end;
 171
 172
 173 // ////////////////////////////////////////////////////////////////////////// //
 174 // fast state-machine based UTF-8 decoder; using 8 bytes of memory
 175 // code points from invalid range will never be valid, this is the property of the state machine
const
  // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
  // layout (as used by `TUtf8DecoderFast.decode`):
  //   [0..255]  : character class of each possible byte
  //   [256..]   : next state, indexed by `256+state+class`
  // the table holds $16c = 256+108 entries in total
  utf8dfa: array[0..$16c-1] of Byte = (
    // maps bytes to character classes
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f
    $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f
    $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f
    $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f
    $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af
    $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf
    $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf
    $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df
    $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef
    $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff
    // maps a combination of a state of the automaton and a character class to a state
    $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f
    $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f
    $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f
    $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f
    $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f
    $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f
    $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c);
 204
 205
 206 // ////////////////////////////////////////////////////////////////////////// //
 207 constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end;
 208
 209 procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end;
 210
 211 function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end;
 212 function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end;
 213 function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end;
 214
 215 function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end;
 216
 217 function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload;
 218 var
 219   tp: LongWord;
 220 begin
 221   if (state = Reject) then begin state := Accept; codepoint := 0; end;
 222   tp := utf8dfa[b];
 223   if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b;
 224   state := utf8dfa[256+state+tp];
 225   if (state = Reject) then begin codepoint := Replacement; state := Accept; end;
 226   result := (state = Accept);
 227 end;
 228
 229
 230 // ////////////////////////////////////////////////////////////////////////// //
 231 function wchar2win (wc: WideChar): AnsiChar; inline;
 232 begin
 233   if not wc2shitmapInited then initShitMap();
 234   if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)];
 235 end;
 236
 237
 238 // ////////////////////////////////////////////////////////////////////////// //
 239 function utf2win (const s: AnsiString): AnsiString;
 240 var
 241   f, c: Integer;
 242   ud: TUtf8DecoderFast;
 243 begin
 244   for f := 1 to Length(s) do
 245   begin
 246     if (Byte(s[f]) > 127) then
 247     begin
 248       ud := TUtf8DecoderFast.Create(true);
 249       result := '';
 250       for c := 1 to Length(s) do
 251       begin
 252         if ud.decode(s[c]) then result += wchar2win(WideChar(ud.codepoint));
 253       end;
 254       exit;
 255     end;
 256   end;
 257   result := s;
 258 end;
 259
 260
 261 function win2utf (const s: AnsiString): AnsiString;
 262 var
 263   f, c: Integer;
 264
 265   function utf8Encode (code: Integer): AnsiString;
 266   begin
 267     if (code < 0) or (code > $10FFFF) then begin result := '?'; exit; end;
 268     if (code <= $7f) then
 269     begin
 270       result := Char(code and $ff);
 271     end
 272     else if (code <= $7FF) then
 273     begin
 274       result := Char($C0 or (code shr 6));
 275       result += Char($80 or (code and $3F));
 276     end
 277     else if (code <= $FFFF) then
 278     begin
 279       result := Char($E0 or (code shr 12));
 280       result += Char($80 or ((code shr 6) and $3F));
 281       result += Char($80 or (code and $3F));
 282     end
 283     else if (code <= $10FFFF) then
 284     begin
 285       result := Char($F0 or (code shr 18));
 286       result += Char($80 or ((code shr 12) and $3F));
 287       result += Char($80 or ((code shr 6) and $3F));
 288       result += Char($80 or (code and $3F));
 289     end
 290     else
 291     begin
 292       result := '?';
 293     end;
 294   end;
 295
 296 begin
 297   for f := 1 to Length(s) do
 298   begin
 299     if (Byte(s[f]) > 127) then
 300     begin
 301       result := '';
 302       for c := 1 to Length(s) do
 303       begin
 304         if (Byte(s[c]) < 128) then
 305         begin
 306           result += s[c];
 307         end
 308         else
 309         begin
 310           result += utf8Encode(cp1251[Byte(s[c])-128])
 311         end;
 312       end;
 313       exit;
 314     end;
 315   end;
 316   result := s;
 317 end;
 318
 319
 320 // ////////////////////////////////////////////////////////////////////////// //
 321 function digitInBase (ch: AnsiChar; base: Integer): Integer;
 322 begin
 323   result := -1;
 324   if (base < 1) or (base > 36) then exit;
 325   if (ch < '0') then exit;
 326   if (base <= 10) then
 327   begin
 328     if (Integer(ch) >= 48+base) then exit;
 329     result := Integer(ch)-48;
 330   end
 331   else
 332   begin
 333     if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end;
 334     if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower()
 335     if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit;
 336     result := Integer(ch)-65+10;
 337   end;
 338 end;
 339
 340
 341 // ////////////////////////////////////////////////////////////////////////// //
 342 function quoteStr (const s: AnsiString): AnsiString;
 343
 344   function squote (const s: AnsiString): AnsiString;
 345   var
 346     f: Integer;
 347   begin
 348     result := '''';
 349     for f := 1 to Length(s) do
 350     begin
 351       if (s[f] = '''') then result += '''';
 352       result += s[f];
 353     end;
 354     result += '''';
 355   end;
 356
 357   function dquote (const s: AnsiString): AnsiString;
 358   var
 359     f: Integer;
 360     ch: AnsiChar;
 361   begin
 362     result := '"';
 363     for f := 1 to Length(s) do
 364     begin
 365       ch := s[f];
 366            if (ch = #0) then result += '\z'
 367       else if (ch = #9) then result += '\t'
 368       else if (ch = #10) then result += '\n'
 369       else if (ch = #13) then result += '\r'
 370       else if (ch = #27) then result += '\e'
 371       else if (ch < ' ') or (ch = #127) then
 372       begin
 373         result += '\x';
 374         result += LowerCase(IntToHex(Integer(ch), 2));
 375       end
 376       else if (ch = '"') or (ch = '\') then
 377       begin
 378         result += '\';
 379         result += ch;
 380       end
 381       else
 382       begin
 383         result += ch;
 384       end;
 385     end;
 386     result += '"';
 387   end;
 388
 389 var
 390   needSingle: Boolean = false;
 391   f: Integer;
 392 begin
 393   for f := 1 to Length(s) do
 394   begin
 395     if (s[f] = '''') then begin needSingle := true; continue; end;
 396     if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end;
 397   end;
 398   if needSingle then result := squote(s) else result := ''''+s+'''';
 399 end;
 400
 401
 402 // ////////////////////////////////////////////////////////////////////////// //
 403 function hasWadExtension (fn: AnsiString): Boolean;
 404 begin
 405   fn := ExtractFileExt(fn);
 406   result := StrEquCI1251(fn, '.wad') or StrEquCI1251(fn, '.pk3') or StrEquCI1251(fn, '.zip');
 407 end;
 408
 409
 410 function addWadExtension (fn: AnsiString): AnsiString;
 411 begin
 412   result := fn;
 413   if not hasWadExtension(result) then result := result+'.wad';
 414 end;
 415
 416
 417 function isWadPath (fn: AnsiString): Boolean;
 418 var
 419   p: Integer;
 420   s: AnsiString;
 421 begin
 422   result := false;
 423   while true do
 424   begin
 425     p := Pos(':', fn);
 426     if (p = 0) or (length(fn)-p < 1) then break;
 427     if (p-4 > 1) and (fn[p-4] = '.') and ((fn[p+1] = '\') or (fn[p+1] = '/')) then
 428     begin
 429       s := Copy(fn, p-4, 4);
 430       if StrEquCI1251(s, '.wad') or StrEquCI1251(s, '.pk3') or StrEquCI1251(s, '.zip') then
 431       begin
 432         result := true;
 433         exit;
 434       end;
 435     end;
 436     Delete(fn, 1, p);
 437   end;
 438 end;
 439
 440
 441 function Int64ToStrComma (i: Int64): AnsiString;
 442 var
 443   f: Integer;
 444 begin
 445   Str(i, result);
 446   f := Length(result)+1;
 447   while f > 4 do
 448   begin
 449     Dec(f, 3); Insert(',', result, f);
 450   end;
 451 end;
 452
 453
 454 function UpCase1251 (ch: Char): Char;
 455 begin
 456   if ch < #128 then
 457   begin
 458     if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32);
 459   end
 460   else
 461   begin
 462     if (ch >= #224) and (ch <= #255) then
 463     begin
 464       Dec(ch, 32);
 465     end
 466     else
 467     begin
 468       case ch of
 469         #184, #186, #191: Dec(ch, 16);
 470         #162, #179: Dec(ch);
 471       end;
 472     end;
 473   end;
 474   result := ch;
 475 end;
 476
 477
 478 function LoCase1251 (ch: Char): Char;
 479 begin
 480   if ch < #128 then
 481   begin
 482     if (ch >= 'A') and (ch <= 'Z') then Inc(ch, 32);
 483   end
 484   else
 485   begin
 486     if (ch >= #192) and (ch <= #223) then
 487     begin
 488       Inc(ch, 32);
 489     end
 490     else
 491     begin
 492       case ch of
 493         #168, #170, #175: Inc(ch, 16);
 494         #161, #178: Inc(ch);
 495       end;
 496     end;
 497   end;
 498   result := ch;
 499 end;
 500
 501
 502 function StrEquCI1251 (const s0, s1: AnsiString): Boolean;
 503 var
 504   i: Integer;
 505 begin
 506   result := false;
 507   if length(s0) <> length(s1) then exit;
 508   for i := 1 to length(s0) do if UpCase1251(s0[i]) <> UpCase1251(s1[i]) then exit;
 509   result := true;
 510 end;
 511
 512
 513 // ////////////////////////////////////////////////////////////////////////// //
 514 // utils
 515 // `ch`: utf8 start
 516 // -1: invalid utf8
 517 function utf8CodeLen (ch: Word): Integer;
 518 begin
 519   if ch < $80 then begin result := 1; exit; end;
 520   if (ch and $FE) = $FC then begin result := 6; exit; end;
 521   if (ch and $FC) = $F8 then begin result := 5; exit; end;
 522   if (ch and $F8) = $F0 then begin result := 4; exit; end;
 523   if (ch and $F0) = $E0 then begin result := 3; exit; end;
 524   if (ch and $E0) = $C0 then begin result := 2; exit; end;
 525   result := -1; // invalid
 526 end;
 527
 528
 529 function utf8Valid (const s: AnsiString): Boolean;
 530 var
 531   pos, len: Integer;
 532 begin
 533   result := false;
 534   pos := 1;
 535   while pos <= length(s) do
 536   begin
 537     len := utf8CodeLen(Byte(s[pos]));
 538     if len < 1 then exit; // invalid sequence start
 539     if pos+len-1 > length(s) then exit; // out of chars in string
 540     Dec(len);
 541     Inc(pos);
 542     // check other sequence bytes
 543     while len > 0 do
 544     begin
 545       if (Byte(s[pos]) and $C0) <> $80 then exit;
 546       Dec(len);
 547       Inc(pos);
 548     end;
 549   end;
 550   result := true;
 551 end;
 552
 553
 554 // ////////////////////////////////////////////////////////////////////////// //
const
  // Unicode code point for each cp1251 byte 128..255 (indexed by the byte
  // itself); `decodeUtf8Char` searches it to map a code point back to a byte.
  // The values are the same as in the `cp1251` table of the interface section.
  uni2wint: array [128..255] of Word = (
    $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
    $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
    $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
    $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
    $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
    $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
    $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
    $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
  );
 566
 567
 568 function decodeUtf8Char (s: AnsiString; var pos: Integer): char;
 569 var
 570   b, c: Integer;
 571 begin
 572   (* The following encodings are valid, except for the 5 and 6 byte
 573    * combinations:
 574    *  0xxxxxxx
 575    *  110xxxxx 10xxxxxx
 576    *  1110xxxx 10xxxxxx 10xxxxxx
 577    *  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 578    *  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 579    *  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 580    *)
 581   result := '?';
 582   if pos > length(s) then exit;
 583
 584   b := Byte(s[pos]);
 585   Inc(pos);
 586   if b < $80 then begin result := char(b); exit; end;
 587
 588   // mask out unused bits
 589        if (b and $FE) = $FC then b := b and $01
 590   else if (b and $FC) = $F8 then b := b and $03
 591   else if (b and $F8) = $F0 then b := b and $07
 592   else if (b and $F0) = $E0 then b := b and $0F
 593   else if (b and $E0) = $C0 then b := b and $1F
 594   else exit; // invalid utf8
 595
 596   // now continue
 597   while pos <= length(s) do
 598   begin
 599     c := Byte(s[pos]);
 600     if (c and $C0) <> $80 then break; // no more
 601     b := b shl 6;
 602     b := b or (c and $3F);
 603     Inc(pos);
 604   end;
 605
 606   // done, try 1251
 607   for c := 128 to 255 do if uni2wint[c] = b then begin result := char(c and $FF); exit; end;
 608   // alas
 609 end;
 610
 611
 612 function utf8to1251 (s: AnsiString): AnsiString;
 613 var
 614   pos: Integer;
 615 begin
 616   if not utf8Valid(s) then begin result := s; exit; end;
 617   pos := 1;
 618   while pos <= length(s) do
 619   begin
 620     if Byte(s[pos]) >= $80 then break;
 621     Inc(pos);
 622   end;
 623   if pos > length(s) then begin result := s; exit; end; // nothing to do here
 624   result := '';
 625   pos := 1;
 626   while pos <= length(s) do result := result+decodeUtf8Char(s, pos);
 627 end;
 628
 629
 630 // ////////////////////////////////////////////////////////////////////////// //
 631 // `pathname` will be modified if path is valid
 632 // `lastIsDir` should be `true` if we are searching for directory
 633 // nobody cares about shitdoze, so i'll use the same code path for it
function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean;
// Resolves `pathname` component by component, ignoring case (as defined by
// `StrEquCI1251`) whenever the exact name is not found.
// Returns `false` for an empty path or when some component cannot be found;
// `pathname` is left untouched in that case. On success `pathname` is
// replaced by the resolved path, built with '/' separators (directories get
// a trailing '/').
var
  sr: TSearchRec;
  npt: AnsiString; // not yet processed rest of the path
  newname: AnsiString = ''; // resolved part of the path
  curname: AnsiString; // component being resolved
  wantdir: Boolean;
  attr: LongInt;
  foundher: Boolean;
begin
  npt := pathname;
  result := (length(npt) > 0);
  // keep the root for absolute paths
  if (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) then newname := '/';
  while length(npt) > 0 do
  begin
    // skip leading path separators
    while (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) do Delete(npt, 1, 1);
    if length(npt) = 0 then break;
    // extract name
    curname := '';
    while (length(npt) > 0) and (npt[1] <> '/') and (npt[1] <> '\') do
    begin
      curname := curname+npt[1];
      Delete(npt, 1, 1);
    end;
    // skip the separators after the name as well
    while (length(npt) > 0) and ((npt[1] = '/') or (npt[1] = '\')) do Delete(npt, 1, 1);
    // every component but the last one must be a directory;
    // the last one must be a directory only if the caller said so
    wantdir := lastIsDir or (length(npt) > 0); // do we want directory here?
    //writeln(Format('npt=[%s]; newname=[%s]; curname=[%s]; wantdir=%d', [npt, newname, curname, Integer(wantdir)]));
    // try the easiest case first
    attr := FileGetAttr(newname+curname);
    if attr <> -1 then
    begin
      if wantdir = ((attr and faDirectory) <> 0) then
      begin
        // exact name exists and is of the wanted kind
        newname := newname+curname;
        if wantdir then newname := newname+'/';
        continue;
      end;
    end;
    //writeln(Format('npt=[%s]; newname=[%s]; curname=[%s]; wantdir=%d', [npt, newname, curname, Integer(wantdir)]));
    // alas, either not found, or invalid attributes
    // scan the directory for a name that matches when case is ignored
    foundher := false;
    try
      if FindFirst(newname+'*', faAnyFile, sr) = 0 then
      repeat
        if (wantdir = ((sr.attr and faDirectory) <> 0)) and StrEquCI1251(sr.name, curname) then
        begin
          // matching entry found; use the name as it is stored on disk
          newname := newname+sr.name;
          if wantdir then newname := newname+'/';
          foundher := true;
          break;
        end;
      until FindNext(sr) <> 0;
    finally
      FindClose(sr);
    end;
    // give up on the first component that cannot be resolved
    if not foundher then begin newname := ''; result := false; break; end;
  end;
  if result then pathname := newname;
end;
 697
 698 function findFileCIStr (pathname: AnsiString): AnsiString;
 699 begin
 700   result := pathname;
 701   findFileCI(result);
 702 end;
 703
 704 function openDiskFileRO (pathname: AnsiString): TStream;
 705 begin
 706   if not findFileCI(pathname) then raise Exception.Create('can''t open file "'+pathname+'"');
 707   result := TFileStream.Create(pathname, fmOpenRead or {fmShareDenyWrite}fmShareDenyNone);
 708 end;
 709
 710 function createDiskFile (pathname: AnsiString): TStream;
 711 var
 712   path: AnsiString;
 713 begin
 714   path := ExtractFilePath(pathname);
 715   if length(path) > 0 then
 716   begin
 717     if not findFileCI(path, true) then raise Exception.Create('can''t create file "'+pathname+'"');
 718   end;
 719   result := TFileStream.Create(path+ExtractFileName(pathname), fmCreate);
 720 end;
 721
 722
 723 procedure writeIntegerLE (st: TStream; vp: Pointer; size: Integer);
 724 {$IFDEF ENDIAN_LITTLE}
 725 begin
 726   st.writeBuffer(vp^, size);
 727 end;
 728 {$ELSE}
 729 var
 730   p: PByte;
 731 begin
 732   p := PByte(vp)+size-1;
 733   while size > 0 do
 734   begin
 735     st.writeBuffer(p^, 1);
 736     Dec(size);
 737     Dec(p);
 738   end;
 739 end;
 740 {$ENDIF}
 741
 742 procedure writeIntegerBE (st: TStream; vp: Pointer; size: Integer);
 743 {$IFDEF ENDIAN_LITTLE}
 744 var
 745   p: PByte;
 746 begin
 747   p := PByte(vp)+size-1;
 748   while size > 0 do
 749   begin
 750     st.writeBuffer(p^, 1);
 751     Dec(size);
 752     Dec(p);
 753   end;
 754 end;
 755 {$ELSE}
 756 begin
 757   st.writeBuffer(vp^, size);
 758 end;
 759 {$ENDIF}
 760
 761 procedure writeInt (st: TStream; v: Byte); overload; begin writeIntegerLE(st, @v, 1); end;
 762 procedure writeInt (st: TStream; v: ShortInt); overload; begin writeIntegerLE(st, @v, 1); end;
 763 procedure writeInt (st: TStream; v: Word); overload; begin writeIntegerLE(st, @v, 2); end;
 764 procedure writeInt (st: TStream; v: SmallInt); overload; begin writeIntegerLE(st, @v, 2); end;
 765 procedure writeInt (st: TStream; v: LongWord); overload; begin writeIntegerLE(st, @v, 4); end;
 766 procedure writeInt (st: TStream; v: LongInt); overload; begin writeIntegerLE(st, @v, 4); end;
 767 procedure writeInt (st: TStream; v: Int64); overload; begin writeIntegerLE(st, @v, 8); end;
 768 procedure writeInt (st: TStream; v: UInt64); overload; begin writeIntegerLE(st, @v, 8); end;
 769
 770 procedure writeIntBE (st: TStream; v: Byte); overload; begin writeIntegerBE(st, @v, 1); end;
 771 procedure writeIntBE (st: TStream; v: ShortInt); overload; begin writeIntegerBE(st, @v, 1); end;
 772 procedure writeIntBE (st: TStream; v: Word); overload; begin writeIntegerBE(st, @v, 2); end;
 773 procedure writeIntBE (st: TStream; v: SmallInt); overload; begin writeIntegerBE(st, @v, 2); end;
 774 procedure writeIntBE (st: TStream; v: LongWord); overload; begin writeIntegerBE(st, @v, 4); end;
 775 procedure writeIntBE (st: TStream; v: LongInt); overload; begin writeIntegerBE(st, @v, 4); end;
 776 procedure writeIntBE (st: TStream; v: Int64); overload; begin writeIntegerBE(st, @v, 8); end;
 777 procedure writeIntBE (st: TStream; v: UInt64); overload; begin writeIntegerBE(st, @v, 8); end;
 778
 779
 780 procedure readIntegerLE (st: TStream; vp: Pointer; size: Integer);
 781 {$IFDEF ENDIAN_LITTLE}
 782 begin
 783   st.readBuffer(vp^, size);
 784 end;
 785 {$ELSE}
 786 var
 787   p: PByte;
 788 begin
 789   p := PByte(vp)+size-1;
 790   while size > 0 do
 791   begin
 792     st.readBuffer(p^, 1);
 793     Dec(size);
 794     Dec(p);
 795   end;
 796 end;
 797 {$ENDIF}
 798
 799 procedure readIntegerBE (st: TStream; vp: Pointer; size: Integer);
 800 {$IFDEF ENDIAN_LITTLE}
 801 var
 802   p: PByte;
 803 begin
 804   p := PByte(vp)+size-1;
 805   while size > 0 do
 806   begin
 807     st.readBuffer(p^, 1);
 808     Dec(size);
 809     Dec(p);
 810   end;
 811 end;
 812 {$ELSE}
 813 begin
 814   st.readBuffer(vp^, size);
 815 end;
 816 {$ENDIF}
 817
 818 function readByte (st: TStream): Byte; begin readIntegerLE(st, @result, 1); end;
 819 function readShortInt (st: TStream): ShortInt; begin readIntegerLE(st, @result, 1); end;
 820 function readWord (st: TStream): Word; begin readIntegerLE(st, @result, 2); end;
 821 function readSmallInt (st: TStream): SmallInt; begin readIntegerLE(st, @result, 2); end;
 822 function readLongWord (st: TStream): LongWord; begin readIntegerLE(st, @result, 4); end;
 823 function readLongInt (st: TStream): LongInt; begin readIntegerLE(st, @result, 4); end;
 824 function readInt64 (st: TStream): Int64; begin readIntegerLE(st, @result, 8); end;
 825 function readUInt64 (st: TStream): UInt64; begin readIntegerLE(st, @result, 8); end;
 826
 827 function readByteBE (st: TStream): Byte; begin readIntegerBE(st, @result, 1); end;
 828 function readShortIntBE (st: TStream): ShortInt; begin readIntegerBE(st, @result, 1); end;
 829 function readWordBE (st: TStream): Word; begin readIntegerBE(st, @result, 2); end;
 830 function readSmallIntBE (st: TStream): SmallInt; begin readIntegerBE(st, @result, 2); end;
 831 function readLongWordBE (st: TStream): LongWord; begin readIntegerBE(st, @result, 4); end;
 832 function readLongIntBE (st: TStream): LongInt; begin readIntegerBE(st, @result, 4); end;
 833 function readInt64BE (st: TStream): Int64; begin readIntegerBE(st, @result, 8); end;
 834 function readUInt64BE (st: TStream): UInt64; begin readIntegerBE(st, @result, 8); end;
 835
 836 end.
 837