From d4c1e78fe6bfb7cbbff5ced6b94d6e630e06d6f2 Mon Sep 17 00:00:00 2001 From: DeaDDooMER Date: Wed, 27 Sep 2023 09:33:16 +0300 Subject: [PATCH] utils: fix encoding conversion utf8/cp1251 --- src/flexui/sdlcarcass.pas | 40 +--------------------------------- src/shared/envvars.pas | 10 ++------- src/shared/utils.pas | 45 ++++++++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 64 deletions(-) diff --git a/src/flexui/sdlcarcass.pas b/src/flexui/sdlcarcass.pas index 247229b..0e2c91b 100644 --- a/src/flexui/sdlcarcass.pas +++ b/src/flexui/sdlcarcass.pas @@ -67,7 +67,7 @@ property fuiFPS: Integer read getFUIFPS write setFUIFPS; // default: 30 implementation uses - SysUtils, Classes, + SysUtils, Classes, utils, {$INCLUDE ../nogl/noGLuses.inc} {$IF DEFINED(LINUX) OR DEFINED(ANDROID)} unixtype, linux @@ -142,44 +142,6 @@ begin end; -// ////////////////////////////////////////////////////////////////////////// // -var - wc2shitmap: array[0..65535] of AnsiChar; - wc2shitmapInited: Boolean = false; - - -// ////////////////////////////////////////////////////////////////////////// // -const - cp1251: array[0..127] of Word = ( - $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, - $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, - $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, - $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, - $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, - $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, - $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, - $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F - ); - - -procedure initShitMap (); -var - f: Integer; -begin - for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?'; - for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f); - for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128); - wc2shitmapInited := true; -end; - - -function wchar2win (wc: WideChar): AnsiChar; inline; -begin - if not wc2shitmapInited then initShitMap(); - if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)]; -end; - - // ////////////////////////////////////////////////////////////////////////// // function fuiOnSDLEvent (var ev: TSDL_Event): Boolean; var diff --git a/src/shared/envvars.pas b/src/shared/envvars.pas index a76315f..a15feef 100644 --- a/src/shared/envvars.pas +++ b/src/shared/envvars.pas @@ -63,14 +63,8 @@ end; {$ELSE} Result := ''; {$ENDIF} - (* invalidate username with non-cp1251 symbols *) - i := Low(Result); - while i <= High(Result) do - begin - if Result[i] = '?' then - Result := ''; - Inc(i) - end + // Remove non 1251 chars + Result := StringReplace(Result, Invalid1251Char, '', [rfReplaceAll]); end; end. diff --git a/src/shared/utils.pas b/src/shared/utils.pas index c0ba824..4116c3f 100644 --- a/src/shared/utils.pas +++ b/src/shared/utils.pas @@ -12,6 +12,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . *) +{$DEFINE D2DF_FORCE_OBJFPC} {$INCLUDE a_modes.inc} unit utils; @@ -26,6 +27,10 @@ type SSArray = array of ShortString; +const + Invalid1251Char = #$98; // Undefined 1251 char, we use it as replacement for unknown chars + InvalidUnicodeCodepoint = $FFFD; // Unicode REPLACEMENT CHARACTER used to replace an unknown, unrecognised, or unrepresentable character + const wadExtensions: array [0..6] of AnsiString = ( '.dfz', '.wad', @@ -47,7 +52,7 @@ const NilThreadId = 0; type TUtf8DecoderFast = packed record public - const Replacement = $FFFD; // replacement char for invalid unicode + const Replacement = InvalidUnicodeCodepoint; // replacement char for invalid unicode const Accept = 0; const Reject = 12; @@ -132,6 +137,7 @@ function utf8to1251 (s: AnsiString): AnsiString; // necessarily cleared). // last name assumed to be a file, not directory (unless `lastIsDir` flag is set). function findFileCI (var pathname: AnsiString; lastIsDir: Boolean=false): Boolean; +function findFileCIStr (pathname: AnsiString): AnsiString; // findDiskWad tries to find the wad file using common wad extensions // (see `wadExtensions` array). @@ -333,8 +339,8 @@ function GetDiskFileInfo (fname: AnsiString; var info: TDiskFileInfo): Boolean; implementation -uses - xstreams; +//uses +// xstreams; // ////////////////////////////////////////////////////////////////////////// // procedure CopyMemory (Dest: Pointer; Src: Pointer; Len: LongWord); inline; @@ -540,7 +546,7 @@ var const cp1251: array[0..127] of Word = ( $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, - $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$FFFD,$2122,$0459,$203A,$045A,$045C,$045B,$045F, + $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,InvalidUnicodeCodepoint,$2122,$0459,$203A,$045A,$045C,$045B,$045F, $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, @@ -554,7 +560,7 @@ procedure initShitMap (); var f: Integer; begin - for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?'; + for f := 0 to High(wc2shitmap) do wc2shitmap[f] := Invalid1251Char; for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f); for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128); wc2shitmapInited := true; @@ -622,7 +628,7 @@ end; function wchar2win (wc: WideChar): AnsiChar; inline; begin if not wc2shitmapInited then initShitMap(); - if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)]; + if (LongWord(wc) > 65535) then result := Invalid1251Char else result := wc2shitmap[LongWord(wc)]; end; @@ -655,7 +661,8 @@ var function utf8Encode (code: Integer): AnsiString; begin - if (code < 0) or (code > $10FFFF) then begin result := '?'; exit; end; + if (code < 0) or (code > $10FFFF) then + code := InvalidUnicodeCodepoint; if (code <= $7f) then begin result := AnsiChar(code and $ff); @@ -677,10 +684,6 @@ var result += AnsiChar($80 or ((code shr 12) and $3F)); result += AnsiChar($80 or ((code shr 6) and $3F)); result += AnsiChar($80 or (code and $3F)); - end - else - begin - result := '?'; end; end; @@ -1048,12 +1051,12 @@ end; function IsValid1251 (ch: Word): Boolean; begin - result := ((ch = Ord('?')) or (wc2shitmap[ch] <> '?')) and (wc2shitmap[ch] <> #$98) + result := wc2shitmap[ch] <> Invalid1251Char end; function IsPrintable1251 (ch: AnsiChar): Boolean; begin - result := (ch >= #32) and (ch <> #127) and (ch <> #$98) + result := (ch >= #32) and (ch <> #127) and (ch <> Invalid1251Char) end; @@ -1133,7 +1136,7 @@ end; const uni2wint: array [128..255] of Word = ( $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, - $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, + $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,InvalidUnicodeCodepoint,$2122,$0459,$203A,$045A,$045C,$045B,$045F, $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, @@ -1156,7 +1159,7 @@ begin * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx *) - result := '?'; + result := Invalid1251Char; if pos > length(s) then exit; b := Byte(s[pos]); @@ -1274,6 +1277,14 @@ begin end; +function findFileCIStr (pathname: AnsiString): AnsiString; +begin + Result := pathname; + if findFileCI(Result) = False then + Result := pathname; +end; + + function isWadNamesEqu (wna, wnb: AnsiString): Boolean; var ext, newExt: AnsiString; @@ -1457,7 +1468,7 @@ function readBool (st: TStream): Boolean; begin result := (readByte(st) <> 0); e procedure writeStr (st: TStream; const str: AnsiString; maxlen: LongWord=65535); begin - if (Length(str) > maxlen) then raise XStreamError.Create('string too long'); + if (Length(str) > maxlen) then raise EStreamError.Create('string too long'); if (maxlen <= 65535) then writeInt(st, Word(Length(str))) else writeInt(st, LongWord(Length(str))); if (Length(str) > 0) then st.WriteBuffer(str[1], Length(str)); end; @@ -1468,7 +1479,7 @@ var begin result := ''; if (maxlen <= 65535) then len := readWord(st) else len := Integer(readLongWord(st)); - if (len < 0) or (len > maxlen) then raise XStreamError.Create('string too long'); + if (len < 0) or (len > maxlen) then raise EStreamError.Create('string too long'); if (len > 0) then begin SetLength(result, len); -- 2.29.2