From 6f8bfcced2d551b834fadcfcf2d6d1caed40fe98 Mon Sep 17 00:00:00 2001 From: Ketmar Dark Date: Fri, 15 Apr 2016 13:14:12 +0300 Subject: [PATCH] sfs: hackfix for utf-8 encoded names in zips --- src/sfs/sfs.pas | 124 ++++++++++++++++++++++++++++++++++++++- src/sfs/sfsZipFS.pas | 52 ++++++++++++++-- src/shared/WADEDITOR.pas | 2 +- 3 files changed, 172 insertions(+), 6 deletions(-) diff --git a/src/sfs/sfs.pas b/src/sfs/sfs.pas index 0f2ff81..0153c91 100644 --- a/src/sfs/sfs.pas +++ b/src/sfs/sfs.pas @@ -235,6 +235,8 @@ function SFSDFPathEqu (sfspath: string; path: string): Boolean; function SFSUpCase (ch: Char): Char; +function utf8to1251 (s: TSFSString): TSFSString; + var // ïðàâäà: ðàçðåøåíî èñêàòü ôàéëî íå òîëüêî â ôàéëàõ äàííûõ, íî è íà äèñêå. @@ -800,7 +802,8 @@ begin ReadDirectory(); fFiles.Pack(); - for f := 0 to fFiles.Count-1 do + f := 0; + while f < fFiles.Count do begin sfi := TSFSFileInfo(fFiles[f]); // normalize name & path @@ -818,6 +821,7 @@ begin sfi.fPath := sfi.fPath+tmp; end; sfi.fPath := normalizePath(sfi.fPath); + if (length(sfi.fPath) = 0) and (length(sfi.fName) = 0) then sfi.Free else Inc(f); end; removeCommonPath(); end; @@ -1282,6 +1286,124 @@ begin end; +// ////////////////////////////////////////////////////////////////////////// // +// utils +// `ch`: utf8 start +// -1: invalid utf8 +function utf8CodeLen (ch: Word): Integer; +begin + if ch < $80 then begin result := 1; exit; end; + if (ch and $FE) = $FC then begin result := 6; exit; end; + if (ch and $FC) = $F8 then begin result := 5; exit; end; + if (ch and $F8) = $F0 then begin result := 4; exit; end; + if (ch and $F0) = $E0 then begin result := 3; exit; end; + if (ch and $E0) = $C0 then begin result := 2; exit; end; + result := -1; // invalid +end; + + +function utf8Valid (s: string): Boolean; +var + pos, len: Integer; +begin + result := false; + pos := 1; + while pos <= length(s) do + begin + len := utf8CodeLen(Byte(s[pos])); + if len < 1 then exit; // invalid sequence start + if pos+len-1 > length(s) then exit; // out of chars in string + Dec(len); + Inc(pos); + // check other sequence bytes + while len > 0 do + begin + if (Byte(s[pos]) and $C0) <> $80 then exit; + Dec(len); + Inc(pos); + end; + end; + result := true; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +const + // TODO: move this to a separate file + uni2wint: array [128..255] of Word = ( + $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, + $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, + $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, + $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, + $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, + $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, + $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, + $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F + ); + + +function decodeUtf8Char (s: TSFSString; var pos: Integer): char; +var + b, c: Integer; +begin + (* The following encodings are valid, except for the 5 and 6 byte + * combinations: + * 0xxxxxxx + * 110xxxxx 10xxxxxx + * 1110xxxx 10xxxxxx 10xxxxxx + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + *) + result := '?'; + if pos > length(s) then exit; + + b := Byte(s[pos]); + Inc(pos); + if b < $80 then begin result := char(b); exit; end; + + // mask out unused bits + if (b and $FE) = $FC then b := b and $01 + else if (b and $FC) = $F8 then b := b and $03 + else if (b and $F8) = $F0 then b := b and $07 + else if (b and $F0) = $E0 then b := b and $0F + else if (b and $E0) = $C0 then b := b and $1F + else exit; // invalid utf8 + + // now continue + while pos <= length(s) do + begin + c := Byte(s[pos]); + if (c and $C0) <> $80 then break; // no more + b := b shl 6; + b := b or (c and $3F); + Inc(pos); + end; + + // done, try 1251 + for c := 128 to 255 do if uni2wint[c] = b then begin result := char(c and $FF); exit; end; + // alas +end; + + +function utf8to1251 (s: TSFSString): TSFSString; +var + pos: Integer; +begin + if not utf8Valid(s) then begin result := s; exit; end; + pos := 1; + while pos <= length(s) do + begin + if Byte(s[pos]) >= $80 then break; + Inc(pos); + end; + if pos > length(s) then begin result := s; exit; end; // nothing to do here + result := ''; + pos := 1; + while pos <= length(s) do result := result+decodeUtf8Char(s, pos); +end; + + initialization factories := TObjectList.Create(true); volumes := TObjectList.Create(true); diff --git a/src/sfs/sfsZipFS.pas b/src/sfs/sfsZipFS.pas index ee1cc1c..4abd880 100644 --- a/src/sfs/sfsZipFS.pas +++ b/src/sfs/sfsZipFS.pas @@ -215,6 +215,9 @@ var crc, psz, usz: LongWord; buf: packed array of Byte; bufPos, bufUsed: Integer; + efid, efsz: Word; + izver: Byte; + izcrc: LongWord; begin SetLength(buf, 0); // read local directory @@ -230,12 +233,54 @@ begin fi.fPackSz := 0; fi.fMethod := 0; + //fi.fOfs := fFileStream.Position; + fFileStream.ReadBuffer(lhdr, SizeOf(lhdr)); if lhdr.fnameSz > 255 then name[0] := #255 else name[0] := chr(lhdr.fnameSz); fFileStream.ReadBuffer(name[1], Length(name)); fFileStream.Seek(lhdr.fnameSz-Length(name), soCurrent); // rest of the name (if any) - fi.fName := name; - fFileStream.Seek(lhdr.localExtraSz, soCurrent); + fi.fName := utf8to1251(name); + //writeln(Format('0x%08x : %s', [Integer(fi.fOfs), name])); + + // here we should process extra field: it may contain utf8 filename + //fFileStream.Seek(lhdr.localExtraSz, soCurrent); + while lhdr.localExtraSz >= 4 do + begin + efid := 0; + efsz := 0; + fFileStream.ReadBuffer(efid, 2); + fFileStream.ReadBuffer(efsz, 2); + Dec(lhdr.localExtraSz, 4); + if efsz > lhdr.localExtraSz then break; + // Info-ZIP Unicode Path Extra Field? + if (efid = $7075) and (efsz <= 255+5) and (efsz > 5) then + begin + fFileStream.ReadBuffer(izver, 1); + if izver <> 1 then + begin + // skip it + Dec(efsz, 1); + end + else + begin + Dec(lhdr.localExtraSz, efsz); + fFileStream.ReadBuffer(izcrc, 4); // name crc, ignore it + Dec(efsz, 5); + name[0] := chr(efsz); + fFileStream.ReadBuffer(name[1], Length(name)); + fi.fName := utf8to1251(name); + break; + end; + end; + // skip it + if efsz > 0 then + begin + fFileStream.Seek(efsz, soCurrent); + Dec(lhdr.localExtraSz, efsz); + end; + end; + // skip the rest + if lhdr.localExtraSz > 0 then fFileStream.Seek(lhdr.localExtraSz, soCurrent); if (lhdr.flags and 1) <> 0 then begin @@ -284,8 +329,7 @@ begin begin bufPos := 0; // int64! - if fFileStream.Size-fFileStream.Position > Length(buf) then - bufUsed := Length(buf) + if fFileStream.Size-fFileStream.Position > Length(buf) then bufUsed := Length(buf) else bufUsed := fFileStream.Size-fFileStream.Position; if bufUsed = 0 then raise ESFSError.Create('invalid ZIP file'); fFileStream.ReadBuffer(buf[0], bufUsed); diff --git a/src/shared/WADEDITOR.pas b/src/shared/WADEDITOR.pas index 079dc7d..ac9e747 100644 --- a/src/shared/WADEDITOR.pas +++ b/src/shared/WADEDITOR.pas @@ -169,7 +169,7 @@ begin begin fi := fIter.Files[f]; if fi = nil then continue; - //e_WriteLog(Format('DFWAD: searching for [%s : %s] in [%s]; current is [%s : %s] (%d, %d)', [Section, Resource, fFileName, fi.path, fi.name, SFSStrEqu(fi.path, Section), SFSStrEqu(fi.name, Resource)]), MSG_NOTIFY); + //e_WriteLog(Format('DFWAD: searching for [%s : %s] in [%s]; current is [%s : %s]', [Section, Resource, fFileName, fi.path, fi.name]), MSG_NOTIFY); if {SFSStrEqu}SFSDFPathEqu(fi.path, Section) and SFSStrEqu(removeExt(fi.name), Resource) then begin // i found her! -- 2.29.2