X-Git-Url: http://deadsoftware.ru/gitweb?p=d2df-sdl.git;a=blobdiff_plain;f=src%2Fsfs%2Fsfs.pas;h=0153c91379793a6ad188f32819c15fffa34fdec0;hp=0f2ff81379c0802bb6936867fa1ee693d17dc1e5;hb=6f8bfcced2d551b834fadcfcf2d6d1caed40fe98;hpb=fd5ef4c3c2e4c56f7b1547318b430a5d10b6860e diff --git a/src/sfs/sfs.pas b/src/sfs/sfs.pas index 0f2ff81..0153c91 100644 --- a/src/sfs/sfs.pas +++ b/src/sfs/sfs.pas @@ -235,6 +235,8 @@ function SFSDFPathEqu (sfspath: string; path: string): Boolean; function SFSUpCase (ch: Char): Char; +function utf8to1251 (s: TSFSString): TSFSString; + var // ïðàâäà: ðàçðåøåíî èñêàòü ôàéëî íå òîëüêî â ôàéëàõ äàííûõ, íî è íà äèñêå. @@ -800,7 +802,8 @@ begin ReadDirectory(); fFiles.Pack(); - for f := 0 to fFiles.Count-1 do + f := 0; + while f < fFiles.Count do begin sfi := TSFSFileInfo(fFiles[f]); // normalize name & path @@ -818,6 +821,7 @@ begin sfi.fPath := sfi.fPath+tmp; end; sfi.fPath := normalizePath(sfi.fPath); + if (length(sfi.fPath) = 0) and (length(sfi.fName) = 0) then sfi.Free else Inc(f); end; removeCommonPath(); end; @@ -1282,6 +1286,124 @@ begin end; +// ////////////////////////////////////////////////////////////////////////// // +// utils +// `ch`: utf8 start +// -1: invalid utf8 +function utf8CodeLen (ch: Word): Integer; +begin + if ch < $80 then begin result := 1; exit; end; + if (ch and $FE) = $FC then begin result := 6; exit; end; + if (ch and $FC) = $F8 then begin result := 5; exit; end; + if (ch and $F8) = $F0 then begin result := 4; exit; end; + if (ch and $F0) = $E0 then begin result := 3; exit; end; + if (ch and $E0) = $C0 then begin result := 2; exit; end; + result := -1; // invalid +end; + + +function utf8Valid (s: string): Boolean; +var + pos, len: Integer; +begin + result := false; + pos := 1; + while pos <= length(s) do + begin + len := utf8CodeLen(Byte(s[pos])); + if len < 1 then exit; // invalid sequence start + if pos+len-1 > length(s) then exit; // out of chars in string + Dec(len); + Inc(pos); + // check other sequence bytes + while len > 0 do + begin + if (Byte(s[pos]) and $C0) <> $80 then exit; + Dec(len); + Inc(pos); + end; + end; + result := true; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +const + // TODO: move this to a separate file + uni2wint: array [128..255] of Word = ( + $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, + $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, + $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, + $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, + $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, + $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, + $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, + $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F + ); + + +function decodeUtf8Char (s: TSFSString; var pos: Integer): char; +var + b, c: Integer; +begin + (* The following encodings are valid, except for the 5 and 6 byte + * combinations: + * 0xxxxxxx + * 110xxxxx 10xxxxxx + * 1110xxxx 10xxxxxx 10xxxxxx + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + *) + result := '?'; + if pos > length(s) then exit; + + b := Byte(s[pos]); + Inc(pos); + if b < $80 then begin result := char(b); exit; end; + + // mask out unused bits + if (b and $FE) = $FC then b := b and $01 + else if (b and $FC) = $F8 then b := b and $03 + else if (b and $F8) = $F0 then b := b and $07 + else if (b and $F0) = $E0 then b := b and $0F + else if (b and $E0) = $C0 then b := b and $1F + else exit; // invalid utf8 + + // now continue + while pos <= length(s) do + begin + c := Byte(s[pos]); + if (c and $C0) <> $80 then break; // no more + b := b shl 6; + b := b or (c and $3F); + Inc(pos); + end; + + // done, try 1251 + for c := 128 to 255 do if uni2wint[c] = b then begin result := char(c and $FF); exit; end; + // alas +end; + + +function utf8to1251 (s: TSFSString): TSFSString; +var + pos: Integer; +begin + if not utf8Valid(s) then begin result := s; exit; end; + pos := 1; + while pos <= length(s) do + begin + if Byte(s[pos]) >= $80 then break; + Inc(pos); + end; + if pos > length(s) then begin result := s; exit; end; // nothing to do here + result := ''; + pos := 1; + while pos <= length(s) do result := result+decodeUtf8Char(s, pos); +end; + + initialization factories := TObjectList.Create(true); volumes := TObjectList.Create(true);