Conversation
Add a "charset" option to support languages that use double-byte encodings;
this fixes Chinese file names being displayed incorrectly.
```
// Defaults passed to utils.extend together with the caller's load options.
// NOTE(review): `charset` defaults to 'gbk' here, so it is present even when
// the caller never asked for a legacy charset -- confirm this is intended.
var loadDefaults = {
    base64: false,
    checkCRC32: false,
    optimizedBinaryString: false,
    createFolders: false,
    decodeFileName: utf8.utf8decode,
    charset: 'gbk'
};
options = utils.extend(options || {}, loadDefaults);
```
decode:function(u8,i){
i = i||0;
var charset = this.loadOptions.charset;
if(charset&&charset!='utf8'){
for(;i<u8.byteLength;i++){
if(u8[i]>127){
//not a ascii
var utf8 = false;
var k=0;
for(var j=1;j<u8[i].toString(2).split('0')[0].length;j++){
if(u8[i+j]>>6==2){
//10xxxxxx
k+=1;
}
}
if(k>0&&k==j-1&&u8[i+j]>>6!=2){
if(k==1&&charset=='gbk'){
//double byte
//some gbk will erro
//return this.decode(u8,j);
}else{
utf8 = true;
}
}
if(utf8===false)return new TextDecoder(charset).decode(u8);
break;
}
}
}
return new TextDecoder().decode(u8);
},
handleUTF8: function() {
var charset = this.loadOptions.charset,
utf8decode = utf8.utf8decode,
decode = this.loadOptions.decodeFileName||utf8decode;
if(charset&&'TextDecoder' in window&&'Uint8Array' in window){
this.fileNameStr = this.decode(this.fileName);
this.fileCommentStr = this.decode(this.fileComment);
}else if(this.useUTF8()){
this.fileNameStr = utf8decode(this.fileName);
this.fileCommentStr = utf8decode(this.fileComment);
}else{
var decodeParamType = support.uint8array ? "uint8array" : "array";
var upath = this.findExtraFieldUnicodePath();
if (upath !== null) {
this.fileNameStr = upath;
} else {
// ASCII text or unsupported code page
var fileNameByteArray = utils.transformTo(decodeParamType, this.fileName);
this.fileNameStr = decode(fileNameByteArray);
}
var ucomment = this.findExtraFieldUnicodeComment();
if (ucomment !== null) {
this.fileCommentStr = ucomment;
} else {
// ASCII text or unsupported code page
var commentByteArray = utils.transformTo(decodeParamType, this.fileComment);
this.fileCommentStr = decode(commentByteArray);
}
}
}, |
Let the load options accept a "charset" used to decode file names
|
Thanks for the PR! Does this only work for And a few thoughts/comments:
|
|
Add a "charset" option to support languages that use double-byte encodings;
this fixes Chinese file names being displayed incorrectly.