Unicode文字列からUTF-8バイト配列への変換(unicode to array of utf8)の速度比較

Uint8Arrayを使うのが正解。
実装が簡単(タイプ数が少ない)なのはFileReaderを使用した場合。

http://jsperf.com/test-unicode-to-utf8
http://jsperf.com/unicode-to-utf8-long

あー、uriにtestつけるの忘れてる。

/**
 * Encodes a JavaScript string as UTF-8 and returns the bytes in a plain Array.
 *
 * JavaScript strings are sequences of UTF-16 code units, so characters above
 * U+FFFF arrive as a surrogate pair. Each valid pair is combined into a single
 * code point and emitted as one 4-byte sequence. A surrogate that is not part
 * of a valid pair is replaced with U+FFFD (bytes EF BF BD) so the output is
 * always well-formed UTF-8.
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes, one number (0-255) per element.
 */
function unicode2utf8_array(str){
  var n = str.length,
      idx = -1,
      bytes = [],
      i, c, low;

  for(i = 0; i < n; ++i){
    c = str.charCodeAt(i);

    if(c >= 0xD800 && c <= 0xDFFF){
      // Only a high surrogate (D800-DBFF) followed by a low surrogate
      // (DC00-DFFF) forms a valid pair; anything else is a lone surrogate.
      low = (c <= 0xDBFF && i + 1 < n) ? str.charCodeAt(i + 1) : 0;
      if(low >= 0xDC00 && low <= 0xDFFF){
        c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
        ++i; // the low surrogate has been consumed
      } else {
        c = 0xFFFD;
      }
    }

    if(c <= 0x7F){
      bytes[++idx] = c;
    } else if(c <= 0x7FF){
      bytes[++idx] = 0xC0 | (c >>> 6);
      bytes[++idx] = 0x80 | (c & 0x3F);
    } else if(c <= 0xFFFF){
      bytes[++idx] = 0xE0 | (c >>> 12);
      bytes[++idx] = 0x80 | ((c >>> 6) & 0x3F);
      bytes[++idx] = 0x80 | (c & 0x3F);
    } else {
      bytes[++idx] = 0xF0 | (c >>> 18);
      bytes[++idx] = 0x80 | ((c >>> 12) & 0x3F);
      bytes[++idx] = 0x80 | ((c >>> 6) & 0x3F);
      bytes[++idx] = 0x80 | (c & 0x3F);
    }
  }
  return bytes;
}

/**
 * Encodes a JavaScript string as UTF-8 into a Uint8Array.
 *
 * The output buffer starts at 512 bytes and doubles whenever fewer than four
 * free bytes remain, so the next character (at most 4 bytes) always fits.
 *
 * JavaScript strings are sequences of UTF-16 code units, so characters above
 * U+FFFF arrive as a surrogate pair. Each valid pair is combined into a single
 * code point and emitted as one 4-byte sequence. A surrogate that is not part
 * of a valid pair is replaced with U+FFFD (bytes EF BF BD) so the output is
 * always well-formed UTF-8.
 *
 * @param {string} str - The string to encode.
 * @returns {Uint8Array} A view (via subarray) over the working buffer that
 *     covers exactly the encoded bytes; the underlying ArrayBuffer may be
 *     larger than the returned view.
 */
function unicode2utf8_uint8array(str){
  var n = str.length,
      idx = -1,
      byteLength = 512,
      bytes = new Uint8Array(byteLength),
      i, c, low, grown;

  for(i = 0; i < n; ++i){
    c = str.charCodeAt(i);

    if(c >= 0xD800 && c <= 0xDFFF){
      // Only a high surrogate (D800-DBFF) followed by a low surrogate
      // (DC00-DFFF) forms a valid pair; anything else is a lone surrogate.
      low = (c <= 0xDBFF && i + 1 < n) ? str.charCodeAt(i + 1) : 0;
      if(low >= 0xDC00 && low <= 0xDFFF){
        c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
        ++i; // the low surrogate has been consumed
      } else {
        c = 0xFFFD;
      }
    }

    if(c <= 0x7F){
      bytes[++idx] = c;
    } else if(c <= 0x7FF){
      bytes[++idx] = 0xC0 | (c >>> 6);
      bytes[++idx] = 0x80 | (c & 0x3F);
    } else if(c <= 0xFFFF){
      bytes[++idx] = 0xE0 | (c >>> 12);
      bytes[++idx] = 0x80 | ((c >>> 6) & 0x3F);
      bytes[++idx] = 0x80 | (c & 0x3F);
    } else {
      bytes[++idx] = 0xF0 | (c >>> 18);
      bytes[++idx] = 0x80 | ((c >>> 12) & 0x3F);
      bytes[++idx] = 0x80 | ((c >>> 6) & 0x3F);
      bytes[++idx] = 0x80 | (c & 0x3F);
    }

    // idx is the last written index. Grow once fewer than 4 free bytes
    // remain so the next iteration can write a full 4-byte sequence.
    if(byteLength - idx <= 4){
      byteLength *= 2;
      grown = new Uint8Array(byteLength);
      grown.set(bytes);
      bytes = grown;
    }
  }
  return bytes.subarray(0, idx + 1);
}

/**
 * Asynchronously converts a string to bytes by wrapping it in a Blob and
 * reading that Blob back as an ArrayBuffer with a FileReader.
 *
 * The callback runs from the reader's `loadend` handler and receives a
 * Uint8Array constructed from the reader's `result`.
 *
 * @param {string} str - The string to convert.
 * @param {function(Uint8Array): void} callback - Receives the resulting bytes.
 */
function unicode2utf8_filereader(str, callback) {
  var reader = new FileReader();
  var blob = new Blob([str]);

  function handleLoadEnd() {
    callback(new Uint8Array(reader.result));
  }

  reader.onloadend = handleLoadEnd;
  reader.readAsArrayBuffer(blob);
}

次はarray of utf8 to unicodeで。
文字列結合と関数呼び出しの勝負(すごい汚コードになる予感)。
一番素直なのはFileReader#readAsTextを使う方法かな。


posted by 右京 | javascript
blog comments powered by Disqus
×

この広告は1年以上新しい記事の投稿がないブログに表示されております。