288 lines
5.3 MiB
JavaScript
288 lines
5.3 MiB
JavaScript
|
'use strict';
|
||
|
|
||
|
var base64 = require('base64-js');
|
||
|
|
||
|
/**
 * Normalizes a required module so that it can always be read through `.default`.
 *
 * @param {*} e - The value returned by `require`.
 * @returns {*} `e` itself when it is truthy and flagged with `__esModule`,
 *   otherwise a fresh wrapper object `{ default: e }`.
 */
function _interopDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
||
|
|
||
|
var base64__default = /*#__PURE__*/_interopDefault(base64);
|
||
|
|
||
|
// Build helpers used to attach a public field to an object.
var __defProp = Object.defineProperty;

/**
 * Writes `value` under `key` on `obj`.
 *
 * When the key is already reachable on `obj` (own or inherited) the value is
 * installed as an own enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @returns {*} `obj` when the property was defined, `value` when it was assigned.
 */
var __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, {
      enumerable: true,
      configurable: true,
      writable: true,
      value
    });
  }
  obj[key] = value;
  return value;
};

/**
 * Sets a public field on `obj` and hands the value back.
 * Non-symbol keys are converted to strings before being used.
 *
 * @returns {*} The `value` that was stored.
 */
var __publicField = (obj, key, value) => {
  const propertyKey = typeof key === "symbol" ? key : key + "";
  __defNormalProp(obj, propertyKey, value);
  return value;
};
|
||
|
|
||
|
// src/ranks/gpt2.js
|
||
|
var gpt2_default = { "explicit_n_vocab": 50257, "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= IHdpdGg= 
IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= IHNoZQ== 
IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXVzZQ== IGhpbQ== b25z IFs= IHJv Zm9ybQ== aWxk YXRlcw==
|
||
|
|
||
|
// src/ranks/p50k_base.js
|
||
|
var p50k_base_default = { "explicit_n_vocab": 50281, "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= 
IHdpdGg= IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= 
IHNoZQ== IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXVzZQ== IGhpbQ== b25z IFs= IHJv Zm9ybQ== aWxk YXRl
|
||
|
|
||
|
// src/ranks/p50k_edit.js
|
||
|
var p50k_edit_default = { "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256, "<|fim_prefix|>": 50281, "<|fim_middle|>": 50282, "<|fim_suffix|>": 50283 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo 
aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= IHdpdGg= IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu 
aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= IHNoZQ== IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXV
|
||
|
|
||
|
// src/ranks/r50k_base.js
|
||
|
var r50k_base_default = { "explicit_n_vocab": 50257, "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= 
IHdpdGg= IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= 
IHNoZQ== IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXVzZQ== IGhpbQ== b25z IFs= IHJv Zm9ybQ== aWxk YXRl
|
||
|
|
||
|
// src/ranks/cl100k_base.js
|
||
|
var cl100k_base_default = { "pat_str": "('s|'S|'t|'T|'re|'rE|'Re|'RE|'ve|'vE|'Ve|'VE|'m|'M|'ll|'lL|'Ll|'LL|'d|'D)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 100257, "<|fim_prefix|>": 100258, "<|fim_middle|>": 100259, "<|fim_suffix|>": 100260, "<|endofprompt|>": 100276 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== ICA= ICAgIA== aW4= IHQ= ICAgICAgICA= ZXI= ICAg b24= IGE= cmU= YXQ= c3Q= ZW4= b3I= IHRo Cgo= IGM= bGU= IHM= aXQ= YW4= YXI= YWw= IHRoZQ== Owo= IHA= IGY= b3U= ID0= aXM= ICAgICAgIA== aW5n ZXM= IHc= aW9u ZWQ= aWM= IGI= IGQ= ZXQ= IG0= IG8= CQk= cm8= YXM= ZWw= Y3Q= bmQ= IGlu IGg= ZW50 aWQ= IG4= YW0= ICAgICAgICAgICA= IHRv IHJl LS0= 
IHs= IG9m b20= KTsK aW0= DQo= ICg= aWw= Ly8= IGFuZA== dXI= c2U= IGw= ZXg= IFM= YWQ= ICI= Y2g= dXQ= aWY= Kio= IH0= ZW0= b2w= ICAgICAgICAgICAgICAgIA== dGg= KQo= IHsK IGc= aWc= aXY= LAo= Y2U= b2Q= IHY= YXRl IFQ= YWc= YXk= ICo= b3Q= dXM= IEM= IHN0 IEk= dW4= dWw= dWU= IEE= b3c= ICc= ZXc= IDw= YXRpb24= KCk= IGZvcg== YWI= b3J0 dW0= YW1l IGlz cGU= dHI= Y2s= 4oA= IHk= aXN0 LS0tLQ== LgoK aGU= IGU= bG8= IE0= IGJl ZXJz IG9u IGNvbg== YXA= dWI= IFA= ICAgICAgICAgICAgICAg YXNz aW50 Pgo= bHk= dXJu ICQ= OwoK YXY= cG9ydA== aXI= LT4= bnQ= Y3Rpb24= ZW5k IGRl MDA= aXRo b3V0 dHVybg== b3Vy ICAgICA= bGlj cmVz cHQ= PT0= IHRoaXM= IHdo IGlm IEQ= dmVy YWdl IEI= aHQ= ZXh0 PSI= IHRoYXQ= KioqKg== IFI= IGl0 ZXNz IEY= IHI= b3M= YW5k IGFz ZWN0 a2U= cm9t IC8v Y29u IEw= KCI= cXU= bGFzcw== IHdpdGg= aXo= ZGU= IE4= IGFs b3A= dXA= Z2V0 IH0K aWxl IGFu YXRh b3Jl cmk= IHBybw== Ow0K CQkJCQ== dGVy YWlu IFc= IEU= IGNvbQ== IHJldHVybg== YXJ0 IEg= YWNr aW1wb3J0 dWJsaWM= IG9y ZXN0 bWVudA== IEc= YWJsZQ== IC0= aW5l aWxs aW5k ZXJl Ojo= aXR5 ICs= IHRy ZWxm aWdodA== KCc= b3Jt dWx0 c3Ry Li4= Iiw= IHlvdQ== eXBl cGw= IG5ldw== IGo= ICAgICAgICAgICAgICAgICAgIA== IGZyb20= IGV4 IE8= MjA= bGQ= IFs= b2M= Ogo= IHNl IGxl LS0tLS0tLS0= LnM= ewo= Jyw= YW50 IGF0 YXNl LmM= IGNo PC8= YXZl YW5n IGFyZQ== IGludA== 4oCZ X3Q= ZXJ0 aWFs YWN0 fQo= aXZl b2Rl b3N0 IGNsYXNz IG5vdA== b2c= b3Jk YWx1ZQ== YWxs ZmY= KCk7Cg== b250 aW1l YXJl IFU= IHBy IDo= aWVz aXpl dXJl IGJ5 aXJl IH0KCg== LnA= IHNo aWNl YXN0 cHRpb24= dHJpbmc= b2s= X18= Y2w= IyM= IGhl YXJk KS4= IEA= aWV3 CQkJ IHdhcw== aXA= dGhpcw== IHU= IFRoZQ== aWRl YWNl aWI= YWM= cm91 IHdl amVjdA== IHB1YmxpYw== YWs= dmU= YXRo b2lk ID0+ dXN0 cXVl IHJlcw== KSk= J3M= IGs= YW5z eXN0 dW5jdGlvbg== KioqKioqKio= IGk= IHVz cHA= MTA= b25l YWls PT09PQ== bmFtZQ== IHN0cg== IC8= ICY= YWNo ZGl2 eXN0ZW0= ZWxs IGhhdmU= ZXJy b3VsZA== dWxs cG9u IEo= X3A= ID09 aWdu U3Q= Lgo= IHBs KTsKCg== Zm9ybQ== cHV0 b3VudA== fQoK ZGQ= aXRl IGdldA== cnI= b21l IOKA YXJhbQ== Y2M= ICov RVI= SW4= bGVz X3M= b25n aWU= IGNhbg== IFY= ZXJ2 cHI= 
IHVu cm93 YmVy IGRv bGw= IGVs IHNlbGY= YXRlZA== YXJ5 IC4= J10= dWQ= IGVu IFRo ICAgICAgICAgICAgICAgIC
|
||
|
|
||
|
// src/ranks/o200k_base.js
|
||
|
var o200k_base_default = { "pat_str": "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+('s|'S|'t|'T|'re|'rE|'Re|'RE|'ve|'vE|'Ve|'VE|'m|'M|'ll|'lL|'Ll|'LL|'d|'D)?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*('s|'S|'t|'T|'re|'rE|'Re|'RE|'ve|'vE|'Ve|'VE|'m|'M|'ll|'lL|'Ll|'LL|'d|'D)?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 199999, "<|endofprompt|>": 200018 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== ICA= ICAgIA== aW4= ZXI= IHQ= IGE= ZW4= b24= cmU= IHM= YXQ= b3I= ZXM= ICAgICAgICA= YW4= ICAg IGQ= aGU= IGM= IHA= aXM= YXI= aXQ= Cgo= YWw= 4KQ= bGU= b3U= IG0= IGY= IHc= IGI= YXM= aW5n 
IHRoZQ== aWM= ZXQ= IG8= aW9u ZWQ= ZWw= IG4= cm8= ZW50 INA= bmQ= c3Q= 4YM= 0LA= IGw= IGlu Owo= Y3Q= ICAgICAgIA== b20= aWw= IGg= YW0= ID0= aWQ= IHRv 0L4= 4oA= IGU= 2Kc= aW0= IHJl IHY= YWQ= IHRo IGFuZA== 0LU= IG9m IGc= dXI= 0Lg= Y2g= IOA= IGRl CQk= IFM= IHU= 0YI= dXQ= b2w= 0L0= IHk= aWc= c2U= 0YA= b3Q= ZW0= YWc= aXY= ICg= cXU= ICAgICAgICAgICA= IFQ= IHs= IEE= YXk= IEk= 4KU= YWM= 4KY= dWw= KTsK IEM= LgoK b3M= aWY= IOCk b2Q= 0YE= DQo= dHI= IGs= dW4= 2YQ= LS0= ZXN0 w6k= YWI= YXRl aXI= 4Lg= IGlz 0Ls= b3c= dXM= 4LQ= YXRpb24= IH0= INg= b3J0 IE0= ICI= Ly8= dW0= IGZvcg== aWU= 4Ko= IFA= dGVy Y2U= b3Vy IG9u YXA= dGg= IHsK IGNvbg== YXY= ICAgICAgICAgICAgICAgIA== ZXJz Kio= dGU= LAo= IGJl 2YU= IEQ= KCk= 2Yg= IEI= ZW5k IHN0 aXN0 YWs= bHk= cGU= ICo= 2YY= YW5k IHI= 4LI= cmVz dWI= b2M= 2LE= IGFs 4LA= ZXNz aXRo 4oCZ 0L7Q IHBybw== IGo= 0YM= 2Yo= YW1l dmVy KQo= cG9ydA== IGFu w6E= w6Q= IEw= IGNvbQ== IG5l IEY= MjA= YXNz IEU= IHNl bGlj IFI= ICc= 0Lo= INc= IDw= 0Ys= dWU= b3A= b3V0 IGVu aHQ= IHdo IGFz 4K4= IHF1 Y3Rpb24= 44A= YWdl IE4= b2c= IGl0 IHlvdQ== aXo= IHdpdGg= IHRoYXQ= Li4= IFc= IEg= aXA= 0LQ= ZXg= aW50 aWxs YW50 dXJu IHRoaXM= INin YXJ0 IHRy IEc= 0Lw= IGhl OwoK IG9y MDA= IGxl ICAgICAgICAgICAgICAg aWs= bWVudA== 4Kc= b3Jl cm9t ZXJl IGV4 aW5l 4LU= YW5n 2Ko= LS0tLQ== 4KS+ 0LI= aW5k Z2V0 Pgo= YWlu b2s= aWI= dWQ= cGw= dHVybg== ICAgICA= w7M= IE8= IC0= ZWN0 aWE= aXR5 IHVu IGlm 2Yc= IGF0 24w= Y29u cGVy 0Y8= 4bs= YXRh 2K8= ICQ= IGNo IHBy b3Jt YWNr IGFyZQ== aWxl cHQ= w60= IGxh Lgo= IOKA PT0= aW1wb3J0 YWJsZQ== aWFs b3N0 Y2w= aWVz IGNs KCI= b3Y= dXN0 IOCm w7w= w7Y= 4Ks= IGk= dWx0 YXZl bnQ= IHo= PSI= IHdl bGQ= aWdodA== dWJsaWM= aXZl YWxs 4YOQ YWN0 INin2YQ= 77w= IGZyb20= w6U= IHN1 aXJl KioqKg== ZWxs YXJk 15k= c2Vy dHJpbmc= IFU= IC8v aWNl b3Jk 44E= IH0K IOCq ZmY= LT4= Ow0K bmU= xLE= ZGU= cmE= 0Yw= YW5z aWRl YXNl 1aE= IG5ldw== IGRv IHJldHVybg== IFRoZQ== 5Lg= IG5vdA== CQkJCQ== dXJl 4KWH YXN0 b2I= IFY= 4KSw IHBs IHlvdXI= 0Lc= IOGD SW4= cHI= Y29t LnM= IHNo aW1l IEo= YXJl b250 IGFy 4K8= INC/ eXBl IGVs aXRl 
2LM= 15U= cHA= 5aQ= IGJ5 2Kg= IEs= ICs= 4LE= IHF1ZQ== aWV3 YWls 0Yc= IGNhbg== 0L8= MjAx IG1l INGB 4
|
||
|
|
||
|
// src/core.ts
|
||
|
/**
 * Greedily merges adjacent byte ranges of `piece`, always picking the
 * neighbouring pair whose combined bytes have the lowest rank.
 *
 * @param {ArrayLike<number>} piece - Byte sequence; must provide `slice`, and
 *   the slices must provide `join`.
 * @param {Map<string, number>} ranks - Lookup from comma-joined byte values to rank.
 * @returns {{ start: number, end: number }[]} Half-open ranges into `piece`,
 *   in order, left once no adjacent pair has a rank any more.
 */
function bytePairMerge(piece, ranks) {
  // Start with one single-byte range per position.
  const segments = [];
  for (let offset = 0; offset < piece.length; offset += 1) {
    segments.push({ start: offset, end: offset + 1 });
  }

  while (segments.length > 1) {
    let bestRank = null;
    let bestIndex = -1;

    for (let left = 0; left + 1 < segments.length; left += 1) {
      const key = piece.slice(segments[left].start, segments[left + 1].end).join(",");
      const candidate = ranks.get(key);
      // Strict comparison keeps the leftmost pair when ranks tie.
      if (candidate != null && (bestRank == null || candidate < bestRank)) {
        bestRank = candidate;
        bestIndex = left;
      }
    }

    if (bestRank == null) {
      // No adjacent pair is known to the rank table: nothing left to merge.
      return segments;
    }

    const merged = {
      start: segments[bestIndex].start,
      end: segments[bestIndex + 1].end
    };
    segments.splice(bestIndex, 2, merged);
  }

  return segments;
}
|
||
|
/**
 * Converts a byte sequence into the ranks of its merged byte ranges.
 *
 * @param {ArrayLike<number>} piece - Byte sequence providing `slice` and `join`.
 * @param {Map<string, number>} ranks - Lookup from comma-joined byte values to rank.
 * @returns {number[]} Ranks of the ranges produced by `bytePairMerge`; ranges
 *   without a rank are left out. A single-byte piece is looked up directly and
 *   its result is returned as-is, even when the lookup finds nothing.
 */
function bytePairEncode(piece, ranks) {
  const rankOf = (bytes) => ranks.get(bytes.join(","));

  if (piece.length === 1) {
    return [rankOf(piece)];
  }

  const tokens = [];
  for (const range of bytePairMerge(piece, ranks)) {
    const rank = rankOf(piece.slice(range.start, range.end));
    if (rank != null) {
      tokens.push(rank);
    }
  }
  return tokens;
}
|
||
|
/**
 * Escapes regular-expression metacharacters so the text can be embedded in a
 * pattern and matched literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(str) {
  const metaCharacters = /[\\^$*+?.()|[\]{}]/g;
  return str.replace(metaCharacters, "\\$&");
}
|
||
|
/**
 * Byte-pair-encoding tokenizer built from a rank table.
 *
 * The rank table (`ranks`) supplies:
 * - `pat_str`: source of the regular expression used to split text into pieces;
 * - `special_tokens`: object mapping special token text to its token id;
 * - `bpe_ranks`: newline-separated lines of space-separated fields. The first
 *   field of a line is ignored, the second is a base-10 offset, and each
 *   following field is a base64-encoded token whose rank is the offset plus
 *   its position among the tokens of that line.
 */
var _Tiktoken = class {
  /** @internal */
  specialTokens;
  /** @internal */
  inverseSpecialTokens;
  /** @internal */
  patStr;
  /** @internal */
  textEncoder = new TextEncoder();
  /** @internal */
  textDecoder = new TextDecoder("utf-8");
  /** @internal */
  rankMap = /* @__PURE__ */ new Map();
  /** @internal */
  textMap = /* @__PURE__ */ new Map();

  /**
   * @param {{ pat_str: string, special_tokens: Object<string, number>, bpe_ranks: string }} ranks
   *   Rank table as described on the class.
   * @param {Object<string, number>} [extendedSpecialTokens] - Extra special
   *   tokens; entries here override same-named entries of `ranks.special_tokens`.
   */
  constructor(ranks, extendedSpecialTokens) {
    this.patStr = ranks.pat_str;

    // Expand the compact line format into a base64-token -> rank object.
    const uncompressed = ranks.bpe_ranks.split("\n").filter(Boolean).reduce((memo, x) => {
      const [, offsetStr, ...tokens] = x.split(" ");
      const offset = Number.parseInt(offsetStr, 10);
      tokens.forEach((token, i) => {
        memo[token] = offset + i;
      });
      return memo;
    }, {});

    // rankMap: comma-joined bytes -> rank; textMap: rank -> bytes.
    for (const [token, rank] of Object.entries(uncompressed)) {
      const bytes = base64__default.default.toByteArray(token);
      this.rankMap.set(bytes.join(","), rank);
      this.textMap.set(rank, bytes);
    }

    this.specialTokens = { ...ranks.special_tokens, ...extendedSpecialTokens };
    this.inverseSpecialTokens = Object.entries(this.specialTokens).reduce((memo, [text, rank]) => {
      memo[rank] = this.textEncoder.encode(text);
      return memo;
    }, {});
  }

  /**
   * Encodes text into token ids.
   *
   * @param {string} text - Text to tokenize.
   * @param {string[] | "all"} [allowedSpecial=[]] - Special tokens that are
   *   emitted as their special id when found in `text`; `"all"` allows every
   *   known special token.
   * @param {string[] | "all"} [disallowedSpecial="all"] - Special tokens whose
   *   presence in `text` is an error; `"all"` means every known special token
   *   that is not allowed.
   * @returns {number[]} Token ids in text order.
   * @throws {Error} When `text` contains a disallowed special token.
   */
  encode(text, allowedSpecial = [], disallowedSpecial = "all") {
    const regexes = new RegExp(this.patStr, "ug");
    const specialRegex = _Tiktoken.specialTokenRegex(
      Object.keys(this.specialTokens)
    );
    const ret = [];
    const allowedSpecialSet = new Set(
      allowedSpecial === "all" ? Object.keys(this.specialTokens) : allowedSpecial
    );
    const disallowedSpecialSet = new Set(
      disallowedSpecial === "all" ? Object.keys(this.specialTokens).filter(
        (x) => !allowedSpecialSet.has(x)
      ) : disallowedSpecial
    );

    if (disallowedSpecialSet.size > 0) {
      const disallowedSpecialRegex = _Tiktoken.specialTokenRegex([
        ...disallowedSpecialSet
      ]);
      const specialMatch = text.match(disallowedSpecialRegex);
      if (specialMatch != null) {
        throw new Error(
          `The text contains a special token that is not allowed: ${specialMatch[0]}`
        );
      }
    }

    let start = 0;
    while (true) {
      // Find the next special token that is allowed; special tokens that are
      // not allowed are skipped here and end up encoded as ordinary text.
      let nextSpecial = null;
      let startFind = start;
      while (true) {
        specialRegex.lastIndex = startFind;
        nextSpecial = specialRegex.exec(text);
        if (nextSpecial == null || allowedSpecialSet.has(nextSpecial[0]))
          break;
        startFind = nextSpecial.index + 1;
      }

      // Encode the ordinary text up to that special token (or the end).
      const end = nextSpecial?.index ?? text.length;
      for (const match of text.substring(start, end).matchAll(regexes)) {
        const piece = this.textEncoder.encode(match[0]);
        const token2 = this.rankMap.get(piece.join(","));
        if (token2 != null) {
          // The whole piece is a known token; no merging needed.
          ret.push(token2);
          continue;
        }
        ret.push(...bytePairEncode(piece, this.rankMap));
      }

      if (nextSpecial == null)
        break;

      const token = this.specialTokens[nextSpecial[0]];
      ret.push(token);
      start = nextSpecial.index + nextSpecial[0].length;
    }
    return ret;
  }

  /**
   * Decodes token ids back into text.
   *
   * @param {ArrayLike<number>} tokens - Token ids; ids that are neither a
   *   known rank nor a known special token are skipped.
   * @returns {string} The concatenated token bytes decoded with the
   *   instance's UTF-8 `TextDecoder`.
   */
  decode(tokens) {
    const res = [];
    let length = 0;
    for (let i2 = 0; i2 < tokens.length; ++i2) {
      const token = tokens[i2];
      const bytes = this.textMap.get(token) ?? this.inverseSpecialTokens[token];
      if (bytes != null) {
        res.push(bytes);
        length += bytes.length;
      }
    }

    // Concatenate first so that the bytes are decoded in a single pass.
    const mergedArray = new Uint8Array(length);
    let i = 0;
    for (const bytes of res) {
      mergedArray.set(bytes, i);
      i += bytes.length;
    }
    return this.textDecoder.decode(mergedArray);
  }
};

var Tiktoken = _Tiktoken;

/**
 * Builds a global regular expression that matches any of the given token
 * strings literally.
 *
 * @param {string[]} tokens - Token strings to match.
 * @returns {RegExp} A regex with the `g` flag. For an empty list the regex
 *   never matches.
 */
Tiktoken.specialTokenRegex = (tokens) => {
  if (tokens.length === 0) {
    // Joining zero alternatives gives an empty pattern, which matches the
    // empty string at every position; "none of these tokens" must match nothing.
    return /(?!)/g;
  }
  return new RegExp(tokens.map((i) => escapeRegex(i)).join("|"), "g");
};
|
||
|
/**
 * Returns the name of the encoding used by a model.
 *
 * @param {string} model - Model name, compared exactly against the known names.
 * @returns {"gpt2" | "p50k_base" | "p50k_edit" | "r50k_base" | "cl100k_base" | "o200k_base"}
 *   Name of the encoding for `model`.
 * @throws {Error} "Unknown model" when `model` is not one of the known names.
 */
function getEncodingNameForModel(model) {
  switch (model) {
    case "gpt2": {
      return "gpt2";
    }
    case "code-cushman-001":
    case "code-cushman-002":
    case "code-davinci-001":
    case "code-davinci-002":
    case "cushman-codex":
    case "davinci-codex":
    case "text-davinci-002":
    case "text-davinci-003": {
      return "p50k_base";
    }
    case "code-davinci-edit-001":
    case "text-davinci-edit-001": {
      return "p50k_edit";
    }
    case "ada":
    case "babbage":
    case "code-search-ada-code-001":
    case "code-search-babbage-code-001":
    case "curie":
    case "davinci":
    case "text-ada-001":
    case "text-babbage-001":
    case "text-curie-001":
    case "text-davinci-001":
    case "text-search-ada-doc-001":
    case "text-search-babbage-doc-001":
    case "text-search-curie-doc-001":
    case "text-search-davinci-doc-001":
    case "text-similarity-ada-001":
    case "text-similarity-babbage-001":
    case "text-similarity-curie-001":
    case "text-similarity-davinci-001": {
      return "r50k_base";
    }
    // "davinci-002" and "babbage-002" belong to this group: the reference
    // tiktoken model table lists both under cl100k_base, not under the older
    // p50k_base / r50k_base encodings of the similarly named legacy models.
    case "davinci-002":
    case "babbage-002":
    case "gpt-3.5-turbo-instruct-0914":
    case "gpt-3.5-turbo-instruct":
    case "gpt-3.5-turbo-16k-0613":
    case "gpt-3.5-turbo-16k":
    case "gpt-3.5-turbo-0613":
    case "gpt-3.5-turbo-0301":
    case "gpt-3.5-turbo":
    case "gpt-4-32k-0613":
    case "gpt-4-32k-0314":
    case "gpt-4-32k":
    case "gpt-4-0613":
    case "gpt-4-0314":
    case "gpt-4":
    case "gpt-3.5-turbo-1106":
    case "gpt-35-turbo":
    case "gpt-4-1106-preview":
    case "gpt-4-vision-preview":
    case "gpt-3.5-turbo-0125":
    case "gpt-4-turbo":
    case "gpt-4-turbo-2024-04-09":
    case "gpt-4-turbo-preview":
    case "gpt-4-0125-preview":
    case "text-embedding-ada-002":
    case "text-embedding-3-small":
    case "text-embedding-3-large": {
      return "cl100k_base";
    }
    case "gpt-4o":
    case "gpt-4o-2024-05-13":
    case "gpt-4o-2024-08-06":
    case "gpt-4o-mini-2024-07-18":
    case "gpt-4o-mini": {
      return "o200k_base";
    }
    default:
      throw new Error("Unknown model");
  }
}
|
||
|
|
||
|
// src/index.ts
|
||
|
/**
 * Creates a tokenizer for a named encoding.
 *
 * @param {string} encoding - One of "gpt2", "r50k_base", "p50k_base",
 *   "p50k_edit", "cl100k_base" or "o200k_base".
 * @param {Object<string, number>} [extendSpecialTokens] - Passed through as
 *   the second `Tiktoken` constructor argument.
 * @returns {Tiktoken} A new tokenizer built from the encoding's rank table.
 * @throws {Error} "Unknown encoding" for any other `encoding` value.
 */
function getEncoding(encoding, extendSpecialTokens) {
  let rankTable;
  if (encoding === "gpt2") {
    rankTable = gpt2_default;
  } else if (encoding === "r50k_base") {
    rankTable = r50k_base_default;
  } else if (encoding === "p50k_base") {
    rankTable = p50k_base_default;
  } else if (encoding === "p50k_edit") {
    rankTable = p50k_edit_default;
  } else if (encoding === "cl100k_base") {
    rankTable = cl100k_base_default;
  } else if (encoding === "o200k_base") {
    rankTable = o200k_base_default;
  } else {
    throw new Error("Unknown encoding");
  }
  return new Tiktoken(rankTable, extendSpecialTokens);
}
|
||
|
/**
 * Creates a tokenizer for a model by first resolving the model's encoding name.
 *
 * @param {string} model - Model name, resolved with `getEncodingNameForModel`.
 * @param {Object<string, number>} [extendSpecialTokens] - Forwarded to `getEncoding`.
 * @returns {Tiktoken} The value returned by `getEncoding`.
 */
function encodingForModel(model, extendSpecialTokens) {
  const encodingName = getEncodingNameForModel(model);
  return getEncoding(encodingName, extendSpecialTokens);
}
|
||
|
|
||
|
// Public API of this module: the tokenizer class, the two factory functions,
// and the model-name-to-encoding-name lookup.
// NOTE(review): each export is a separate `exports.<name> = <value>` statement;
// presumably tooling that detects CommonJS exports statically relies on this
// form — confirm before restructuring.
exports.Tiktoken = Tiktoken;
|
||
|
exports.encodingForModel = encodingForModel;
|
||
|
exports.getEncoding = getEncoding;
|
||
|
exports.getEncodingNameForModel = getEncodingNameForModel;
|