TrieBuilder.js 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. var Trie_1 = require("./Trie");
  4. var base64_arraybuffer_1 = require("base64-arraybuffer");
  5. /**
  6. * Trie2 constants, defining shift widths, index array lengths, etc.
  7. *
  8. * These are needed for the runtime macros but users can treat these as
  9. * implementation details and skip to the actual public API further below.
  10. */
  11. // const UTRIE2_OPTIONS_VALUE_BITS_MASK = 0x000f;
  12. /** Number of code points per index-1 table entry. 2048=0x800 */
  13. var UTRIE2_CP_PER_INDEX_1_ENTRY = 1 << Trie_1.UTRIE2_SHIFT_1;
  14. /** The alignment size of a data block. Also the granularity for compaction. */
  15. var UTRIE2_DATA_GRANULARITY = 1 << Trie_1.UTRIE2_INDEX_SHIFT;
  16. /* Fixed layout of the first part of the index array. ------------------- */
  17. /**
  18. * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
  19. * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
  20. */
  21. var UTRIE2_INDEX_2_OFFSET = 0;
  22. var UTRIE2_MAX_INDEX_1_LENGTH = 0x100000 >> Trie_1.UTRIE2_SHIFT_1;
  23. /*
  24. * Fixed layout of the first part of the data array. -----------------------
  25. * Starts with 4 blocks (128=0x80 entries) for ASCII.
  26. */
  27. /**
  28. * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
  29. * Used with linear access for single bytes 0..0xbf for simple error handling.
  30. * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
  31. */
  32. var UTRIE2_BAD_UTF8_DATA_OFFSET = 0x80;
  33. /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
  34. var UTRIE2_DATA_START_OFFSET = 0xc0;
  35. /* Building a Trie2 ---------------------------------------------------------- */
  36. /*
  37. * These definitions are mostly needed by utrie2_builder.c, but also by
  38. * utrie2_get32() and utrie2_enum().
  39. */
  40. /*
  41. * At build time, leave a gap in the index-2 table,
  42. * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
  43. * and the supplementary index-1 table.
  44. * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
  45. */
  46. var UNEWTRIE2_INDEX_GAP_OFFSET = Trie_1.UTRIE2_INDEX_2_BMP_LENGTH;
  47. var UNEWTRIE2_INDEX_GAP_LENGTH = (Trie_1.UTRIE2_UTF8_2B_INDEX_2_LENGTH + UTRIE2_MAX_INDEX_1_LENGTH + Trie_1.UTRIE2_INDEX_2_MASK) & ~Trie_1.UTRIE2_INDEX_2_MASK;
  48. /**
  49. * Maximum length of the build-time index-2 array.
  50. * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
  51. * plus the part of the index-2 table for lead surrogate code points,
  52. * plus the build-time index gap,
  53. * plus the null index-2 block.
  54. */
  55. var UNEWTRIE2_MAX_INDEX_2_LENGTH = (0x110000 >> Trie_1.UTRIE2_SHIFT_2) +
  56. Trie_1.UTRIE2_LSCP_INDEX_2_LENGTH +
  57. UNEWTRIE2_INDEX_GAP_LENGTH +
  58. Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  59. var UNEWTRIE2_INDEX_1_LENGTH = 0x110000 >> Trie_1.UTRIE2_SHIFT_1;
  60. /**
  61. * Maximum length of the build-time data array.
  62. * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block,
  63. * plus values for the 0x400 surrogate code units.
  64. */
  65. var UNEWTRIE2_MAX_DATA_LENGTH = 0x110000 + 0x40 + 0x40 + 0x400;
  66. /* Start with allocation of 16k data entries. */
  67. var UNEWTRIE2_INITIAL_DATA_LENGTH = 1 << 14;
  68. /* Grow about 8x each time. */
  69. var UNEWTRIE2_MEDIUM_DATA_LENGTH = 1 << 17;
  70. /** The null index-2 block, following the gap in the index-2 table. */
  71. var UNEWTRIE2_INDEX_2_NULL_OFFSET = UNEWTRIE2_INDEX_GAP_OFFSET + UNEWTRIE2_INDEX_GAP_LENGTH;
  72. /** The start of allocated index-2 blocks. */
  73. var UNEWTRIE2_INDEX_2_START_OFFSET = UNEWTRIE2_INDEX_2_NULL_OFFSET + Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  74. /**
  75. * The null data block.
  76. * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
  77. * to work with 6-bit trail bytes from 2-byte UTF-8.
  78. */
  79. var UNEWTRIE2_DATA_NULL_OFFSET = UTRIE2_DATA_START_OFFSET;
  80. /** The start of allocated data blocks. */
  81. var UNEWTRIE2_DATA_START_OFFSET = UNEWTRIE2_DATA_NULL_OFFSET + 0x40;
  82. /**
  83. * The start of data blocks for U+0800 and above.
  84. * Below, compaction uses a block length of 64 for 2-byte UTF-8.
  85. * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
  86. * Data values for 0x780 code points beyond ASCII.
  87. */
  88. var UNEWTRIE2_DATA_0800_OFFSET = UNEWTRIE2_DATA_START_OFFSET + 0x780;
  89. /**
  90. * Maximum length of the runtime index array.
  91. * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
  92. * (The actual maximum length is lower,
  93. * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
  94. */
  95. var UTRIE2_MAX_INDEX_LENGTH = 0xffff;
  96. /**
  97. * Maximum length of the runtime data array.
  98. * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
  99. * and by uint16_t UTrie2Header.shiftedDataLength.
  100. */
  101. var UTRIE2_MAX_DATA_LENGTH = 0xffff << Trie_1.UTRIE2_INDEX_SHIFT;
  102. exports.BITS_16 = 16;
  103. exports.BITS_32 = 32;
  104. var isHighSurrogate = function (c) { return c >= 0xd800 && c <= 0xdbff; };
  105. var equalInt = function (a, s, t, length) {
  106. for (var i = 0; i < length; i++) {
  107. if (a[s + i] !== a[t + i]) {
  108. return false;
  109. }
  110. }
  111. return true;
  112. };
  113. var TrieBuilder = /** @class */ (function () {
  114. function TrieBuilder(initialValue, errorValue) {
  115. if (initialValue === void 0) { initialValue = 0; }
  116. if (errorValue === void 0) { errorValue = 0; }
  117. this.initialValue = initialValue;
  118. this.errorValue = errorValue;
  119. this.highStart = 0x110000;
  120. this.data = new Uint32Array(UNEWTRIE2_INITIAL_DATA_LENGTH);
  121. this.dataCapacity = UNEWTRIE2_INITIAL_DATA_LENGTH;
  122. this.highStart = 0x110000;
  123. this.firstFreeBlock = 0; /* no free block in the list */
  124. this.isCompacted = false;
  125. this.index1 = new Uint32Array(UNEWTRIE2_INDEX_1_LENGTH);
  126. this.index2 = new Uint32Array(UNEWTRIE2_MAX_INDEX_2_LENGTH);
  127. /*
  128. * Multi-purpose per-data-block table.
  129. *
  130. * Before compacting:
  131. *
  132. * Per-data-block reference counters/free-block list.
  133. * 0: unused
  134. * >0: reference counter (number of index-2 entries pointing here)
  135. * <0: next free data block in free-block list
  136. *
  137. * While compacting:
  138. *
  139. * Map of adjusted indexes, used in compactData() and compactIndex2().
  140. * Maps from original indexes to new ones.
  141. */
  142. this.map = new Uint32Array(UNEWTRIE2_MAX_DATA_LENGTH >> Trie_1.UTRIE2_SHIFT_2);
  143. /*
  144. * preallocate and reset
  145. * - ASCII
  146. * - the bad-UTF-8-data block
  147. * - the null data block
  148. */
  149. var i, j;
  150. for (i = 0; i < 0x80; ++i) {
  151. this.data[i] = initialValue;
  152. }
  153. for (; i < 0xc0; ++i) {
  154. this.data[i] = errorValue;
  155. }
  156. for (i = UNEWTRIE2_DATA_NULL_OFFSET; i < UNEWTRIE2_DATA_START_OFFSET; ++i) {
  157. this.data[i] = initialValue;
  158. }
  159. this.dataNullOffset = UNEWTRIE2_DATA_NULL_OFFSET;
  160. this.dataLength = UNEWTRIE2_DATA_START_OFFSET;
  161. /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
  162. for (i = 0, j = 0; j < 0x80; ++i, j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  163. this.index2[i] = j;
  164. this.map[i] = 1;
  165. }
  166. /* reference counts for the bad-UTF-8-data block */
  167. for (; j < 0xc0; ++i, j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  168. this.map[i] = 0;
  169. }
  170. /*
  171. * Reference counts for the null data block: all blocks except for the ASCII blocks.
  172. * Plus 1 so that we don't drop this block during compaction.
  173. * Plus as many as needed for lead surrogate code points.
  174. */
  175. /* i==newTrie->dataNullOffset */
  176. this.map[i++] = (0x110000 >> Trie_1.UTRIE2_SHIFT_2) - (0x80 >> Trie_1.UTRIE2_SHIFT_2) + 1 + Trie_1.UTRIE2_LSCP_INDEX_2_LENGTH;
  177. j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  178. for (; j < UNEWTRIE2_DATA_START_OFFSET; ++i, j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  179. this.map[i] = 0;
  180. }
  181. /*
  182. * set the remaining indexes in the BMP index-2 block
  183. * to the null data block
  184. */
  185. for (i = 0x80 >> Trie_1.UTRIE2_SHIFT_2; i < Trie_1.UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
  186. this.index2[i] = UNEWTRIE2_DATA_NULL_OFFSET;
  187. }
  188. /*
  189. * Fill the index gap with impossible values so that compaction
  190. * does not overlap other index-2 blocks with the gap.
  191. */
  192. for (i = 0; i < UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
  193. this.index2[UNEWTRIE2_INDEX_GAP_OFFSET + i] = -1;
  194. }
  195. /* set the indexes in the null index-2 block */
  196. for (i = 0; i < Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
  197. this.index2[UNEWTRIE2_INDEX_2_NULL_OFFSET + i] = UNEWTRIE2_DATA_NULL_OFFSET;
  198. }
  199. this.index2NullOffset = UNEWTRIE2_INDEX_2_NULL_OFFSET;
  200. this.index2Length = UNEWTRIE2_INDEX_2_START_OFFSET;
  201. /* set the index-1 indexes for the linear index-2 block */
  202. for (i = 0, j = 0; i < Trie_1.UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; ++i, j += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH) {
  203. this.index1[i] = j;
  204. }
  205. /* set the remaining index-1 indexes to the null index-2 block */
  206. for (; i < UNEWTRIE2_INDEX_1_LENGTH; ++i) {
  207. this.index1[i] = UNEWTRIE2_INDEX_2_NULL_OFFSET;
  208. }
  209. /*
  210. * Preallocate and reset data for U+0080..U+07ff,
  211. * for 2-byte UTF-8 which will be compacted in 64-blocks
  212. * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
  213. */
  214. for (i = 0x80; i < 0x800; i += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  215. this.set(i, initialValue);
  216. }
  217. }
  218. /**
  219. * Set a value for a code point.
  220. *
  221. * @param c the code point
  222. * @param value the value
  223. */
  224. TrieBuilder.prototype.set = function (c, value) {
  225. if (c < 0 || c > 0x10ffff) {
  226. throw new Error('Invalid code point.');
  227. }
  228. this._set(c, true, value);
  229. return this;
  230. };
  231. /**
  232. * Set a value in a range of code points [start..end].
  233. * All code points c with start<=c<=end will get the value if
  234. * overwrite is TRUE or if the old value is the initial value.
  235. *
  236. * @param start the first code point to get the value
  237. * @param end the last code point to get the value (inclusive)
  238. * @param value the value
  239. * @param overwrite flag for whether old non-initial values are to be overwritten
  240. */
  241. TrieBuilder.prototype.setRange = function (start, end, value, overwrite) {
  242. if (overwrite === void 0) { overwrite = false; }
  243. /*
  244. * repeat value in [start..end]
  245. * mark index values for repeat-data blocks by setting bit 31 of the index values
  246. * fill around existing values if any, if(overwrite)
  247. */
  248. var block, rest, repeatBlock;
  249. if (start > 0x10ffff || start < 0 || end > 0x10ffff || end < 0 || start > end) {
  250. throw new Error('Invalid code point range.');
  251. }
  252. if (!overwrite && value === this.initialValue) {
  253. return this; /* nothing to do */
  254. }
  255. if (this.isCompacted) {
  256. throw new Error('Trie was already compacted');
  257. }
  258. var limit = end + 1;
  259. if ((start & Trie_1.UTRIE2_DATA_MASK) !== 0) {
  260. /* set partial block at [start..following block boundary[ */
  261. block = this.getDataBlock(start, true);
  262. var nextStart = (start + Trie_1.UTRIE2_DATA_BLOCK_LENGTH) & ~Trie_1.UTRIE2_DATA_MASK;
  263. if (nextStart <= limit) {
  264. this.fillBlock(block, start & Trie_1.UTRIE2_DATA_MASK, Trie_1.UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
  265. start = nextStart;
  266. }
  267. else {
  268. this.fillBlock(block, start & Trie_1.UTRIE2_DATA_MASK, limit & Trie_1.UTRIE2_DATA_MASK, value, this.initialValue, overwrite);
  269. return this;
  270. }
  271. }
  272. /* number of positions in the last, partial block */
  273. rest = limit & Trie_1.UTRIE2_DATA_MASK;
  274. /* round down limit to a block boundary */
  275. limit &= ~Trie_1.UTRIE2_DATA_MASK;
  276. /* iterate over all-value blocks */
  277. repeatBlock = value === this.initialValue ? this.dataNullOffset : -1;
  278. while (start < limit) {
  279. var i2 = void 0;
  280. var setRepeatBlock = false;
  281. if (value === this.initialValue && this.isInNullBlock(start, true)) {
  282. start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
  283. continue;
  284. }
  285. /* get index value */
  286. i2 = this.getIndex2Block(start, true);
  287. i2 += (start >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK;
  288. block = this.index2[i2];
  289. if (this.isWritableBlock(block)) {
  290. /* already allocated */
  291. if (overwrite && block >= UNEWTRIE2_DATA_0800_OFFSET) {
  292. /*
  293. * We overwrite all values, and it's not a
  294. * protected (ASCII-linear or 2-byte UTF-8) block:
  295. * replace with the repeatBlock.
  296. */
  297. setRepeatBlock = true;
  298. }
  299. else {
  300. /* !overwrite, or protected block: just write the values into this block */
  301. this.fillBlock(block, 0, Trie_1.UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
  302. }
  303. }
  304. else if (this.data[block] !== value && (overwrite || block === this.dataNullOffset)) {
  305. /*
  306. * Set the repeatBlock instead of the null block or previous repeat block:
  307. *
  308. * If !isWritableBlock() then all entries in the block have the same value
  309. * because it's the null block or a range block (the repeatBlock from a previous
  310. * call to utrie2_setRange32()).
  311. * No other blocks are used multiple times before compacting.
  312. *
  313. * The null block is the only non-writable block with the initialValue because
  314. * of the repeatBlock initialization above. (If value==initialValue, then
  315. * the repeatBlock will be the null data block.)
  316. *
  317. * We set our repeatBlock if the desired value differs from the block's value,
  318. * and if we overwrite any data or if the data is all initial values
  319. * (which is the same as the block being the null block, see above).
  320. */
  321. setRepeatBlock = true;
  322. }
  323. if (setRepeatBlock) {
  324. if (repeatBlock >= 0) {
  325. this.setIndex2Entry(i2, repeatBlock);
  326. }
  327. else {
  328. /* create and set and fill the repeatBlock */
  329. repeatBlock = this.getDataBlock(start, true);
  330. this.writeBlock(repeatBlock, value);
  331. }
  332. }
  333. start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  334. }
  335. if (rest > 0) {
  336. /* set partial block at [last block boundary..limit[ */
  337. block = this.getDataBlock(start, true);
  338. this.fillBlock(block, 0, rest, value, this.initialValue, overwrite);
  339. }
  340. return this;
  341. };
  342. /**
  343. * Get the value for a code point as stored in the Trie2.
  344. *
  345. * @param codePoint the code point
  346. * @return the value
  347. */
  348. TrieBuilder.prototype.get = function (codePoint) {
  349. if (codePoint < 0 || codePoint > 0x10ffff) {
  350. return this.errorValue;
  351. }
  352. else {
  353. return this._get(codePoint, true);
  354. }
  355. };
  356. TrieBuilder.prototype._get = function (c, fromLSCP) {
  357. var i2;
  358. if (c >= this.highStart && (!(c >= 0xd800 && c < 0xdc00) || fromLSCP)) {
  359. return this.data[this.dataLength - UTRIE2_DATA_GRANULARITY];
  360. }
  361. if (c >= 0xd800 && c < 0xdc00 && fromLSCP) {
  362. i2 = Trie_1.UTRIE2_LSCP_INDEX_2_OFFSET - (0xd800 >> Trie_1.UTRIE2_SHIFT_2) + (c >> Trie_1.UTRIE2_SHIFT_2);
  363. }
  364. else {
  365. i2 = this.index1[c >> Trie_1.UTRIE2_SHIFT_1] + ((c >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK);
  366. }
  367. var block = this.index2[i2];
  368. return this.data[block + (c & Trie_1.UTRIE2_DATA_MASK)];
  369. };
  370. TrieBuilder.prototype.freeze = function (valueBits) {
  371. if (valueBits === void 0) { valueBits = exports.BITS_32; }
  372. var i;
  373. var allIndexesLength;
  374. var dataMove; /* >0 if the data is moved to the end of the index array */
  375. /* compact if necessary */
  376. if (!this.isCompacted) {
  377. this.compactTrie();
  378. }
  379. allIndexesLength = this.highStart <= 0x10000 ? Trie_1.UTRIE2_INDEX_1_OFFSET : this.index2Length;
  380. if (valueBits === exports.BITS_16) {
  381. // dataMove = allIndexesLength;
  382. dataMove = 0;
  383. }
  384. else {
  385. dataMove = 0;
  386. }
  387. /* are indexLength and dataLength within limits? */
  388. if (
  389. /* for unshifted indexLength */
  390. allIndexesLength > UTRIE2_MAX_INDEX_LENGTH ||
  391. /* for unshifted dataNullOffset */
  392. dataMove + this.dataNullOffset > 0xffff ||
  393. /* for unshifted 2-byte UTF-8 index-2 values */
  394. dataMove + UNEWTRIE2_DATA_0800_OFFSET > 0xffff ||
  395. /* for shiftedDataLength */
  396. dataMove + this.dataLength > UTRIE2_MAX_DATA_LENGTH) {
  397. throw new Error('Trie data is too large.');
  398. }
  399. var index = new Uint16Array(allIndexesLength);
  400. /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
  401. var destIdx = 0;
  402. for (i = 0; i < Trie_1.UTRIE2_INDEX_2_BMP_LENGTH; i++) {
  403. index[destIdx++] = (this.index2[i] + dataMove) >> Trie_1.UTRIE2_INDEX_SHIFT;
  404. }
  405. /* write UTF-8 2-byte index-2 values, not right-shifted */
  406. for (i = 0; i < 0xc2 - 0xc0; ++i) {
  407. /* C0..C1 */
  408. index[destIdx++] = dataMove + UTRIE2_BAD_UTF8_DATA_OFFSET;
  409. }
  410. for (; i < 0xe0 - 0xc0; ++i) {
  411. /* C2..DF */
  412. index[destIdx++] = dataMove + this.index2[i << (6 - Trie_1.UTRIE2_SHIFT_2)];
  413. }
  414. if (this.highStart > 0x10000) {
  415. var index1Length = (this.highStart - 0x10000) >> Trie_1.UTRIE2_SHIFT_1;
  416. var index2Offset = Trie_1.UTRIE2_INDEX_2_BMP_LENGTH + Trie_1.UTRIE2_UTF8_2B_INDEX_2_LENGTH + index1Length;
  417. /* write 16-bit index-1 values for supplementary code points */
  418. for (i = 0; i < index1Length; i++) {
  419. index[destIdx++] = UTRIE2_INDEX_2_OFFSET + this.index1[i + Trie_1.UTRIE2_OMITTED_BMP_INDEX_1_LENGTH];
  420. }
  421. /*
  422. * write the index-2 array values for supplementary code points,
  423. * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
  424. */
  425. for (i = 0; i < this.index2Length - index2Offset; i++) {
  426. index[destIdx++] = (dataMove + this.index2[index2Offset + i]) >> Trie_1.UTRIE2_INDEX_SHIFT;
  427. }
  428. }
  429. /* write the 16/32-bit data array */
  430. switch (valueBits) {
  431. case exports.BITS_16:
  432. /* write 16-bit data values */
  433. var data16 = new Uint16Array(this.dataLength);
  434. for (i = 0; i < this.dataLength; i++) {
  435. data16[i] = this.data[i];
  436. }
  437. return new Trie_1.Trie(this.initialValue, this.errorValue, this.highStart, dataMove + this.dataLength - UTRIE2_DATA_GRANULARITY, index, data16);
  438. case exports.BITS_32:
  439. /* write 32-bit data values */
  440. var data32 = new Uint32Array(this.dataLength);
  441. for (i = 0; i < this.dataLength; i++) {
  442. data32[i] = this.data[i];
  443. }
  444. return new Trie_1.Trie(this.initialValue, this.errorValue, this.highStart, dataMove + this.dataLength - UTRIE2_DATA_GRANULARITY, index, data32);
  445. default:
  446. throw new Error('Bits should be either 16 or 32');
  447. }
  448. };
  449. /*
  450. * Find the start of the last range in the trie by enumerating backward.
  451. * Indexes for supplementary code points higher than this will be omitted.
  452. */
  453. TrieBuilder.prototype.findHighStart = function (highValue) {
  454. var value;
  455. var i2, j, i2Block, prevI2Block, block, prevBlock;
  456. /* set variables for previous range */
  457. if (highValue === this.initialValue) {
  458. prevI2Block = this.index2NullOffset;
  459. prevBlock = this.dataNullOffset;
  460. }
  461. else {
  462. prevI2Block = -1;
  463. prevBlock = -1;
  464. }
  465. var prev = 0x110000;
  466. /* enumerate index-2 blocks */
  467. var i1 = UNEWTRIE2_INDEX_1_LENGTH;
  468. var c = prev;
  469. while (c > 0) {
  470. i2Block = this.index1[--i1];
  471. if (i2Block === prevI2Block) {
  472. /* the index-2 block is the same as the previous one, and filled with highValue */
  473. c -= UTRIE2_CP_PER_INDEX_1_ENTRY;
  474. continue;
  475. }
  476. prevI2Block = i2Block;
  477. if (i2Block === this.index2NullOffset) {
  478. /* this is the null index-2 block */
  479. if (highValue !== this.initialValue) {
  480. return c;
  481. }
  482. c -= UTRIE2_CP_PER_INDEX_1_ENTRY;
  483. }
  484. else {
  485. /* enumerate data blocks for one index-2 block */
  486. for (i2 = Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH; i2 > 0;) {
  487. block = this.index2[i2Block + --i2];
  488. if (block === prevBlock) {
  489. /* the block is the same as the previous one, and filled with highValue */
  490. c -= Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  491. continue;
  492. }
  493. prevBlock = block;
  494. if (block === this.dataNullOffset) {
  495. /* this is the null data block */
  496. if (highValue !== this.initialValue) {
  497. return c;
  498. }
  499. c -= Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  500. }
  501. else {
  502. for (j = Trie_1.UTRIE2_DATA_BLOCK_LENGTH; j > 0;) {
  503. value = this.data[block + --j];
  504. if (value !== highValue) {
  505. return c;
  506. }
  507. --c;
  508. }
  509. }
  510. }
  511. }
  512. }
  513. /* deliver last range */
  514. return 0;
  515. };
  516. /*
  517. * Compact a build-time trie.
  518. *
  519. * The compaction
  520. * - removes blocks that are identical with earlier ones
  521. * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
  522. * - moves blocks in steps of the data granularity
  523. * - moves and overlaps blocks that overlap with multiple values in the overlap region
  524. *
  525. * It does not
  526. * - try to move and overlap blocks that are not already adjacent
  527. */
  528. TrieBuilder.prototype.compactData = function () {
  529. var start, movedStart;
  530. var blockLength, overlap;
  531. var i, mapIndex, blockCount;
  532. /* do not compact linear-ASCII data */
  533. var newStart = UTRIE2_DATA_START_OFFSET;
  534. for (start = 0, i = 0; start < newStart; start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH, ++i) {
  535. this.map[i] = start;
  536. }
  537. /*
  538. * Start with a block length of 64 for 2-byte UTF-8,
  539. * then switch to UTRIE2_DATA_BLOCK_LENGTH.
  540. */
  541. blockLength = 64;
  542. blockCount = blockLength >> Trie_1.UTRIE2_SHIFT_2;
  543. for (start = newStart; start < this.dataLength;) {
  544. /*
  545. * start: index of first entry of current block
  546. * newStart: index where the current block is to be moved
  547. * (right after current end of already-compacted data)
  548. */
  549. if (start === UNEWTRIE2_DATA_0800_OFFSET) {
  550. blockLength = Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  551. blockCount = 1;
  552. }
  553. /* skip blocks that are not used */
  554. if (this.map[start >> Trie_1.UTRIE2_SHIFT_2] <= 0) {
  555. /* advance start to the next block */
  556. start += blockLength;
  557. /* leave newStart with the previous block! */
  558. continue;
  559. }
  560. /* search for an identical block */
  561. movedStart = this.findSameDataBlock(newStart, start, blockLength);
  562. if (movedStart >= 0) {
  563. /* found an identical block, set the other block's index value for the current block */
  564. for (i = blockCount, mapIndex = start >> Trie_1.UTRIE2_SHIFT_2; i > 0; --i) {
  565. this.map[mapIndex++] = movedStart;
  566. movedStart += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  567. }
  568. /* advance start to the next block */
  569. start += blockLength;
  570. /* leave newStart with the previous block! */
  571. continue;
  572. }
  573. /* see if the beginning of this block can be overlapped with the end of the previous block */
  574. /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
  575. for (overlap = blockLength - UTRIE2_DATA_GRANULARITY; overlap > 0 && !equalInt(this.data, newStart - overlap, start, overlap); overlap -= UTRIE2_DATA_GRANULARITY) { }
  576. if (overlap > 0 || newStart < start) {
  577. /* some overlap, or just move the whole block */
  578. movedStart = newStart - overlap;
  579. for (i = blockCount, mapIndex = start >> Trie_1.UTRIE2_SHIFT_2; i > 0; --i) {
  580. this.map[mapIndex++] = movedStart;
  581. movedStart += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  582. }
  583. /* move the non-overlapping indexes to their new positions */
  584. start += overlap;
  585. for (i = blockLength - overlap; i > 0; --i) {
  586. this.data[newStart++] = this.data[start++];
  587. }
  588. }
  589. else {
  590. /* no overlap && newStart==start */
  591. for (i = blockCount, mapIndex = start >> Trie_1.UTRIE2_SHIFT_2; i > 0; --i) {
  592. this.map[mapIndex++] = start;
  593. start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  594. }
  595. newStart = start;
  596. }
  597. }
  598. /* now adjust the index-2 table */
  599. for (i = 0; i < this.index2Length; ++i) {
  600. if (i === UNEWTRIE2_INDEX_GAP_OFFSET) {
  601. /* Gap indexes are invalid (-1). Skip over the gap. */
  602. i += UNEWTRIE2_INDEX_GAP_LENGTH;
  603. }
  604. this.index2[i] = this.map[this.index2[i] >> Trie_1.UTRIE2_SHIFT_2];
  605. }
  606. this.dataNullOffset = this.map[this.dataNullOffset >> Trie_1.UTRIE2_SHIFT_2];
  607. /* ensure dataLength alignment */
  608. while ((newStart & (UTRIE2_DATA_GRANULARITY - 1)) !== 0) {
  609. this.data[newStart++] = this.initialValue;
  610. }
  611. this.dataLength = newStart;
  612. };
  613. TrieBuilder.prototype.findSameDataBlock = function (dataLength, otherBlock, blockLength) {
  614. var block = 0;
  615. /* ensure that we do not even partially get past dataLength */
  616. dataLength -= blockLength;
  617. for (; block <= dataLength; block += UTRIE2_DATA_GRANULARITY) {
  618. if (equalInt(this.data, block, otherBlock, blockLength)) {
  619. return block;
  620. }
  621. }
  622. return -1;
  623. };
  624. TrieBuilder.prototype.compactTrie = function () {
  625. var highValue = this.get(0x10ffff);
  626. /* find highStart and round it up */
  627. var localHighStart = this.findHighStart(highValue);
  628. localHighStart = (localHighStart + (UTRIE2_CP_PER_INDEX_1_ENTRY - 1)) & ~(UTRIE2_CP_PER_INDEX_1_ENTRY - 1);
  629. if (localHighStart === 0x110000) {
  630. highValue = this.errorValue;
  631. }
  632. /*
  633. * Set trie->highStart only after utrie2_get32(trie, highStart).
  634. * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
  635. */
  636. this.highStart = localHighStart;
  637. if (this.highStart < 0x110000) {
  638. /* Blank out [highStart..10ffff] to release associated data blocks. */
  639. var suppHighStart = this.highStart <= 0x10000 ? 0x10000 : this.highStart;
  640. this.setRange(suppHighStart, 0x10ffff, this.initialValue, true);
  641. }
  642. this.compactData();
  643. if (this.highStart > 0x10000) {
  644. this.compactIndex2();
  645. }
  646. /*
  647. * Store the highValue in the data array and round up the dataLength.
  648. * Must be done after compactData() because that assumes that dataLength
  649. * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
  650. */
  651. this.data[this.dataLength++] = highValue;
  652. while ((this.dataLength & (UTRIE2_DATA_GRANULARITY - 1)) !== 0) {
  653. this.data[this.dataLength++] = this.initialValue;
  654. }
  655. this.isCompacted = true;
  656. };
  657. TrieBuilder.prototype.compactIndex2 = function () {
  658. var i, start, movedStart, overlap;
  659. /* do not compact linear-BMP index-2 blocks */
  660. var newStart = Trie_1.UTRIE2_INDEX_2_BMP_LENGTH;
  661. for (start = 0, i = 0; start < newStart; start += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
  662. this.map[i] = start;
  663. }
  664. /* Reduce the index table gap to what will be needed at runtime. */
  665. newStart += Trie_1.UTRIE2_UTF8_2B_INDEX_2_LENGTH + ((this.highStart - 0x10000) >> Trie_1.UTRIE2_SHIFT_1);
  666. for (start = UNEWTRIE2_INDEX_2_NULL_OFFSET; start < this.index2Length;) {
  667. /*
  668. * start: index of first entry of current block
  669. * newStart: index where the current block is to be moved
  670. * (right after current end of already-compacted data)
  671. */
  672. /* search for an identical block */
  673. if ((movedStart = this.findSameIndex2Block(newStart, start)) >= 0) {
  674. /* found an identical block, set the other block's index value for the current block */
  675. this.map[start >> Trie_1.UTRIE2_SHIFT_1_2] = movedStart;
  676. /* advance start to the next block */
  677. start += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  678. /* leave newStart with the previous block! */
  679. continue;
  680. }
  681. /* see if the beginning of this block can be overlapped with the end of the previous block */
  682. /* look for maximum overlap with the previous, adjacent block */
  683. for (overlap = Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH - 1; overlap > 0 && !equalInt(this.index2, newStart - overlap, start, overlap); --overlap) { }
  684. if (overlap > 0 || newStart < start) {
  685. /* some overlap, or just move the whole block */
  686. this.map[start >> Trie_1.UTRIE2_SHIFT_1_2] = newStart - overlap;
  687. /* move the non-overlapping indexes to their new positions */
  688. start += overlap;
  689. for (i = Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH - overlap; i > 0; --i) {
  690. this.index2[newStart++] = this.index2[start++];
  691. }
  692. }
  693. else {
  694. /* no overlap && newStart==start */ this.map[start >> Trie_1.UTRIE2_SHIFT_1_2] = start;
  695. start += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  696. newStart = start;
  697. }
  698. }
  699. /* now adjust the index-1 table */
  700. for (i = 0; i < UNEWTRIE2_INDEX_1_LENGTH; ++i) {
  701. this.index1[i] = this.map[this.index1[i] >> Trie_1.UTRIE2_SHIFT_1_2];
  702. }
  703. this.index2NullOffset = this.map[this.index2NullOffset >> Trie_1.UTRIE2_SHIFT_1_2];
  704. /*
  705. * Ensure data table alignment:
  706. * Needs to be granularity-aligned for 16-bit trie
  707. * (so that dataMove will be down-shiftable),
  708. * and 2-aligned for uint32_t data.
  709. */
  710. while ((newStart & ((UTRIE2_DATA_GRANULARITY - 1) | 1)) !== 0) {
  711. /* Arbitrary value: 0x3fffc not possible for real data. */
  712. this.index2[newStart++] = 0x0000ffff << Trie_1.UTRIE2_INDEX_SHIFT;
  713. }
  714. this.index2Length = newStart;
  715. };
  716. TrieBuilder.prototype.findSameIndex2Block = function (index2Length, otherBlock) {
  717. /* ensure that we do not even partially get past index2Length */
  718. index2Length -= Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  719. for (var block = 0; block <= index2Length; ++block) {
  720. if (equalInt(this.index2, block, otherBlock, Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH)) {
  721. return block;
  722. }
  723. }
  724. return -1;
  725. };
  726. TrieBuilder.prototype._set = function (c, forLSCP, value) {
  727. if (this.isCompacted) {
  728. throw new Error('Trie was already compacted');
  729. }
  730. var block = this.getDataBlock(c, forLSCP);
  731. this.data[block + (c & Trie_1.UTRIE2_DATA_MASK)] = value;
  732. return this;
  733. };
  734. TrieBuilder.prototype.writeBlock = function (block, value) {
  735. var limit = block + Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  736. while (block < limit) {
  737. this.data[block++] = value;
  738. }
  739. };
  740. TrieBuilder.prototype.isInNullBlock = function (c, forLSCP) {
  741. var i2 = isHighSurrogate(c) && forLSCP
  742. ? Trie_1.UTRIE2_LSCP_INDEX_2_OFFSET - (0xd800 >> Trie_1.UTRIE2_SHIFT_2) + (c >> Trie_1.UTRIE2_SHIFT_2)
  743. : this.index1[c >> Trie_1.UTRIE2_SHIFT_1] + ((c >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK);
  744. var block = this.index2[i2];
  745. return block === this.dataNullOffset;
  746. };
  747. TrieBuilder.prototype.fillBlock = function (block, start, limit, value, initialValue, overwrite) {
  748. var pLimit = block + limit;
  749. if (overwrite) {
  750. for (var i = block + start; i < pLimit; i++) {
  751. this.data[i] = value;
  752. }
  753. }
  754. else {
  755. for (var i = block + start; i < pLimit; i++) {
  756. if (this.data[i] === initialValue) {
  757. this.data[i] = value;
  758. }
  759. }
  760. }
  761. };
  762. TrieBuilder.prototype.setIndex2Entry = function (i2, block) {
  763. ++this.map[block >> Trie_1.UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */
  764. var oldBlock = this.index2[i2];
  765. if (0 === --this.map[oldBlock >> Trie_1.UTRIE2_SHIFT_2]) {
  766. this.releaseDataBlock(oldBlock);
  767. }
  768. this.index2[i2] = block;
  769. };
  770. TrieBuilder.prototype.releaseDataBlock = function (block) {
  771. /* put this block at the front of the free-block chain */
  772. this.map[block >> Trie_1.UTRIE2_SHIFT_2] = -this.firstFreeBlock;
  773. this.firstFreeBlock = block;
  774. };
  775. TrieBuilder.prototype.getDataBlock = function (c, forLSCP) {
  776. var i2 = this.getIndex2Block(c, forLSCP);
  777. i2 += (c >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK;
  778. var oldBlock = this.index2[i2];
  779. if (this.isWritableBlock(oldBlock)) {
  780. return oldBlock;
  781. }
  782. /* allocate a new data block */
  783. var newBlock = this.allocDataBlock(oldBlock);
  784. this.setIndex2Entry(i2, newBlock);
  785. return newBlock;
  786. };
  787. TrieBuilder.prototype.isWritableBlock = function (block) {
  788. return block !== this.dataNullOffset && 1 === this.map[block >> Trie_1.UTRIE2_SHIFT_2];
  789. };
  790. TrieBuilder.prototype.getIndex2Block = function (c, forLSCP) {
  791. if (c >= 0xd800 && c < 0xdc00 && forLSCP) {
  792. return Trie_1.UTRIE2_LSCP_INDEX_2_OFFSET;
  793. }
  794. var i1 = c >> Trie_1.UTRIE2_SHIFT_1;
  795. var i2 = this.index1[i1];
  796. if (i2 === this.index2NullOffset) {
  797. i2 = this.allocIndex2Block();
  798. this.index1[i1] = i2;
  799. }
  800. return i2;
  801. };
  802. TrieBuilder.prototype.allocDataBlock = function (copyBlock) {
  803. var newBlock;
  804. if (this.firstFreeBlock !== 0) {
  805. /* get the first free block */
  806. newBlock = this.firstFreeBlock;
  807. this.firstFreeBlock = -this.map[newBlock >> Trie_1.UTRIE2_SHIFT_2];
  808. }
  809. else {
  810. /* get a new block from the high end */
  811. newBlock = this.dataLength;
  812. var newTop = newBlock + Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  813. if (newTop > this.dataCapacity) {
  814. var capacity = void 0;
  815. /* out of memory in the data array */
  816. if (this.dataCapacity < UNEWTRIE2_MEDIUM_DATA_LENGTH) {
  817. capacity = UNEWTRIE2_MEDIUM_DATA_LENGTH;
  818. }
  819. else if (this.dataCapacity < UNEWTRIE2_MAX_DATA_LENGTH) {
  820. capacity = UNEWTRIE2_MAX_DATA_LENGTH;
  821. }
  822. else {
  823. /*
  824. * Should never occur.
  825. * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
  826. * or the code writes more values than should be possible.
  827. */
  828. throw new Error('Internal error in Trie creation.');
  829. }
  830. var newData = new Uint32Array(capacity);
  831. newData.set(this.data.subarray(0, this.dataLength));
  832. this.data = newData;
  833. this.dataCapacity = capacity;
  834. }
  835. this.dataLength = newTop;
  836. }
  837. this.data.set(this.data.subarray(copyBlock, copyBlock + Trie_1.UTRIE2_DATA_BLOCK_LENGTH), newBlock);
  838. this.map[newBlock >> Trie_1.UTRIE2_SHIFT_2] = 0;
  839. return newBlock;
  840. };
  841. TrieBuilder.prototype.allocIndex2Block = function () {
  842. var newBlock = this.index2Length;
  843. var newTop = newBlock + Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  844. if (newTop > this.index2.length) {
  845. throw new Error('Internal error in Trie creation.');
  846. /*
  847. * Should never occur.
  848. * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
  849. * or the code writes more values than should be possible.
  850. */
  851. }
  852. this.index2Length = newTop;
  853. this.index2.set(this.index2.subarray(this.index2NullOffset, this.index2NullOffset + Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH), newBlock);
  854. return newBlock;
  855. };
  856. return TrieBuilder;
  857. }());
  858. exports.TrieBuilder = TrieBuilder;
  859. exports.serializeBase64 = function (trie) {
  860. var index = trie.index;
  861. var data = trie.data;
  862. if (!(index instanceof Uint16Array) || !(data instanceof Uint16Array || data instanceof Uint32Array)) {
  863. throw new Error('TrieBuilder serializer only support TypedArrays');
  864. }
  865. var headerLength = Uint32Array.BYTES_PER_ELEMENT * 6;
  866. var bufferLength = headerLength + index.byteLength + data.byteLength;
  867. var buffer = new ArrayBuffer(Math.ceil(bufferLength / 4) * 4);
  868. var view32 = new Uint32Array(buffer);
  869. var view16 = new Uint16Array(buffer);
  870. view32[0] = trie.initialValue;
  871. view32[1] = trie.errorValue;
  872. view32[2] = trie.highStart;
  873. view32[3] = trie.highValueIndex;
  874. view32[4] = index.byteLength;
  875. // $FlowFixMe
  876. view32[5] = data.BYTES_PER_ELEMENT;
  877. view16.set(index, headerLength / Uint16Array.BYTES_PER_ELEMENT);
  878. if (data.BYTES_PER_ELEMENT === Uint16Array.BYTES_PER_ELEMENT) {
  879. view16.set(data, (headerLength + index.byteLength) / Uint16Array.BYTES_PER_ELEMENT);
  880. }
  881. else {
  882. view32.set(data, Math.ceil((headerLength + index.byteLength) / Uint32Array.BYTES_PER_ELEMENT));
  883. }
  884. return base64_arraybuffer_1.encode(buffer);
  885. };
  886. //# sourceMappingURL=TrieBuilder.js.map