Trie.js 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. var Util_1 = require("./Util");
  4. /** Shift size for getting the index-2 table offset. */
  5. exports.UTRIE2_SHIFT_2 = 5;
  6. /** Shift size for getting the index-1 table offset. */
  7. exports.UTRIE2_SHIFT_1 = 6 + 5;
  8. /**
  9. * Shift size for shifting left the index array values.
  10. * Increases possible data size with 16-bit index values at the cost
  11. * of compactability.
  12. * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
  13. */
  14. exports.UTRIE2_INDEX_SHIFT = 2;
  15. /**
  16. * Difference between the two shift sizes,
  17. * for getting an index-1 offset from an index-2 offset. 6=11-5
  18. */
  19. exports.UTRIE2_SHIFT_1_2 = exports.UTRIE2_SHIFT_1 - exports.UTRIE2_SHIFT_2;
  20. /**
  21. * The part of the index-2 table for U+D800..U+DBFF stores values for
  22. * lead surrogate code _units_ not code _points_.
  23. * Values for lead surrogate code _points_ are indexed with this portion of the table.
  24. * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
  25. */
  26. exports.UTRIE2_LSCP_INDEX_2_OFFSET = 0x10000 >> exports.UTRIE2_SHIFT_2;
  27. /** Number of entries in a data block. 32=0x20 */
  28. exports.UTRIE2_DATA_BLOCK_LENGTH = 1 << exports.UTRIE2_SHIFT_2;
  29. /** Mask for getting the lower bits for the in-data-block offset. */
  30. exports.UTRIE2_DATA_MASK = exports.UTRIE2_DATA_BLOCK_LENGTH - 1;
  31. exports.UTRIE2_LSCP_INDEX_2_LENGTH = 0x400 >> exports.UTRIE2_SHIFT_2;
  32. /** Count the lengths of both BMP pieces. 2080=0x820 */
  33. exports.UTRIE2_INDEX_2_BMP_LENGTH = exports.UTRIE2_LSCP_INDEX_2_OFFSET + exports.UTRIE2_LSCP_INDEX_2_LENGTH;
  34. /**
  35. * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
  36. * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
  37. */
  38. exports.UTRIE2_UTF8_2B_INDEX_2_OFFSET = exports.UTRIE2_INDEX_2_BMP_LENGTH;
  39. exports.UTRIE2_UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6; /* U+0800 is the first code point after 2-byte UTF-8 */
  40. /**
  41. * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
  42. * Variable length, for code points up to highStart, where the last single-value range starts.
  43. * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
  44. * (For 0x100000 supplementary code points U+10000..U+10ffff.)
  45. *
  46. * The part of the index-2 table for supplementary code points starts
  47. * after this index-1 table.
  48. *
  49. * Both the index-1 table and the following part of the index-2 table
  50. * are omitted completely if there is only BMP data.
  51. */
  52. exports.UTRIE2_INDEX_1_OFFSET = exports.UTRIE2_UTF8_2B_INDEX_2_OFFSET + exports.UTRIE2_UTF8_2B_INDEX_2_LENGTH;
  53. /**
  54. * Number of index-1 entries for the BMP. 32=0x20
  55. * This part of the index-1 table is omitted from the serialized form.
  56. */
  57. exports.UTRIE2_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> exports.UTRIE2_SHIFT_1;
  58. /** Number of entries in an index-2 block. 64=0x40 */
  59. exports.UTRIE2_INDEX_2_BLOCK_LENGTH = 1 << exports.UTRIE2_SHIFT_1_2;
  60. /** Mask for getting the lower bits for the in-index-2-block offset. */
  61. exports.UTRIE2_INDEX_2_MASK = exports.UTRIE2_INDEX_2_BLOCK_LENGTH - 1;
  62. var slice16 = function (view, start, end) {
  63. if (view.slice) {
  64. return view.slice(start, end);
  65. }
  66. return new Uint16Array(Array.prototype.slice.call(view, start, end));
  67. };
  68. var slice32 = function (view, start, end) {
  69. if (view.slice) {
  70. return view.slice(start, end);
  71. }
  72. return new Uint32Array(Array.prototype.slice.call(view, start, end));
  73. };
  74. exports.createTrieFromBase64 = function (base64) {
  75. var buffer = Util_1.decode(base64);
  76. var view32 = Array.isArray(buffer) ? Util_1.polyUint32Array(buffer) : new Uint32Array(buffer);
  77. var view16 = Array.isArray(buffer) ? Util_1.polyUint16Array(buffer) : new Uint16Array(buffer);
  78. var headerLength = 24;
  79. var index = slice16(view16, headerLength / 2, view32[4] / 2);
  80. var data = view32[5] === 2
  81. ? slice16(view16, (headerLength + view32[4]) / 2)
  82. : slice32(view32, Math.ceil((headerLength + view32[4]) / 4));
  83. return new Trie(view32[0], view32[1], view32[2], view32[3], index, data);
  84. };
  85. var Trie = /** @class */ (function () {
  86. function Trie(initialValue, errorValue, highStart, highValueIndex, index, data) {
  87. this.initialValue = initialValue;
  88. this.errorValue = errorValue;
  89. this.highStart = highStart;
  90. this.highValueIndex = highValueIndex;
  91. this.index = index;
  92. this.data = data;
  93. }
  94. /**
  95. * Get the value for a code point as stored in the Trie.
  96. *
  97. * @param codePoint the code point
  98. * @return the value
  99. */
  100. Trie.prototype.get = function (codePoint) {
  101. var ix;
  102. if (codePoint >= 0) {
  103. if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
  104. // Ordinary BMP code point, excluding leading surrogates.
  105. // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index.
  106. // 16 bit data is stored in the index array itself.
  107. ix = this.index[codePoint >> exports.UTRIE2_SHIFT_2];
  108. ix = (ix << exports.UTRIE2_INDEX_SHIFT) + (codePoint & exports.UTRIE2_DATA_MASK);
  109. return this.data[ix];
  110. }
  111. if (codePoint <= 0xffff) {
  112. // Lead Surrogate Code Point. A Separate index section is stored for
  113. // lead surrogate code units and code points.
  114. // The main index has the code unit data.
  115. // For this function, we need the code point data.
  116. // Note: this expression could be refactored for slightly improved efficiency, but
  117. // surrogate code points will be so rare in practice that it's not worth it.
  118. ix = this.index[exports.UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> exports.UTRIE2_SHIFT_2)];
  119. ix = (ix << exports.UTRIE2_INDEX_SHIFT) + (codePoint & exports.UTRIE2_DATA_MASK);
  120. return this.data[ix];
  121. }
  122. if (codePoint < this.highStart) {
  123. // Supplemental code point, use two-level lookup.
  124. ix = exports.UTRIE2_INDEX_1_OFFSET - exports.UTRIE2_OMITTED_BMP_INDEX_1_LENGTH + (codePoint >> exports.UTRIE2_SHIFT_1);
  125. ix = this.index[ix];
  126. ix += (codePoint >> exports.UTRIE2_SHIFT_2) & exports.UTRIE2_INDEX_2_MASK;
  127. ix = this.index[ix];
  128. ix = (ix << exports.UTRIE2_INDEX_SHIFT) + (codePoint & exports.UTRIE2_DATA_MASK);
  129. return this.data[ix];
  130. }
  131. if (codePoint <= 0x10ffff) {
  132. return this.data[this.highValueIndex];
  133. }
  134. }
  135. // Fall through. The code point is outside of the legal range of 0..0x10ffff.
  136. return this.errorValue;
  137. };
  138. return Trie;
  139. }());
  140. exports.Trie = Trie;
  141. //# sourceMappingURL=Trie.js.map