// LineBreak.js 20 KB
  /* @flow */
  'use strict';
  // Mark the exports object as a transpiled ES module.
  Object.defineProperty(exports, "__esModule", { value: true });
  // Trie_1 supplies createTrieFromBase64, linebreak_trie_1 supplies the base64 payload it decodes,
  // and Util_1 supplies toCodePoints / fromCodePoint for string <-> code point conversion.
  var Trie_1 = require("./Trie");
  var linebreak_trie_1 = require("./linebreak-trie");
  var Util_1 = require("./Util");
  // Threshold/offset applied to raw trie values: codePointsToCharacterClasses treats any value greater than
  // this as flagged and subtracts it to recover the class id.
  // NOTE(review): the name suggests the flag marks letter/number code points - confirm against the trie builder.
  exports.LETTER_NUMBER_MODIFIER = 50;
  // Numeric ids of the line breaking classes. These are the values the rest of this file compares against
  // the (modifier-adjusted) results of exports.UnicodeTrie.get().
  // NOTE(review): names and descriptions appear to follow UAX #14, Table 1 - confirm before renumbering.
  // Non-tailorable Line Breaking Classes
  var BK = 1; // Cause a line break (after)
  var CR = 2; // Cause a line break (after), except between CR and LF
  var LF = 3; // Cause a line break (after)
  var CM = 4; // Prohibit a line break between the character and the preceding character
  var NL = 5; // Cause a line break (after)
  var SG = 6; // Do not occur in well-formed text
  var WJ = 7; // Prohibit line breaks before and after
  var ZW = 8; // Provide a break opportunity
  var GL = 9; // Prohibit line breaks before and after
  var SP = 10; // Enable indirect line breaks
  var ZWJ = 11; // Prohibit line breaks within joiner sequences
  // Break Opportunities
  var B2 = 12; // Provide a line break opportunity before and after the character
  var BA = 13; // Generally provide a line break opportunity after the character
  var BB = 14; // Generally provide a line break opportunity before the character
  var HY = 15; // Provide a line break opportunity after the character, except in numeric context
  var CB = 16; // Provide a line break opportunity contingent on additional information
  // Characters Prohibiting Certain Breaks
  var CL = 17; // Prohibit line breaks before
  var CP = 18; // Prohibit line breaks before
  var EX = 19; // Prohibit line breaks before
  var IN = 20; // Allow only indirect line breaks between pairs
  var NS = 21; // Allow only indirect line breaks before
  var OP = 22; // Prohibit line breaks after
  var QU = 23; // Act like they are both opening and closing
  // Numeric Context
  var IS = 24; // Prevent breaks after any and before numeric
  var NU = 25; // Form numeric expressions for line breaking purposes
  var PO = 26; // Do not break following a numeric expression
  var PR = 27; // Do not break in front of a numeric expression
  var SY = 28; // Prevent a break before, and allow a break after
  // Other Characters
  var AI = 29; // Act like AL when the resolved EAW is N; otherwise, act as ID
  var AL = 30; // Are alphabetic characters or symbols that are used with alphabetic characters
  var CJ = 31; // Treat as NS or ID for strict or normal breaking.
  var EB = 32; // Do not break from following Emoji Modifier
  var EM = 33; // Do not break from preceding Emoji Base
  var H2 = 34; // Form Korean syllable blocks
  var H3 = 35; // Form Korean syllable blocks
  var HL = 36; // Do not break around a following hyphen; otherwise act as Alphabetic
  var ID = 37; // Break before or after, except in some numeric context
  var JL = 38; // Form Korean syllable blocks
  var JV = 39; // Form Korean syllable blocks
  var JT = 40; // Form Korean syllable blocks
  var RI = 41; // Keep pairs together. For pairs, break before and after other classes
  var SA = 42; // Provide a line break opportunity contingent on additional, language-specific context analysis
  var XX = 43; // Have as yet unknown line breaking behavior or unassigned code positions
  // Public name -> numeric id map of every line breaking class declared above,
  // so consumers can refer to classes by name instead of hard-coding the numbers.
  exports.classes = {
      BK: BK,
      CR: CR,
      LF: LF,
      CM: CM,
      NL: NL,
      SG: SG,
      WJ: WJ,
      ZW: ZW,
      GL: GL,
      SP: SP,
      ZWJ: ZWJ,
      B2: B2,
      BA: BA,
      BB: BB,
      HY: HY,
      CB: CB,
      CL: CL,
      CP: CP,
      EX: EX,
      IN: IN,
      NS: NS,
      OP: OP,
      QU: QU,
      IS: IS,
      NU: NU,
      PO: PO,
      PR: PR,
      SY: SY,
      AI: AI,
      AL: AL,
      CJ: CJ,
      EB: EB,
      EM: EM,
      H2: H2,
      H3: H3,
      HL: HL,
      ID: ID,
      JL: JL,
      JV: JV,
      JT: JT,
      RI: RI,
      SA: SA,
      XX: XX,
  };
  // Markers returned by the break-evaluation functions in this file and emitted between characters
  // by inlineBreakOpportunities.
  exports.BREAK_MANDATORY = '!';
  exports.BREAK_NOT_ALLOWED = '×';
  exports.BREAK_ALLOWED = '÷';
  // Lookup structure decoded from the bundled base64 data; get(codePoint) yields the raw class value
  // that codePointsToCharacterClasses interprets.
  exports.UnicodeTrie = Trie_1.createTrieFromBase64(linebreak_trie_1.base64);
  // Class groups shared by several rules below.
  var ALPHABETICS = [AL, HL];
  var HARD_LINE_BREAKS = [BK, CR, LF, NL];
  var SPACE = [SP, ZW];
  var PREFIX_POSTFIX = [PR, PO];
  // Classes a combining mark / ZWJ may not attach to (see the LB9/LB10 handling in codePointsToCharacterClasses).
  var LINE_BREAKS = HARD_LINE_BREAKS.concat(SPACE);
  var KOREAN_SYLLABLE_BLOCK = [JL, JV, JT, H2, H3];
  var HYPHEN = [HY, BA];
  112. exports.codePointsToCharacterClasses = function (codePoints, lineBreak) {
  113. if (lineBreak === void 0) { lineBreak = 'strict'; }
  114. var types = [];
  115. var indicies = [];
  116. var categories = [];
  117. codePoints.forEach(function (codePoint, index) {
  118. var classType = exports.UnicodeTrie.get(codePoint);
  119. if (classType > exports.LETTER_NUMBER_MODIFIER) {
  120. categories.push(true);
  121. classType -= exports.LETTER_NUMBER_MODIFIER;
  122. }
  123. else {
  124. categories.push(false);
  125. }
  126. if (['normal', 'auto', 'loose'].indexOf(lineBreak) !== -1) {
  127. // U+2010, – U+2013, 〜 U+301C, ゠ U+30A0
  128. if ([0x2010, 0x2013, 0x301c, 0x30a0].indexOf(codePoint) !== -1) {
  129. indicies.push(index);
  130. return types.push(CB);
  131. }
  132. }
  133. if (classType === CM || classType === ZWJ) {
  134. // LB10 Treat any remaining combining mark or ZWJ as AL.
  135. if (index === 0) {
  136. indicies.push(index);
  137. return types.push(AL);
  138. }
  139. // LB9 Do not break a combining character sequence; treat it as if it has the line breaking class of
  140. // the base character in all of the following rules. Treat ZWJ as if it were CM.
  141. var prev = types[index - 1];
  142. if (LINE_BREAKS.indexOf(prev) === -1) {
  143. indicies.push(indicies[index - 1]);
  144. return types.push(prev);
  145. }
  146. indicies.push(index);
  147. return types.push(AL);
  148. }
  149. indicies.push(index);
  150. if (classType === CJ) {
  151. return types.push(lineBreak === 'strict' ? NS : ID);
  152. }
  153. if (classType === SA) {
  154. return types.push(AL);
  155. }
  156. if (classType === AI) {
  157. return types.push(AL);
  158. }
  159. // For supplementary characters, a useful default is to treat characters in the range 10000..1FFFD as AL
  160. // and characters in the ranges 20000..2FFFD and 30000..3FFFD as ID, until the implementation can be revised
  161. // to take into account the actual line breaking properties for these characters.
  162. if (classType === XX) {
  163. if ((codePoint >= 0x20000 && codePoint <= 0x2fffd) || (codePoint >= 0x30000 && codePoint <= 0x3fffd)) {
  164. return types.push(ID);
  165. }
  166. else {
  167. return types.push(AL);
  168. }
  169. }
  170. types.push(classType);
  171. });
  172. return [indicies, types, categories];
  173. };
  /**
   * Tells whether class `a` is followed by class `b` around `currentIndex`, allowing any number of
   * SP entries in between ("a SP* b").
   *
   * @param {number|number[]} a - Class id, or list of class ids, expected on the left.
   * @param {number} b - Class id expected on the right.
   * @param {number} currentIndex - Position in `classTypes` to test from.
   * @param {number[]} classTypes - Resolved class ids.
   * @returns {boolean} True when the pattern spans `currentIndex`.
   */
  var isAdjacentWithSpaceIgnored = function (a, b, currentIndex, classTypes) {
      // True when `type` is (one of) the left-hand class(es).
      var isLeftClass = function (type) {
          return Array.isArray(a) ? a.indexOf(type) !== -1 : a === type;
      };
      // Walks forward from `start`, skipping SP, and reports whether `b` is reached first.
      var reachesRightClass = function (start) {
          var cursor = start;
          while (cursor <= classTypes.length) {
              cursor += 1;
              var seen = classTypes[cursor];
              if (seen === b) {
                  return true;
              }
              if (seen !== SP) {
                  return false;
              }
          }
          return false;
      };
      var current = classTypes[currentIndex];
      // Case 1: we are standing on the left-hand class itself.
      if (isLeftClass(current) && reachesRightClass(currentIndex)) {
          return true;
      }
      // Case 2: we are inside the run of spaces; look back for the left-hand class, then forward for `b`.
      if (current === SP) {
          for (var back = currentIndex - 1; back >= 0; back--) {
              var earlier = classTypes[back];
              if (isLeftClass(earlier) && reachesRightClass(currentIndex)) {
                  return true;
              }
              if (earlier !== SP) {
                  break;
              }
          }
      }
      return false;
  };
  /**
   * Finds the closest class at or before `currentIndex` that is not SP.
   *
   * @param {number} currentIndex - Position to start looking from (inclusive).
   * @param {number[]} classTypes - Resolved class ids.
   * @returns {number} The first non-SP entry found walking backwards, or 0 when only SP entries
   *     (or nothing) precede the start of the list.
   */
  var previousNonSpaceClassType = function (currentIndex, classTypes) {
      for (var position = currentIndex; position >= 0; position--) {
          var candidate = classTypes[position];
          if (candidate !== SP) {
              return candidate;
          }
      }
      return 0;
  };
  /**
   * Decides what kind of break is possible immediately before position `index`, i.e. between
   * classTypes[index - 1] ("current") and classTypes[index] ("next").
   *
   * The rules are evaluated top to bottom and the first one that matches wins, so their order matters.
   *
   * @param {number[]} codePoints - Code points of the text; only read for the ZWJ lookup of LB8a.
   * @param {number[]} classTypes - Resolved class id per code point.
   * @param {number[]} indicies - Attributed position per code point, as produced by
   *     codePointsToCharacterClasses (combining marks reuse the position of their base).
   * @param {number} index - Position of the candidate break.
   * @param {boolean[]} [forbiddenBreaks] - Optional per-position flags; `true` at index - 1 vetoes the break.
   * @returns {string} exports.BREAK_MANDATORY, exports.BREAK_NOT_ALLOWED or exports.BREAK_ALLOWED.
   */
  var _lineBreakAtIndex = function (codePoints, classTypes, indicies, index, forbiddenBreaks) {
      // The entry at `index` is attributed to position 0: it is the first code point or part of a
      // combining sequence attached to it, so no break may precede it.
      if (indicies[index] === 0) {
          return exports.BREAK_NOT_ALLOWED;
      }
      var currentIndex = index - 1;
      if (Array.isArray(forbiddenBreaks) && forbiddenBreaks[currentIndex] === true) {
          return exports.BREAK_NOT_ALLOWED;
      }
      var beforeIndex = currentIndex - 1;
      var afterIndex = currentIndex + 1;
      var current = classTypes[currentIndex];
      // LB4 Always break after hard line breaks.
      // LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
      // `before` falls back to 0 (not a class id) when there is nothing in front of `current`.
      var before = beforeIndex >= 0 ? classTypes[beforeIndex] : 0;
      var next = classTypes[afterIndex];
      if (current === CR && next === LF) {
          return exports.BREAK_NOT_ALLOWED;
      }
      if (HARD_LINE_BREAKS.indexOf(current) !== -1) {
          return exports.BREAK_MANDATORY;
      }
      // LB6 Do not break before hard line breaks.
      if (HARD_LINE_BREAKS.indexOf(next) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB7 Do not break before spaces or zero width space.
      if (SPACE.indexOf(next) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB8 Break before any character following a zero-width space, even if one or more spaces intervene.
      if (previousNonSpaceClassType(currentIndex, classTypes) === ZW) {
          return exports.BREAK_ALLOWED;
      }
      // LB8a Do not break between a zero width joiner and an ideograph, emoji base or emoji modifier.
      // The raw trie value is consulted here because classTypes no longer contains ZWJ after resolution.
      if (exports.UnicodeTrie.get(codePoints[currentIndex]) === ZWJ && (next === ID || next === EB || next === EM)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB11 Do not break before or after Word joiner and related characters.
      if (current === WJ || next === WJ) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB12 Do not break after NBSP and related characters.
      if (current === GL) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB12a Do not break before NBSP and related characters, except after spaces and hyphens.
      if ([SP, BA, HY].indexOf(current) === -1 && next === GL) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
      if ([CL, CP, EX, IS, SY].indexOf(next) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB14 Do not break after ‘[’, even after spaces.
      if (previousNonSpaceClassType(currentIndex, classTypes) === OP) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB15 Do not break within ‘”[’, even with intervening spaces.
      if (isAdjacentWithSpaceIgnored(QU, OP, currentIndex, classTypes)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.
      if (isAdjacentWithSpaceIgnored([CL, CP], NS, currentIndex, classTypes)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB17 Do not break within ‘——’, even with intervening spaces.
      if (isAdjacentWithSpaceIgnored(B2, B2, currentIndex, classTypes)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB18 Break after spaces.
      if (current === SP) {
          return exports.BREAK_ALLOWED;
      }
      // LB19 Do not break before or after quotation marks, such as ‘ ” ’.
      if (current === QU || next === QU) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB20 Break before and after unresolved CB.
      if (next === CB || current === CB) {
          return exports.BREAK_ALLOWED;
      }
      // LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents.
      if ([BA, HY, NS].indexOf(next) !== -1 || current === BB) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB21a Don't break after Hebrew + Hyphen.
      if (before === HL && HYPHEN.indexOf(current) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB21b Don’t break between Solidus and Hebrew letters.
      if (current === SY && next === HL) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB22 Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis.
      if (next === IN && ALPHABETICS.concat(IN, EX, NU, ID, EB, EM).indexOf(current) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB23 Do not break between digits and letters.
      if ((ALPHABETICS.indexOf(next) !== -1 && current === NU) || (ALPHABETICS.indexOf(current) !== -1 && next === NU)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
      if ((current === PR && [ID, EB, EM].indexOf(next) !== -1) ||
          ([ID, EB, EM].indexOf(current) !== -1 && next === PO)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.
      if ((ALPHABETICS.indexOf(current) !== -1 && PREFIX_POSTFIX.indexOf(next) !== -1) ||
          (PREFIX_POSTFIX.indexOf(current) !== -1 && ALPHABETICS.indexOf(next) !== -1)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB25 Do not break between the following pairs of classes relevant to numbers:
      if (
      // (PR | PO) × ( OP | HY )? NU
      ([PR, PO].indexOf(current) !== -1 &&
          (next === NU || ([OP, HY].indexOf(next) !== -1 && classTypes[afterIndex + 1] === NU))) ||
          // ( OP | HY ) × NU
          ([OP, HY].indexOf(current) !== -1 && next === NU) ||
          // NU × (NU | SY | IS)
          (current === NU && [NU, SY, IS].indexOf(next) !== -1)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // NU (NU | SY | IS)* × (NU | SY | IS | CL | CP)
      // Scan backwards over SY / IS entries; reaching a NU means we are inside a numeric expression.
      if ([NU, SY, IS, CL, CP].indexOf(next) !== -1) {
          var prevIndex = currentIndex;
          while (prevIndex >= 0) {
              var type = classTypes[prevIndex];
              if (type === NU) {
                  return exports.BREAK_NOT_ALLOWED;
              }
              else if ([SY, IS].indexOf(type) !== -1) {
                  prevIndex--;
              }
              else {
                  break;
              }
          }
      }
      // NU (NU | SY | IS)* (CL | CP)? × (PO | PR)
      // Same backwards scan, but an optional closing CL / CP directly before the break is skipped first.
      if ([PR, PO].indexOf(next) !== -1) {
          var prevIndex = [CL, CP].indexOf(current) !== -1 ? beforeIndex : currentIndex;
          while (prevIndex >= 0) {
              var type = classTypes[prevIndex];
              if (type === NU) {
                  return exports.BREAK_NOT_ALLOWED;
              }
              else if ([SY, IS].indexOf(type) !== -1) {
                  prevIndex--;
              }
              else {
                  break;
              }
          }
      }
      // LB26 Do not break a Korean syllable.
      if ((JL === current && [JL, JV, H2, H3].indexOf(next) !== -1) ||
          ([JV, H2].indexOf(current) !== -1 && [JV, JT].indexOf(next) !== -1) ||
          ([JT, H3].indexOf(current) !== -1 && next === JT)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB27 Treat a Korean Syllable Block the same as ID.
      if ((KOREAN_SYLLABLE_BLOCK.indexOf(current) !== -1 && [IN, PO].indexOf(next) !== -1) ||
          (KOREAN_SYLLABLE_BLOCK.indexOf(next) !== -1 && current === PR)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB28 Do not break between alphabetics (“at”).
      if (ALPHABETICS.indexOf(current) !== -1 && ALPHABETICS.indexOf(next) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
      if (current === IS && ALPHABETICS.indexOf(next) !== -1) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
      if ((ALPHABETICS.concat(NU).indexOf(current) !== -1 && next === OP) ||
          (ALPHABETICS.concat(NU).indexOf(next) !== -1 && current === CP)) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // LB30a Break between two regional indicator symbols if and only if there are an even number of regional
      // indicators preceding the position of the break.
      if (current === RI && next === RI) {
          // Start from the attributed position of `current` and count the unbroken run of RI entries
          // ending there; `count` starts at 1 to include `current` itself.
          var i = indicies[currentIndex];
          var count = 1;
          while (i > 0) {
              i--;
              if (classTypes[i] === RI) {
                  count++;
              }
              else {
                  break;
              }
          }
          // An odd run means `current` opens a pair that `next` completes.
          if (count % 2 !== 0) {
              return exports.BREAK_NOT_ALLOWED;
          }
      }
      // LB30b Do not break between an emoji base and an emoji modifier.
      if (current === EB && next === EM) {
          return exports.BREAK_NOT_ALLOWED;
      }
      // No rule objected: a break is allowed here.
      return exports.BREAK_ALLOWED;
  };
  429. exports.lineBreakAtIndex = function (codePoints, index) {
  430. // LB2 Never break at the start of text.
  431. if (index === 0) {
  432. return exports.BREAK_NOT_ALLOWED;
  433. }
  434. // LB3 Always break at the end of text.
  435. if (index >= codePoints.length) {
  436. return exports.BREAK_MANDATORY;
  437. }
  438. var _a = exports.codePointsToCharacterClasses(codePoints), indicies = _a[0], classTypes = _a[1];
  439. return _lineBreakAtIndex(codePoints, classTypes, indicies, index);
  440. };
  /**
   * Resolves the classes for `codePoints` and applies the CSS `word-break` adjustments.
   *
   * @param {number[]} codePoints - Code points of the text.
   * @param {{lineBreak: string, wordBreak: string}} [options] - CSS settings; when omitted both
   *     default to 'normal'.
   * @returns {Array} A triple `[indicies, classTypes, forbiddenBreakpoints]`. With 'break-all' or
   *     'break-word', NU / AL / SA classes are replaced by ID. `forbiddenBreakpoints` is only built for
   *     'keep-all' (flagged code points within U+4E00..U+9FFF), otherwise it is undefined.
   */
  var cssFormattedClasses = function (codePoints, options) {
      var settings = options ? options : { lineBreak: 'normal', wordBreak: 'normal' };
      var resolved = exports.codePointsToCharacterClasses(codePoints, settings.lineBreak);
      var indicies = resolved[0];
      var classTypes = resolved[1];
      var isLetterNumber = resolved[2];
      var wordBreak = settings.wordBreak;
      if (wordBreak === 'break-all' || wordBreak === 'break-word') {
          var breakableAsIdeograph = [NU, AL, SA];
          classTypes = classTypes.map(function (type) {
              return breakableAsIdeograph.indexOf(type) === -1 ? type : ID;
          });
      }
      var forbiddenBreakpoints;
      if (wordBreak === 'keep-all') {
          forbiddenBreakpoints = isLetterNumber.map(function (letterNumber, position) {
              return letterNumber && codePoints[position] >= 0x4e00 && codePoints[position] <= 0x9fff;
          });
      }
      return [indicies, classTypes, forbiddenBreakpoints];
  };
  456. exports.inlineBreakOpportunities = function (str, options) {
  457. var codePoints = Util_1.toCodePoints(str);
  458. var output = exports.BREAK_NOT_ALLOWED;
  459. var _a = cssFormattedClasses(codePoints, options), indicies = _a[0], classTypes = _a[1], forbiddenBreakpoints = _a[2];
  460. codePoints.forEach(function (codePoint, i) {
  461. output +=
  462. Util_1.fromCodePoint(codePoint) +
  463. (i >= codePoints.length - 1
  464. ? exports.BREAK_MANDATORY
  465. : _lineBreakAtIndex(codePoints, classTypes, indicies, i + 1, forbiddenBreakpoints));
  466. });
  467. return output;
  468. };
  /**
   * One segment of text ending at a break opportunity.
   *
   * @constructor
   * @param {number[]} codePoints - Code points of the whole text (shared, not copied).
   * @param {string} lineBreak - Marker describing the break that ends the segment; `required` is set
   *     when it equals exports.BREAK_MANDATORY.
   * @param {number} start - Index of the first code point of the segment.
   * @param {number} end - Index just past the last code point of the segment.
   */
  function Break(codePoints, lineBreak, start, end) {
      this.codePoints = codePoints;
      this.required = exports.BREAK_MANDATORY === lineBreak;
      this.start = start;
      this.end = end;
  }
  /**
   * Returns the segment's text.
   *
   * @returns {string} The code points in [start, end) converted back to a string.
   */
  Break.prototype.slice = function () {
      var segment = this.codePoints.slice(this.start, this.end);
      return Util_1.fromCodePoint.apply(undefined, segment);
  };
  481. exports.LineBreaker = function (str, options) {
  482. var codePoints = Util_1.toCodePoints(str);
  483. var _a = cssFormattedClasses(codePoints, options), indicies = _a[0], classTypes = _a[1], forbiddenBreakpoints = _a[2];
  484. var length = codePoints.length;
  485. var lastEnd = 0;
  486. var nextIndex = 0;
  487. return {
  488. next: function () {
  489. if (nextIndex >= length) {
  490. return { done: true, value: null };
  491. }
  492. var lineBreak = exports.BREAK_NOT_ALLOWED;
  493. while (nextIndex < length &&
  494. (lineBreak = _lineBreakAtIndex(codePoints, classTypes, indicies, ++nextIndex, forbiddenBreakpoints)) ===
  495. exports.BREAK_NOT_ALLOWED) { }
  496. if (lineBreak !== exports.BREAK_NOT_ALLOWED || nextIndex === length) {
  497. var value = new Break(codePoints, lineBreak, lastEnd, nextIndex);
  498. lastEnd = nextIndex;
  499. return { value: value, done: false };
  500. }
  501. return { done: true, value: null };
  502. },
  503. };
  504. };
  505. //# sourceMappingURL=LineBreak.js.map