741 lines
22 KiB
TypeScript
741 lines
22 KiB
TypeScript
import { isDevEnv, isTestEnv } from "@excalidraw/common";
|
||
|
||
import { charWidth, getLineWidth } from "./textMeasurements";
|
||
|
||
import type { FontString } from "./types";
|
||
|
||
/**
|
||
* This module approximates browser-like soft wrapping for Excalidraw text.
|
||
*
|
||
* The flow is:
|
||
* 1. `parseTokens()` splits a hard line into breakable tokens using a unicode-aware regex.
|
||
* 2. `getWrappedTextLines()` reflows each hard line into one or more visual lines and
|
||
* records where each visual line came from in the source text.
|
||
* 3. `wrapLine()` assembles tokens into lines, and `wrapWord()` handles a single token
|
||
* that is wider than the available width.
|
||
* 4. `trimLine()` / `trimLineEndAtSoftBreak()` mirror browser behavior around trailing
|
||
* whitespace so the rendered text stays consistent with what users see on canvas.
|
||
*
|
||
* Mostly, you'll want to use wrapText(). getWrappedTextLines() is for callers
|
||
* that need metadata such as mapping visual lines back to `originalText`
|
||
* for caret placement or future editor features.
|
||
*/
|
||
let cachedCjkRegex: RegExp | undefined;
|
||
let cachedLineBreakRegex: RegExp | undefined;
|
||
let cachedEmojiRegex: RegExp | undefined;
|
||
|
||
/**
|
||
* Test if a given text contains any CJK characters (including symbols, punctuation, etc,).
|
||
*/
|
||
export const containsCJK = (text: string) => {
|
||
if (!cachedCjkRegex) {
|
||
cachedCjkRegex = Regex.class(...Object.values(CJK));
|
||
}
|
||
|
||
return cachedCjkRegex.test(text);
|
||
};
|
||
|
||
const getLineBreakRegex = () => {
|
||
if (!cachedLineBreakRegex) {
|
||
try {
|
||
cachedLineBreakRegex = getLineBreakRegexAdvanced();
|
||
} catch {
|
||
cachedLineBreakRegex = getLineBreakRegexSimple();
|
||
}
|
||
}
|
||
|
||
return cachedLineBreakRegex;
|
||
};
|
||
|
||
const getEmojiRegex = () => {
|
||
if (!cachedEmojiRegex) {
|
||
cachedEmojiRegex = getEmojiRegexUnicode();
|
||
}
|
||
|
||
return cachedEmojiRegex;
|
||
};
|
||
|
||
/**
|
||
* Common symbols used across different languages.
|
||
*/
|
||
const COMMON = {
|
||
/**
|
||
* Natural breaking points for any grammars.
|
||
*
|
||
* Hello world
|
||
* ↑ BREAK ALWAYS " " → ["Hello", " ", "world"]
|
||
* Hello-world
|
||
* ↑ BREAK AFTER "-" → ["Hello-", "world"]
|
||
*/
|
||
WHITESPACE: /\s/u,
|
||
HYPHEN: /-/u,
|
||
/**
|
||
* Generally do not break, unless closed symbol is followed by an opening symbol.
|
||
*
|
||
* Also, western punctation is often used in modern Korean and expects to be treated
|
||
* similarly to the CJK opening and closing symbols.
|
||
*
|
||
* Hello(한글)→ ["Hello", "(한", "글)"]
|
||
* ↑ BREAK BEFORE "("
|
||
* ↑ BREAK AFTER ")"
|
||
*/
|
||
OPENING: /<\(\[\{/u,
|
||
CLOSING: />\)\]\}.,:;!\?…\//u,
|
||
};
|
||
|
||
/**
|
||
* Characters and symbols used in Chinese, Japanese and Korean.
|
||
*/
|
||
const CJK = {
|
||
/**
|
||
* Every CJK breaks before and after, unless it's paired with an opening or closing symbol.
|
||
*
|
||
* Does not include every possible char used in CJK texts, such as currency, parentheses or punctuation.
|
||
*/
|
||
CHAR: /\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}`'^〃〰〆#&*+-ー/\=|¦〒¬ ̄/u,
|
||
/**
|
||
* Opening and closing CJK punctuation breaks before and after all such characters (in case of many),
|
||
* and creates pairs with neighboring characters.
|
||
*
|
||
* Hello た。→ ["Hello", "た。"]
|
||
* ↑ DON'T BREAK "た。"
|
||
* * Hello「た」 World → ["Hello", "「た」", "World"]
|
||
* ↑ DON'T BREAK "「た"
|
||
* ↑ DON'T BREAK "た"
|
||
* ↑ BREAK BEFORE "「"
|
||
* ↑ BREAK AFTER "」"
|
||
*/
|
||
// eslint-disable-next-line prettier/prettier
|
||
OPENING:/([{〈《⦅「「『【〖〔〘〚<〝/u,
|
||
CLOSING: /)]}〉》⦆」」』】〗〕〙〛>。.,、〟‥?!:;・〜〞/u,
|
||
/**
|
||
* Currency symbols break before, not after
|
||
*
|
||
* Price¥100 → ["Price", "¥100"]
|
||
* ↑ BREAK BEFORE "¥"
|
||
*/
|
||
CURRENCY: /¥₩£¢$/u,
|
||
};
|
||
|
||
const EMOJI = {
|
||
FLAG: /\p{RI}\p{RI}/u,
|
||
JOINER:
|
||
/(?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?/u,
|
||
ZWJ: /\u200D/u,
|
||
ANY: /[\p{Emoji}]/u,
|
||
MOST: /[\p{Extended_Pictographic}\p{Emoji_Presentation}]/u,
|
||
};
|
||
|
||
/**
|
||
* Simple fallback for browsers (mainly Safari < 16.4) that don't support "Lookbehind assertion".
|
||
*
|
||
* Browser support as of 10/2024:
|
||
* - 91% Lookbehind assertion https://caniuse.com/mdn-javascript_regular_expressions_lookbehind_assertion
|
||
* - 94% Unicode character class escape https://caniuse.com/mdn-javascript_regular_expressions_unicode_character_class_escape
|
||
*
|
||
* Does not include advanced CJK breaking rules, but covers most of the core cases, especially for latin.
|
||
*/
|
||
const getLineBreakRegexSimple = () =>
|
||
Regex.or(
|
||
getEmojiRegex(),
|
||
Break.On(COMMON.HYPHEN, COMMON.WHITESPACE, CJK.CHAR),
|
||
);
|
||
|
||
/**
|
||
* Specifies the line breaking rules based for alphabetic-based languages,
|
||
* Chinese, Japanese, Korean and Emojis.
|
||
*
|
||
* "Hello-world" → ["Hello-", "world"]
|
||
* "Hello 「世界。」🌎🗺" → ["Hello", " ", "「世", "界。」", "🌎", "🗺"]
|
||
*/
|
||
const getLineBreakRegexAdvanced = () =>
|
||
Regex.or(
|
||
// Unicode-defined regex for (multi-codepoint) Emojis
|
||
getEmojiRegex(),
|
||
// Rules for whitespace and hyphen
|
||
Break.Before(COMMON.WHITESPACE).Build(),
|
||
Break.After(COMMON.WHITESPACE, COMMON.HYPHEN).Build(),
|
||
// Rules for CJK (chars, symbols, currency)
|
||
Break.Before(CJK.CHAR, CJK.CURRENCY)
|
||
.NotPrecededBy(COMMON.OPENING, CJK.OPENING)
|
||
.Build(),
|
||
Break.After(CJK.CHAR)
|
||
.NotFollowedBy(COMMON.HYPHEN, COMMON.CLOSING, CJK.CLOSING)
|
||
.Build(),
|
||
// Rules for opening and closing punctuation
|
||
Break.BeforeMany(CJK.OPENING).NotPrecededBy(COMMON.OPENING).Build(),
|
||
Break.AfterMany(CJK.CLOSING).NotFollowedBy(COMMON.CLOSING).Build(),
|
||
Break.AfterMany(COMMON.CLOSING).FollowedBy(COMMON.OPENING).Build(),
|
||
);
|
||
|
||
/**
|
||
* Matches various emoji types.
|
||
*
|
||
* 1. basic emojis (😀, 🌍)
|
||
* 2. flags (🇨🇿)
|
||
* 3. multi-codepoint emojis:
|
||
* - skin tones (👍🏽)
|
||
* - variation selectors (☂️)
|
||
* - keycaps (1️⃣)
|
||
* - tag sequences (🏴)
|
||
* - emoji sequences (👨👩👧👦, 👩🚀, 🏳️🌈)
|
||
*
|
||
* Unicode points:
|
||
* - \uFE0F: presentation selector
|
||
* - \u20E3: enclosing keycap
|
||
* - \u200D: zero width joiner
|
||
* - \u{E0020}-\u{E007E}: tags
|
||
* - \u{E007F}: cancel tag
|
||
*
|
||
* @see https://unicode.org/reports/tr51/#EBNF_and_Regex, with changes:
|
||
* - replaced \p{Emoji} with [\p{Extended_Pictographic}\p{Emoji_Presentation}], see more in `should tokenize emojis mixed with mixed text` test
|
||
* - replaced \p{Emod} with \p{Emoji_Modifier} as some engines do not understand the abbreviation (i.e. https://devina.io/redos-checker)
|
||
*/
|
||
const getEmojiRegexUnicode = () =>
|
||
Regex.group(
|
||
Regex.or(
|
||
EMOJI.FLAG,
|
||
Regex.and(
|
||
EMOJI.MOST,
|
||
EMOJI.JOINER,
|
||
Regex.build(
|
||
`(?:${EMOJI.ZWJ.source}(?:${EMOJI.FLAG.source}|${EMOJI.ANY.source}${EMOJI.JOINER.source}))*`,
|
||
),
|
||
),
|
||
),
|
||
);
|
||
|
||
/**
|
||
* Regex utilities for unicode character classes.
|
||
*/
|
||
const Regex = {
|
||
/**
|
||
* Builds a regex from a string.
|
||
*/
|
||
build: (regex: string): RegExp => new RegExp(regex, "u"),
|
||
/**
|
||
* Joins regexes into a single string.
|
||
*/
|
||
join: (...regexes: RegExp[]): string => regexes.map((x) => x.source).join(""),
|
||
/**
|
||
* Joins regexes into a single regex as with "and" operator.
|
||
*/
|
||
and: (...regexes: RegExp[]): RegExp => Regex.build(Regex.join(...regexes)),
|
||
/**
|
||
* Joins regexes into a single regex with "or" operator.
|
||
*/
|
||
or: (...regexes: RegExp[]): RegExp =>
|
||
Regex.build(regexes.map((x) => x.source).join("|")),
|
||
/**
|
||
* Puts regexes into a matching group.
|
||
*/
|
||
group: (...regexes: RegExp[]): RegExp =>
|
||
Regex.build(`(${Regex.join(...regexes)})`),
|
||
/**
|
||
* Puts regexes into a character class.
|
||
*/
|
||
class: (...regexes: RegExp[]): RegExp =>
|
||
Regex.build(`[${Regex.join(...regexes)}]`),
|
||
};
|
||
|
||
/**
|
||
* Human-readable lookahead and lookbehind utilities for defining line break
|
||
* opportunities between pairs of character classes.
|
||
*/
|
||
const Break = {
|
||
/**
|
||
* Break on the given class of characters.
|
||
*/
|
||
On: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
return Regex.build(`([${joined}])`);
|
||
},
|
||
/**
|
||
* Break before the given class of characters.
|
||
*/
|
||
Before: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
const builder = () => Regex.build(`(?=[${joined}])`);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"FollowedBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Break after the given class of characters.
|
||
*/
|
||
After: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
const builder = () => Regex.build(`(?<=[${joined}])`);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"PreceededBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Break before one or multiple characters of the same class.
|
||
*/
|
||
BeforeMany: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
const builder = () => Regex.build(`(?<![${joined}])(?=[${joined}])`);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"FollowedBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Break after one or multiple character from the same class.
|
||
*/
|
||
AfterMany: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
const builder = () => Regex.build(`(?<=[${joined}])(?![${joined}])`);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"PreceededBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Do not break before the given class of characters.
|
||
*/
|
||
NotBefore: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
const builder = () => Regex.build(`(?![${joined}])`);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"NotFollowedBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Do not break after the given class of characters.
|
||
*/
|
||
NotAfter: (...regexes: RegExp[]) => {
|
||
const joined = Regex.join(...regexes);
|
||
const builder = () => Regex.build(`(?<![${joined}])`);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"NotPrecededBy"
|
||
>;
|
||
},
|
||
Chain: (rootBuilder: () => RegExp) => ({
|
||
/**
|
||
* Build the root regex.
|
||
*/
|
||
Build: rootBuilder,
|
||
/**
|
||
* Specify additional class of characters that should precede the root regex.
|
||
*/
|
||
PreceededBy: (...regexes: RegExp[]) => {
|
||
const root = rootBuilder();
|
||
const preceeded = Break.After(...regexes).Build();
|
||
const builder = () => Regex.and(preceeded, root);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"PreceededBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Specify additional class of characters that should follow the root regex.
|
||
*/
|
||
FollowedBy: (...regexes: RegExp[]) => {
|
||
const root = rootBuilder();
|
||
const followed = Break.Before(...regexes).Build();
|
||
const builder = () => Regex.and(root, followed);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"FollowedBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Specify additional class of characters that should not precede the root regex.
|
||
*/
|
||
NotPrecededBy: (...regexes: RegExp[]) => {
|
||
const root = rootBuilder();
|
||
const notPreceeded = Break.NotAfter(...regexes).Build();
|
||
const builder = () => Regex.and(notPreceeded, root);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"NotPrecededBy"
|
||
>;
|
||
},
|
||
/**
|
||
* Specify additional class of characters that should not follow the root regex.
|
||
*/
|
||
NotFollowedBy: (...regexes: RegExp[]) => {
|
||
const root = rootBuilder();
|
||
const notFollowed = Break.NotBefore(...regexes).Build();
|
||
const builder = () => Regex.and(root, notFollowed);
|
||
return Break.Chain(builder) as Omit<
|
||
ReturnType<typeof Break.Chain>,
|
||
"NotFollowedBy"
|
||
>;
|
||
},
|
||
}),
|
||
};
|
||
|
||
/**
|
||
* Breaks the line into the tokens based on the found line break opporutnities.
|
||
*
|
||
* Note: tokenization normalizes to NFC first so decomposed graphemes are treated as
|
||
* their composed variants for wrapping. Any code that needs exact source offsets should
|
||
* keep in mind that this assumes the input text is already NFC-normalized.
|
||
*/
|
||
export const parseTokens = (line: string) => {
|
||
const breakLineRegex = getLineBreakRegex();
|
||
|
||
// normalizing to single-codepoint composed chars due to canonical equivalence
|
||
// of multi-codepoint versions for chars like č, で (~ so that we don't break a line in between c and ˇ)
|
||
// filtering due to multi-codepoint chars like 👨👩👧👦, 👩🏽🦰
|
||
return line.normalize("NFC").split(breakLineRegex).filter(Boolean);
|
||
};
|
||
|
||
/**
|
||
* Wraps the original text into the lines based on the given width.
|
||
*
|
||
* This is a convenience adapter over `getWrappedTextLines()` for call sites
|
||
* that only need the rendered wrapped string and not the source offsets.
|
||
*/
|
||
export const wrapText = (
|
||
text: string,
|
||
font: FontString,
|
||
maxWidth: number,
|
||
): string => {
|
||
return getWrappedTextLines(text, font, maxWidth)
|
||
.map((line) => line.text)
|
||
.join("\n");
|
||
};
|
||
|
||
/**
|
||
* A single rendered visual line produced from the original text.
|
||
*
|
||
* `start` and `end` are end-exclusive code-unit offsets into the original text, and do
|
||
* not include synthetic soft line breaks inserted by this module. If trailing whitespace
|
||
* was trimmed away at a wrap boundary, `end` points to the last rendered character.
|
||
*/
|
||
export type WrappedTextLine = {
|
||
text: string;
|
||
start: number;
|
||
end: number;
|
||
};
|
||
|
||
/**
|
||
* Splits only on existing hard line breaks and preserves original offsets.
|
||
*/
|
||
const getHardLineBreaks = (text: string): WrappedTextLine[] => {
|
||
let offset = 0;
|
||
|
||
return text.split("\n").map((line) => {
|
||
const start = offset;
|
||
const end = start + line.length;
|
||
|
||
offset = end + 1;
|
||
|
||
return {
|
||
text: line,
|
||
start,
|
||
end,
|
||
};
|
||
});
|
||
};
|
||
|
||
/**
|
||
* Returns the rendered visual lines together with their source offsets.
|
||
*
|
||
* This is the source-of-truth wrapping pipeline for callers that need more than the
|
||
* final wrapped string, for example caret placement or future editor/rich-text mapping.
|
||
*/
|
||
export const getWrappedTextLines = (
|
||
text: string,
|
||
font: FontString,
|
||
maxWidth: number,
|
||
): WrappedTextLine[] => {
|
||
// if maxWidth is not finite or NaN which can happen in case of bugs in
|
||
// computation, we need to make sure we don't continue as we'll end up
|
||
// in an infinite loop
|
||
if (!Number.isFinite(maxWidth) || maxWidth < 0) {
|
||
return getHardLineBreaks(text);
|
||
}
|
||
|
||
const lines: WrappedTextLine[] = [];
|
||
let offset = 0;
|
||
|
||
for (const originalLine of text.split("\n")) {
|
||
const originalLineWidth = getLineWidth(originalLine, font);
|
||
|
||
if (originalLineWidth <= maxWidth) {
|
||
lines.push({
|
||
text: originalLine,
|
||
start: offset,
|
||
end: offset + originalLine.length,
|
||
});
|
||
} else {
|
||
lines.push(...wrapLine(originalLine, font, maxWidth, offset));
|
||
}
|
||
|
||
offset += originalLine.length + 1;
|
||
}
|
||
|
||
return lines;
|
||
};
|
||
|
||
/**
|
||
* Wraps a single hard line into one or more visual lines.
|
||
*
|
||
* The line-local offsets are tracked in original-text code units so
|
||
* we can map the visual line back to the source.
|
||
*/
|
||
const wrapLine = (
|
||
line: string,
|
||
font: FontString,
|
||
maxWidth: number,
|
||
lineStart: number,
|
||
): WrappedTextLine[] => {
|
||
const lines: WrappedTextLine[] = [];
|
||
const tokens = parseTokens(line);
|
||
|
||
let currentLine = "";
|
||
let currentLineStart = lineStart;
|
||
let currentLineEnd = lineStart;
|
||
let currentLineWidth = 0;
|
||
// Tracks the next token's code-unit position in the original source string.
|
||
let tokenOffset = lineStart;
|
||
let tokenIndex = 0;
|
||
|
||
while (tokenIndex < tokens.length) {
|
||
const token = tokens[tokenIndex];
|
||
const tokenStart = tokenOffset;
|
||
const tokenEnd = tokenStart + token.length;
|
||
const testLine = currentLine + token;
|
||
|
||
// cache single codepoint whitespace, CJK or emoji width calc. as kerning should not apply here
|
||
const testLineWidth = isSingleCharacter(token)
|
||
? currentLineWidth + charWidth.calculate(token, font)
|
||
: getLineWidth(testLine, font);
|
||
|
||
// build up the current line, skipping length check for possibly trailing whitespaces
|
||
if (/\s/.test(token) || testLineWidth <= maxWidth) {
|
||
if (!currentLine) {
|
||
currentLineStart = tokenStart;
|
||
}
|
||
currentLine = testLine;
|
||
currentLineEnd = tokenEnd;
|
||
currentLineWidth = testLineWidth;
|
||
tokenOffset = tokenEnd;
|
||
tokenIndex++;
|
||
continue;
|
||
}
|
||
|
||
// current line is empty => just the token (word) is longer than `maxWidth` and needs to be wrapped
|
||
if (!currentLine) {
|
||
const wrappedWord = wrapWord(token, font, maxWidth, tokenStart);
|
||
const trailingLine = wrappedWord[wrappedWord.length - 1] ?? {
|
||
text: "",
|
||
start: tokenStart,
|
||
end: tokenStart,
|
||
};
|
||
const precedingLines = wrappedWord.slice(0, -1);
|
||
|
||
lines.push(...precedingLines);
|
||
|
||
// trailing line of the wrapped word might still be joined with next token/s
|
||
currentLine = trailingLine.text;
|
||
currentLineStart = trailingLine.start;
|
||
currentLineEnd = trailingLine.end;
|
||
currentLineWidth = getLineWidth(trailingLine.text, font);
|
||
tokenOffset = tokenEnd;
|
||
tokenIndex++;
|
||
} else {
|
||
// push & reset, but don't iterate on the next token, as we didn't use it yet!
|
||
lines.push(
|
||
trimLineEndAtSoftBreak(currentLine, currentLineStart, currentLineEnd),
|
||
);
|
||
|
||
// purposefully not iterating and not setting `currentLine` to `token`, so that we could use a simple !currentLine check above
|
||
currentLine = "";
|
||
currentLineStart = tokenStart;
|
||
currentLineEnd = tokenStart;
|
||
currentLineWidth = 0;
|
||
}
|
||
}
|
||
|
||
// iterator done, push the trailing line if exists
|
||
if (currentLine) {
|
||
const trailingLine = trimLine(
|
||
currentLine,
|
||
currentLineStart,
|
||
currentLineEnd,
|
||
font,
|
||
maxWidth,
|
||
);
|
||
lines.push(trailingLine);
|
||
}
|
||
|
||
return lines;
|
||
};
|
||
|
||
/**
|
||
* Wraps a single word that could not be placed on an empty line as-is.
|
||
*/
|
||
const wrapWord = (
|
||
word: string,
|
||
font: FontString,
|
||
maxWidth: number,
|
||
wordStart: number,
|
||
): WrappedTextLine[] => {
|
||
// multi-codepoint emojis are already broken apart and shouldn't be broken further
|
||
if (getEmojiRegex().test(word)) {
|
||
return [
|
||
{
|
||
text: word,
|
||
start: wordStart,
|
||
end: wordStart + word.length,
|
||
},
|
||
];
|
||
}
|
||
|
||
satisfiesWordInvariant(word);
|
||
|
||
const lines: WrappedTextLine[] = [];
|
||
const chars = Array.from(word);
|
||
|
||
let currentLine = "";
|
||
let currentLineStart = wordStart;
|
||
let currentLineEnd = wordStart;
|
||
let currentLineWidth = 0;
|
||
let offset = wordStart;
|
||
|
||
for (const char of chars) {
|
||
const charStart = offset;
|
||
const charEnd = charStart + char.length;
|
||
const _charWidth = charWidth.calculate(char, font);
|
||
const testLineWidth = currentLineWidth + _charWidth;
|
||
|
||
if (testLineWidth <= maxWidth) {
|
||
if (!currentLine) {
|
||
currentLineStart = charStart;
|
||
}
|
||
currentLine = currentLine + char;
|
||
currentLineEnd = charEnd;
|
||
currentLineWidth = testLineWidth;
|
||
offset = charEnd;
|
||
continue;
|
||
}
|
||
|
||
if (currentLine) {
|
||
lines.push({
|
||
text: currentLine,
|
||
start: currentLineStart,
|
||
end: currentLineEnd,
|
||
});
|
||
}
|
||
|
||
currentLine = char;
|
||
currentLineStart = charStart;
|
||
currentLineEnd = charEnd;
|
||
currentLineWidth = _charWidth;
|
||
offset = charEnd;
|
||
}
|
||
|
||
if (currentLine) {
|
||
lines.push({
|
||
text: currentLine,
|
||
start: currentLineStart,
|
||
end: currentLineEnd,
|
||
});
|
||
}
|
||
|
||
return lines;
|
||
};
|
||
|
||
/**
|
||
* Trims trailing whitespace that is exceeding the `maxWidth`.
|
||
*
|
||
* Used for the trailing visual line of a hard line, where some trailing
|
||
* whitespace may still be visible if it fits into the available width.
|
||
*/
|
||
const trimLine = (
|
||
line: string,
|
||
start: number,
|
||
end: number,
|
||
font: FontString,
|
||
maxWidth: number,
|
||
): WrappedTextLine => {
|
||
const shouldTrimWhitespaces = getLineWidth(line, font) > maxWidth;
|
||
|
||
if (!shouldTrimWhitespaces) {
|
||
return {
|
||
text: line,
|
||
start,
|
||
end,
|
||
};
|
||
}
|
||
|
||
// defensively default to `trimeEnd` in case the regex does not match
|
||
let [, trimmedLine, whitespaces] = line.match(/^(.+?)(\s+)$/) ?? [
|
||
line,
|
||
line.trimEnd(),
|
||
"",
|
||
];
|
||
|
||
let trimmedLineWidth = getLineWidth(trimmedLine, font);
|
||
|
||
for (const whitespace of Array.from(whitespaces)) {
|
||
const _charWidth = charWidth.calculate(whitespace, font);
|
||
const testLineWidth = trimmedLineWidth + _charWidth;
|
||
|
||
if (testLineWidth > maxWidth) {
|
||
break;
|
||
}
|
||
|
||
trimmedLine = trimmedLine + whitespace;
|
||
trimmedLineWidth = testLineWidth;
|
||
}
|
||
|
||
return {
|
||
text: trimmedLine,
|
||
start,
|
||
end: end - (line.length - trimmedLine.length),
|
||
};
|
||
};
|
||
|
||
/**
|
||
* Used for internal soft-wrap boundaries, where trailing whitespace should not
|
||
* survive into the rendered line even though it still exists in the original
|
||
* text.
|
||
*/
|
||
const trimLineEndAtSoftBreak = (
|
||
line: string,
|
||
start: number,
|
||
end: number,
|
||
): WrappedTextLine => {
|
||
const trimmedLine = line.trimEnd();
|
||
|
||
return {
|
||
text: trimmedLine,
|
||
start,
|
||
end: end - (line.length - trimmedLine.length),
|
||
};
|
||
};
|
||
|
||
/**
|
||
* Check if the given string is a single character.
|
||
*
|
||
* Handles multi-byte chars (é, 中) and purposefully does not handle multi-codepoint char (👨👩👧👦, 👩🏽🦰).
|
||
*/
|
||
const isSingleCharacter = (maybeSingleCharacter: string) => {
|
||
return (
|
||
maybeSingleCharacter.codePointAt(0) !== undefined &&
|
||
maybeSingleCharacter.codePointAt(1) === undefined
|
||
);
|
||
};
|
||
|
||
/**
|
||
* Invariant for the word wrapping algorithm.
|
||
*/
|
||
const satisfiesWordInvariant = (word: string) => {
|
||
if (isTestEnv() || isDevEnv()) {
|
||
if (/\s/.test(word)) {
|
||
throw new Error("Word should not contain any whitespaces!");
|
||
}
|
||
}
|
||
};
|