A naïve attempt at parsing CSS in JavaScript – Part 7: parsing numeric tokens starting with a plus sign
The major new functionality for today is the interpretation of numbers in CSS. The two relevant parts of the spec are "consume a numeric token" and "consume a number".
This interpretation is not completely spec-conformant because the goal is just to accurately highlight CSS on a webpage, not to actually understand the CSS.
Here is what I came up with for consuming a number:
// https://www.w3.org/TR/css-syntax-3/#consume-number
// Reads the textual form of a CSS number that begins at `iStart`:
// an optional sign, integer digits, an optional `.digits` fraction and an
// optional exponent (`e`/`E`, optional sign, digits).
//
// cssStr: the stylesheet text being tokenized.
// iStart: index of the first character of the number (sign, digit or `.`).
// Returns `[i, token]` where `i` is the index of the first character after
// the number and `token` is `{ t: 'numeric-token', v: <matched text> }`.
const consumeNumber = (cssStr, iStart) => {
  let repr = '';
  let i = iStart;

  // Appends the run of digits at `i`. Reading past the end of the input yields
  // `undefined`, which digitPattern does not match, so the loop stops there.
  const consumeDigits = () => {
    while (digitPattern.test(cssStr[i])) {
      repr += cssStr[i];
      i++;
    }
  };

  if (cssStr[i] === '+' || cssStr[i] === '-') {
    repr += cssStr[i];
    i++;
  }
  consumeDigits();

  // A dot only belongs to the number when a digit follows it.
  if (cssStr[i] === '.' && digitPattern.test(cssStr[i + 1])) {
    repr += '.';
    i++;
    consumeDigits();
  }

  if (cssStr[i] === 'e' || cssStr[i] === 'E') {
    const hasSign = cssStr[i + 1] === '-' || cssStr[i + 1] === '+';
    // Length of the exponent prefix: `e` alone, or `e` plus its sign.
    const prefixLength = hasSign ? 2 : 1;
    // The prefix only counts as an exponent when a digit follows; otherwise
    // the `e` is left for the caller (it may start a unit such as `em`).
    if (digitPattern.test(cssStr[i + prefixLength])) {
      // Copy exactly the prefix; the digits are appended once by
      // consumeDigits, so no character is duplicated or over-read.
      repr += cssStr.substring(i, i + prefixLength);
      i += prefixLength;
      consumeDigits();
    }
  }

  // Here the spec says the value should be converted to a number, but this
  // parser just highlights, so the value of the number is irrelevant.
  return [
    i,
    {
      t: 'numeric-token',
      v: repr,
    },
  ];
};
Here is what I came up with for consuming a numeric token:
// https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
// Reads a number at `iStart` plus whatever qualifies it: a unit name
// (emitted as a 'dimension-token') or a percent sign (emitted as a
// 'percentage-token').
// Returns `[i, tokens]`: `i` is the index after everything consumed and
// `tokens` holds the numeric token followed by at most one qualifier token.
const consumeNumericToken = (cssStr, iStart) => {
  const [afterNumber, numberToken] = consumeNumber(cssStr, iStart);
  const first = cssStr[afterNumber];

  // A unit is present when the text after the number would start an identifier.
  if (startsIdentifier(first, cssStr[afterNumber + 1], cssStr[afterNumber + 2])) {
    const [afterUnit, unit] = consumeName(cssStr, afterNumber);
    return [afterUnit, [numberToken, { t: 'dimension-token', v: unit }]];
  }

  if (first === '%') {
    return [afterNumber + 1, [numberToken, { t: 'percentage-token', v: '%' }]];
  }

  return [afterNumber, [numberToken]];
};
The full code now looks like this:
// Matches one ASCII letter (a-z or A-Z). The pattern is unanchored, so test()
// is true when the string-coerced argument contains a letter anywhere; callers
// pass single characters. NOTE: test(undefined) coerces to the text
// "undefined" and therefore matches.
const letterPattern = /[a-zA-Z]/;
// Reports whether `char` is a non-ASCII code point, which CSS Syntax defines
// as U+0080 or above (so U+0080 itself is included).
// `char` must be a string; only its first UTF-16 code unit is inspected.
const isNonAscii = char => char.charCodeAt(0) >= 0x80;
// https://www.w3.org/TR/css-syntax-3/#name-start-code-point
// Reports whether `char` may begin a CSS name: an underscore, a non-ASCII
// character or an ASCII letter.
// `char` is a single character, or `undefined` when the caller read past the
// end of the input; anything that is not a non-empty string yields false.
const isNameStartCodePoint = char => {
  // Guard first: `undefined` would throw inside isNonAscii, and it would also
  // match letterPattern because test() coerces it to the text "undefined".
  if (typeof char !== 'string' || char === '') {
    return false;
  }
  return char === '_' || isNonAscii(char) || letterPattern.test(char);
};
// Matches one ASCII digit (0-9). The pattern is unanchored; callers pass
// single characters. test(undefined) is false because the coerced text
// "undefined" contains no digit, so probing past the end of the input is safe.
const digitPattern = /[0-9]/;
// https://www.w3.org/TR/css-syntax-3/#name-code-point
// Reports whether `char` may appear inside a CSS name: a hyphen, a digit, or
// anything that may start a name.
const isNameCodePoint = char => {
  if (char === '-') {
    return true;
  }
  if (digitPattern.test(char)) {
    return true;
  }
  return isNameStartCodePoint(char);
};
// Reports whether two consecutive characters form a valid escape: a backslash
// followed by anything other than a newline.
const isValidEscape = (next, nPlus1) => next === '\\' && nPlus1 !== '\n';
// https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
// Reports whether the three characters `n`, `nPlus1`, `nPlus2` would begin an
// identifier.
const startsIdentifier = (n, nPlus1, nPlus2) => {
  if (n !== '-') {
    // Without a leading hyphen: a name-start character, or a valid escape.
    if (isNameStartCodePoint(n)) {
      return true;
    }
    return n === '\\' && isValidEscape(n, nPlus1);
  }

  // With a leading hyphen the decision moves to what follows it: a name-start
  // character, a second hyphen, or a valid escape.
  return (
    isNameStartCodePoint(nPlus1) ||
    nPlus1 === '-' ||
    isValidEscape(nPlus1, nPlus2)
  );
};
// https://www.w3.org/TR/css-syntax-3/#starts-with-a-number
// Reports whether the three characters `n`, `nPlus1`, `nPlus2` would begin a
// number: a digit, a dot followed by a digit, or a sign followed by either.
const startsNumber = (n, nPlus1, nPlus2) => {
  const isDigit = candidate => digitPattern.test(candidate);

  switch (n) {
    case '+':
    case '-':
      return isDigit(nPlus1) || (nPlus1 === '.' && isDigit(nPlus2));
    case '.':
      return isDigit(nPlus1);
    default:
      return isDigit(n);
  }
};
// Reads a comment whose opening `/*` sits at `iStart`.
// cssStr: the stylesheet text; iStart: index of the opening slash.
// Returns `[i, token]` where `i` is the index just after the closing `*/`
// (or the end of the input when the comment is never closed) and `token` is
// `{ t: 'comment', v: <comment text including delimiters> }`.
const consumeComment = (cssStr, iStart) => {
  // Search for the terminator only after the two opening characters, so the
  // `*` of the opener cannot double as the `*` of the closer (as in `/*/`).
  const closeAt = cssStr.indexOf('*/', iStart + 2);
  const end =
    closeAt === -1 ? Math.max(cssStr.length, iStart) : closeAt + 2;
  const commentToken = {
    t: 'comment',
    v: cssStr.slice(iStart, end),
  };
  return [end, commentToken];
};
// Reads the character at `iStart` and every space, newline or tab that
// directly follows it.
// Returns `[i, token]` where `i` is the index after the run and `token` is
// `{ t: 'whitespace', v: <consumed text> }`.
const consumeWhitespace = (cssStr, iStart) => {
  const isBlank = ch => ch === ' ' || ch === '\n' || ch === '\t';
  let end = iStart;
  let text = '';

  while (end < cssStr.length) {
    text += cssStr[end];
    end += 1;
    // Stop as soon as the upcoming character is not whitespace.
    if (!isBlank(cssStr[end])) {
      break;
    }
  }

  return [end, { t: 'whitespace', v: text }];
};
// Reads a quoted string whose opening quote sits at `iStart`; the same
// character closes it. A backslash keeps the following character inside the
// string, so an escaped quote does not end it.
// Returns `[i, token]` where `i` is the index after the closing quote (or the
// end of the input when the string is never closed) and `token` is
// `{ t: 'string', v: <string text including quotes> }`.
const consumeString = (cssStr, iStart) => {
  const quote = cssStr[iStart];
  let value = quote;
  let i = iStart + 1;

  while (i < cssStr.length) {
    const current = cssStr[i];
    if (current === '\\') {
      // slice() copies the backslash plus the escaped character when there is
      // one; a trailing backslash no longer appends the text "undefined".
      value += cssStr.slice(i, i + 2);
      // Never report an index beyond the end of the input.
      i = Math.min(i + 2, cssStr.length);
      continue;
    }
    value += current;
    i++;
    if (current === quote) {
      break;
    }
  }

  const stringToken = {
    t: 'string',
    v: value,
  };
  return [i, stringToken];
};
// Reads a name (a run of name characters and escapes) starting at `iStart`.
// Returns `[i, name]` where `i` is the index of the first character that is
// not part of the name and `name` is the consumed text (escapes are kept
// verbatim, backslash included).
const consumeName = (cssStr, iStart) => {
  let nameString = '';
  let i = iStart;

  while (i < cssStr.length) {
    // Declared locally: assigning an undeclared `next` would create a global
    // and throws a ReferenceError in strict mode / ES modules.
    const current = cssStr[i];
    if (isNameCodePoint(current)) {
      nameString += current;
      i += 1;
    } else if (isValidEscape(current, cssStr[i + 1])) {
      // slice() copies the backslash plus the escaped character when there is
      // one, so a trailing backslash does not append the text "undefined".
      nameString += cssStr.slice(i, i + 2);
      i = Math.min(i + 2, cssStr.length);
    } else {
      break;
    }
  }

  return [i, nameString];
};
// Wraps the single character `char` found at index `i` in a token.
// Parentheses get their own token types; every other character is 'plain'.
// Returns `[i + 1, { t: <type>, v: char }]`.
const consumeSingleChar = (cssStr, i, char) => {
  const parenTokenTypes = new Map([
    ['(', '(-token'],
    [')', ')-token'],
  ]);
  const tokenType = parenTokenTypes.has(char)
    ? parenTokenTypes.get(char)
    : 'plain';
  return [i + 1, { t: tokenType, v: char }];
};
// https://www.w3.org/TR/css-syntax-3/#consume-number
// Reads the textual form of a CSS number that begins at `iStart`:
// an optional sign, integer digits, an optional `.digits` fraction and an
// optional exponent (`e`/`E`, optional sign, digits).
//
// cssStr: the stylesheet text being tokenized.
// iStart: index of the first character of the number (sign, digit or `.`).
// Returns `[i, token]` where `i` is the index of the first character after
// the number and `token` is `{ t: 'numeric-token', v: <matched text> }`.
const consumeNumber = (cssStr, iStart) => {
  let repr = '';
  let i = iStart;

  // Appends the run of digits at `i`. Reading past the end of the input yields
  // `undefined`, which digitPattern does not match, so the loop stops there.
  const consumeDigits = () => {
    while (digitPattern.test(cssStr[i])) {
      repr += cssStr[i];
      i++;
    }
  };

  if (cssStr[i] === '+' || cssStr[i] === '-') {
    repr += cssStr[i];
    i++;
  }
  consumeDigits();

  // A dot only belongs to the number when a digit follows it.
  if (cssStr[i] === '.' && digitPattern.test(cssStr[i + 1])) {
    repr += '.';
    i++;
    consumeDigits();
  }

  if (cssStr[i] === 'e' || cssStr[i] === 'E') {
    const hasSign = cssStr[i + 1] === '-' || cssStr[i + 1] === '+';
    // Length of the exponent prefix: `e` alone, or `e` plus its sign.
    const prefixLength = hasSign ? 2 : 1;
    // The prefix only counts as an exponent when a digit follows; otherwise
    // the `e` is left for the caller (it may start a unit such as `em`).
    if (digitPattern.test(cssStr[i + prefixLength])) {
      // Copy exactly the prefix; the digits are appended once by
      // consumeDigits, so no character is duplicated or over-read.
      repr += cssStr.substring(i, i + prefixLength);
      i += prefixLength;
      consumeDigits();
    }
  }

  // Here the spec says the value should be converted to a number, but this
  // parser just highlights, so the value of the number is irrelevant.
  return [
    i,
    {
      t: 'numeric-token',
      v: repr,
    },
  ];
};
// https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
// Reads a number at `iStart` plus whatever qualifies it: a unit name
// (emitted as a 'dimension-token') or a percent sign (emitted as a
// 'percentage-token').
// Returns `[i, tokens]`: `i` is the index after everything consumed and
// `tokens` holds the numeric token followed by at most one qualifier token.
const consumeNumericToken = (cssStr, iStart) => {
  const [afterNumber, numberToken] = consumeNumber(cssStr, iStart);
  const first = cssStr[afterNumber];

  // A unit is present when the text after the number would start an identifier.
  if (startsIdentifier(first, cssStr[afterNumber + 1], cssStr[afterNumber + 2])) {
    const [afterUnit, unit] = consumeName(cssStr, afterNumber);
    return [afterUnit, [numberToken, { t: 'dimension-token', v: unit }]];
  }

  if (first === '%') {
    return [afterNumber + 1, [numberToken, { t: 'percentage-token', v: '%' }]];
  }

  return [afterNumber, [numberToken]];
};
// Reads a hash token: the `#` at `iStart` followed by a name.
// Returns `[i, token]` where `i` is the index after the name and `token` is
// `{ t: 'hash' | 'hashId', v: '#' + name }`. The type is 'hashId' when the
// characters after the `#` would start an identifier (e.g. `#w`), and 'hash'
// otherwise (e.g. `#123`).
// NOTE: the identifier check used to be handed the `#` itself, so it never
// succeeded; sample output recorded before this fix shows 'hash' for `#w`.
const consumeHashToken = (cssStr, iStart) => {
  const nameStart = iStart + 1;
  // The id flag depends on the three characters that follow the `#`.
  const isId = startsIdentifier(
    cssStr[nameStart],
    cssStr[nameStart + 1],
    cssStr[nameStart + 2]
  );
  const nameResult = consumeName(cssStr, nameStart);
  const hashToken = {
    t: isId ? 'hashId' : 'hash',
    v: `${cssStr[iStart]}${nameResult[1]}`,
  };
  return [nameResult[0], hashToken];
};
// Splits `cssStr` into a flat array of `{ t, v }` tokens. Comments,
// whitespace, strings, hash tokens, parentheses and numbers that start with a
// plus sign get dedicated tokens; every other character is gathered into
// 'plain' tokens.
const parse = cssStr => {
  const tokens = [];
  // Characters not claimed by any consumer, waiting to be emitted as 'plain'.
  let pendingPlain = '';

  const flushPlain = () => {
    if (pendingPlain) {
      tokens.push({ t: 'plain', v: pendingPlain });
      pendingPlain = '';
    }
  };

  // Runs one consumer at `at`, records the token(s) it produced and returns
  // the index at which scanning resumes.
  const emit = (consumeFunction, at, ...extraArgs) => {
    flushPlain();
    const [resumeAt, produced] = consumeFunction(cssStr, at, ...extraArgs);
    if (Array.isArray(produced)) {
      for (const token of produced) {
        tokens.push(token);
      }
    } else {
      tokens.push(produced);
    }
    return resumeAt;
  };

  let i = 0;
  while (i < cssStr.length) {
    const current = cssStr[i];
    const next = cssStr[i + 1];

    if (current === '/' && next === '*') {
      i = emit(consumeComment, i);
    } else if (current === ' ' || current === '\n' || current === '\t') {
      i = emit(consumeWhitespace, i);
    } else if (current === '"' || current === "'") {
      i = emit(consumeString, i);
    } else if (
      current === '#' &&
      (isNameCodePoint(next) || isValidEscape(next, cssStr[i + 2]))
    ) {
      i = emit(consumeHashToken, i);
    } else if (current === '(' || current === ')') {
      i = emit(consumeSingleChar, i, current);
    } else if (current === '+' && startsNumber(current, next, cssStr[i + 2])) {
      i = emit(consumeNumericToken, i);
    } else {
      pendingPlain += current;
      i += 1;
    }
  }

  flushPlain();
  return tokens;
};
// Demo: tokenize a two-line stylesheet (a comment, then a rule containing a
// plus-signed percentage) and print the resulting tokens as JSON.
const demoCss = `/* c */
#w{width:calc(+100%);}`;
console.log(JSON.stringify(parse(demoCss)));
// Result
// [{"t":"comment","v":"/* c */"},{"t":"whitespace","v":"\n"},{"t":"hash","v":"#w"},{"t":"plain","v":"{width:calc"},{"t":"(-token","v":"("},{"t":"numeric-token","v":"+100"},{"t":"percentage-token","v":"%"},{"t":")-token","v":")"},{"t":"plain","v":";}"}]