A naïve attempt at parsing CSS in JavaScript – Part 6: parsing opening and closing parentheses
A small addition today: tokens for the ( and ) characters. The bulk of the work is done by this new function:
// Turns the single character `char`, located at index `i`, into a token.
// Parentheses get their own token types; any other character is 'plain'.
// Returns [index just past the character, token].
const consumeSingleChar = (cssStr, i, char) => {
  let kind;
  if (char === '(') {
    kind = '(-token';
  } else if (char === ')') {
    kind = ')-token';
  } else {
    kind = 'plain';
  }
  return [i + 1, { t: kind, v: char }];
};
Full code:
// Matches a single ASCII letter.
const letterPattern = /[a-zA-Z]/;

// A non-ASCII code point is one with a value of U+0080 or greater.
// Anything that is not a string (e.g. `undefined` when reading past the end
// of the input) is never non-ASCII.
const isNonAscii = char => typeof char === 'string' && char.charCodeAt(0) >= 0x80;

// https://www.w3.org/TR/css-syntax-3/#name-start-code-point
// True when `char` is a letter, a non-ASCII code point or an underscore.
const isNameStartCodePoint = char => {
  // Callers index into the source string and may hand us `undefined` at the
  // end of input. Without this guard the regex test would coerce it to the
  // text "undefined" and match its letters.
  if (typeof char !== 'string') {
    return false;
  }
  return char === '_' || isNonAscii(char) || letterPattern.test(char);
};
// Matches a single ASCII decimal digit.
const digitPattern = /[0-9]/;

// https://www.w3.org/TR/css-syntax-3/#name-code-point
// True for a hyphen, a digit, or anything accepted by isNameStartCodePoint.
const isNameCodePoint = char => {
  if (char === '-') {
    return true;
  }
  if (digitPattern.test(char)) {
    return true;
  }
  return isNameStartCodePoint(char);
};
// Checks whether two consecutive characters form a valid escape:
// the first must be a backslash and the second must not be a newline.
const isValidEscape = (next, nPlus1) => next === '\\' && nPlus1 !== '\n';
// https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
// Decides whether the three characters n, nPlus1, nPlus2 would start an
// identifier.
const startsIdentifier = (n, nPlus1, nPlus2) => {
  const leadsWithHyphen = n === '-';
  if (!leadsWithHyphen) {
    // Either a name-start character, or a backslash opening a valid escape.
    if (isNameStartCodePoint(n)) {
      return true;
    }
    return n === '\\' && isValidEscape(n, nPlus1);
  }
  // After a leading hyphen: a name-start character, a second hyphen,
  // or a valid escape made of the following two characters.
  if (isNameStartCodePoint(nPlus1)) {
    return true;
  }
  if (nPlus1 === '-') {
    return true;
  }
  return isValidEscape(nPlus1, nPlus2);
};
// Consumes a comment whose opening "/*" starts at index iStart.
// Returns [index just past the comment, { t: 'comment', v: raw comment text }].
// An unterminated comment runs to the end of the input.
const consumeComment = (cssStr, iStart) => {
  // Look for the terminator only after the opening "/*". Otherwise a "*"
  // sitting right before the comment (as in "*/* c */") or the "*" of the
  // opener itself (as in "/*/ c */") would be mistaken for the closing "*/".
  const closeAt = cssStr.indexOf('*/', iStart + 2);
  const iEnd = closeAt === -1 ? cssStr.length : closeAt + 2;
  const commentToken = {
    t: 'comment',
    v: cssStr.slice(iStart, iEnd),
  };
  return [iEnd, commentToken];
};
// Consumes the character at iStart plus every space, newline or tab that
// directly follows it, and returns
// [index just past the run, { t: 'whitespace', v: consumed text }].
const consumeWhitespace = (cssStr, iStart) => {
  const isBlank = ch => ch === ' ' || ch === '\n' || ch === '\t';
  let text = '';
  let end = iStart;
  if (end < cssStr.length) {
    // The caller has already classified the first character; take it as is.
    text += cssStr[end];
    end += 1;
    while (end < cssStr.length && isBlank(cssStr[end])) {
      text += cssStr[end];
      end += 1;
    }
  }
  return [end, { t: 'whitespace', v: text }];
};
// Consumes a quoted string whose opening quote is at index iStart.
// The same quote character closes the string; a backslash keeps the
// character after it (including a quote) as part of the string.
// Returns [index just past the string, { t: 'string', v: raw text with quotes }].
// An unterminated string runs to the end of the input.
const consumeString = (cssStr, iStart) => {
  const endStringChar = cssStr[iStart];
  let cache = endStringChar;
  let i = iStart + 1;
  let reachedEnd = false;
  while (i < cssStr.length && !reachedEnd) {
    const current = cssStr[i];
    if (current === '\\') {
      // slice() stops at the end of the input, so a trailing backslash is
      // kept on its own instead of being followed by the text "undefined".
      cache += cssStr.slice(i, i + 2);
      i = Math.min(i + 2, cssStr.length);
    } else {
      cache += current;
      reachedEnd = current === endStringChar;
      i += 1;
    }
  }
  const stringToken = {
    t: 'string',
    v: cache,
  };
  return [i, stringToken];
};
// Consumes a name starting at index iStart: a run of name code points and
// escapes (a backslash plus the character after it, kept verbatim).
// Returns [index of the first character that is not part of the name, name].
const consumeName = (cssStr, iStart) => {
  let nameString = '';
  let i = iStart;
  // Check the bound before reading so nothing is read past the end of input.
  while (i < cssStr.length) {
    // Declared locally: assigning to an undeclared `next` creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    const next = cssStr[i];
    if (isNameCodePoint(next)) {
      nameString += next;
      i += 1;
    } else if (isValidEscape(next, cssStr[i + 1])) {
      // slice() stops at the end of the input, so a backslash that is the
      // last character is kept on its own.
      nameString += cssStr.slice(i, i + 2);
      i = Math.min(i + 2, cssStr.length);
    } else {
      break;
    }
  }
  return [i, nameString];
};
// Turns the single character `char`, located at index `i`, into a token.
// Parentheses get their own token types; any other character is 'plain'.
// Returns [index just past the character, token].
const consumeSingleChar = (cssStr, i, char) => {
  let kind;
  if (char === '(') {
    kind = '(-token';
  } else if (char === ')') {
    kind = ')-token';
  } else {
    kind = 'plain';
  }
  return [i + 1, { t: kind, v: char }];
};
// Consumes a hash token whose "#" is at index iStart.
// Returns [index just past the token, { t: 'hash' | 'hashId', v: '#' + name }].
// The type is 'hashId' when the characters after the "#" would start an
// identifier, and 'hash' otherwise.
const consumeHashToken = (cssStr, iStart) => {
  const hash = cssStr[iStart];
  // The identifier check applies to the three characters AFTER the "#".
  // Including the "#" itself can never match, so the token was never an id.
  // charAt() yields '' past the end of the input, which the helpers treat as
  // "no character" without throwing.
  const isId = startsIdentifier(
    cssStr.charAt(iStart + 1),
    cssStr.charAt(iStart + 2),
    cssStr.charAt(iStart + 3)
  );
  const [iEnd, name] = consumeName(cssStr, iStart + 1);
  const hashToken = {
    t: `hash${isId ? 'Id' : ''}`,
    v: `${hash}${name}`,
  };
  return [iEnd, hashToken];
};
// Splits a CSS source string into a flat list of tokens.
// Characters that do not start a recognised token are collected into
// 'plain' tokens, which are emitted just before the next recognised token
// and once more at the end of the input.
const parse = cssStr => {
  const tokens = [];
  let cache = '';

  // Emits any pending plain text as its own token.
  const flushPlain = () => {
    if (cache) {
      tokens.push({ t: 'plain', v: cache });
      cache = '';
    }
  };

  // Runs a consume* function at `iStart`, records its token and returns the
  // index at which scanning should continue.
  const emit = (consumeFunction, iStart, ...args) => {
    flushPlain();
    const [iNext, token] = consumeFunction(cssStr, iStart, ...args);
    tokens.push(token);
    return iNext;
  };

  let i = 0;
  while (i < cssStr.length) {
    const current = cssStr[i];
    const next = cssStr[i + 1];
    if (current === '/' && next === '*') {
      i = emit(consumeComment, i);
    } else if (current === ' ' || current === '\n' || current === '\t') {
      i = emit(consumeWhitespace, i);
    } else if (current === '"' || current === "'") {
      i = emit(consumeString, i);
    } else if (
      current === '#' &&
      (isNameCodePoint(next) || isValidEscape(next, cssStr[i + 2]))
    ) {
      i = emit(consumeHashToken, i);
    } else if (current === '(' || current === ')') {
      i = emit(consumeSingleChar, i, current);
    } else {
      cache += current;
      i += 1;
    }
  }

  flushPlain();
  return tokens;
};
// Demo: tokenize a small stylesheet and print the tokens as JSON.
const demoCss = `/* c */
#w{width:calc(100%);}`;
const demoTokens = parse(demoCss);
console.log(JSON.stringify(demoTokens));