A naïve attempt at parsing CSS in JavaScript – Part 4: parsing strings
So far, comments and whitespace are being parsed. As of this post, strings can be parsed too. The spec defines the process for consuming string tokens.
For this iteration, the consumption of comments, whitespace and strings has been split into separate functions. This makes it easier to see at a glance the rules governing how each type of token is parsed. The main function still throws all characters for which the parsing rules are not yet known into the non-standard ‘plain’ token type. This is not a spec-conformant implementation of string consumption because it does not yet handle escaped hex digits. Nor does it properly handle errors when strings are incorrectly terminated.
/**
 * Consumes a CSS comment that opens at `iStart`.
 *
 * The search for the closing delimiter begins after the two-character
 * opener, so the `*` of the opener cannot pair with a directly following `/`
 * (as in the input `/*​/ a *​/`), and the character in front of `iStart` is
 * never inspected. An unterminated comment runs to the end of the input.
 *
 * @param {string} cssStr - The CSS source being tokenised.
 * @param {number} iStart - Index of the `/` that opens the comment.
 * @returns {[number, {t: string, v: string}]} The index just past the
 *   comment, and the comment token holding the raw comment text (delimiters
 *   included).
 */
const consumeComment = (cssStr, iStart) => {
  const DELIMITER_LENGTH = 2;
  const closeAt = cssStr.indexOf('*/', iStart + DELIMITER_LENGTH);
  // No closing delimiter: treat the rest of the input as the comment.
  const end = closeAt === -1 ? cssStr.length : closeAt + DELIMITER_LENGTH;
  const commentToken = {
    t: 'comment',
    v: cssStr.slice(iStart, end),
  };
  return [end, commentToken];
};
/**
 * Consumes a run of whitespace (space, newline or tab) starting at `iStart`.
 *
 * The character at `iStart` is taken unconditionally; only the characters
 * after it are tested for being whitespace.
 *
 * @param {string} cssStr - The CSS source being tokenised.
 * @param {number} iStart - Index of the first character of the run.
 * @returns {[number, {t: string, v: string}]} The index just past the run,
 *   and the whitespace token holding the consumed characters.
 */
const consumeWhitespace = (cssStr, iStart) => {
  const isWhitespace = (ch) => ch === ' ' || ch === '\n' || ch === '\t';
  let end = iStart;
  if (end < cssStr.length) {
    // The first character is accepted without being checked.
    end += 1;
    while (end < cssStr.length && isWhitespace(cssStr[end])) {
      end += 1;
    }
  }
  return [end, { t: 'whitespace', v: cssStr.slice(iStart, end) }];
};
/**
 * Consumes a quoted CSS string that opens at `iStart`.
 *
 * The character at `iStart` is used as the quote character; the string ends
 * at the next unescaped occurrence of it, or at the end of the input. A
 * backslash keeps the character after it from being tested as the closing
 * quote. Hex escapes are not interpreted and an unescaped newline does not
 * end the string.
 *
 * @param {string} cssStr - The CSS source being tokenised.
 * @param {number} iStart - Index of the opening quote character.
 * @returns {[number, {t: string, v: string}]} The index just past the
 *   string, and the string token holding the raw text (quotes and
 *   backslashes included).
 */
const consumeString = (cssStr, iStart) => {
  const endStringChar = cssStr[iStart];
  let cache = endStringChar;
  let i = iStart + 1;
  let reachedEnd = false;
  while (i < cssStr.length && !reachedEnd) {
    const current = cssStr[i];
    if (current === '\\') {
      // Take the backslash together with the escaped character. slice() is
      // bounded by the input, so a trailing backslash contributes only itself
      // rather than the text "undefined".
      cache += cssStr.slice(i, i + 2);
      i = Math.min(i + 2, cssStr.length);
    } else {
      cache += current;
      reachedEnd = current === endStringChar;
      ++i;
    }
  }
  const stringToken = {
    t: 'string',
    v: cache,
  };
  return [i, stringToken];
};
/**
 * Splits a CSS source string into a flat list of tokens.
 *
 * Comments, whitespace runs and quoted strings are handed to their dedicated
 * consume functions. Every other character is accumulated and emitted as a
 * non-standard 'plain' token whenever a recognised token starts or the input
 * ends.
 *
 * @param {string} cssStr - The CSS source to tokenise.
 * @returns {Array<{t: string, v: string}>} Tokens in source order.
 */
const parse = (cssStr) => {
  const tokens = [];
  let plainRun = '';

  // Emits any pending unclassified characters as a single 'plain' token.
  const flushPlain = () => {
    if (plainRun) {
      tokens.push({ t: 'plain', v: plainRun });
      plainRun = '';
    }
  };

  // Returns the consume function for the token starting at `pos`, or null
  // when the character there has no known parsing rule yet.
  const pickConsumer = (pos) => {
    const ch = cssStr[pos];
    if (ch === '/' && cssStr[pos + 1] === '*') {
      return consumeComment;
    }
    if (ch === ' ' || ch === '\n' || ch === '\t') {
      return consumeWhitespace;
    }
    if (ch === '"' || ch === "'") {
      return consumeString;
    }
    return null;
  };

  let pos = 0;
  while (pos < cssStr.length) {
    const consumer = pickConsumer(pos);
    if (consumer === null) {
      plainRun += cssStr[pos];
      pos += 1;
    } else {
      flushPlain();
      const [nextPos, token] = consumer(cssStr, pos);
      tokens.push(token);
      // Resume at the index reported by the consume function.
      pos = nextPos;
    }
  }
  flushPlain();
  return tokens;
};
// Demo: tokenise a two-line sample containing a comment, a newline and a
// string with an escaped quote, then print the tokens as JSON.
const demoCss = `/* c */
p{content:'\\' ';color:blue;}`;
const demoTokens = parse(demoCss);
console.log(JSON.stringify(demoTokens));
// Result (shown here as a formatted literal; the actual output is a single
// line of JSON):
// [
// { t: 'comment', v: '/* c */' },
// { t: 'whitespace', v: '\n' },
// { t: 'plain', v: 'p{content:' },
// { t: 'string', v: "'\\' '" },
// { t: 'plain', v: ';color:blue;}' },
// ];