A naïve attempt at parsing CSS in JavaScript – Part 7: parsing numeric tokens starting with a plus sign

The major new functionality for today is the interpretation of numbers in CSS. The two relevant parts of the spec are "consume a numeric token" and "consume a number".

This interpretation is not completely spec-conformant, because the goal is just to accurately highlight CSS on a webpage, not to actually understand the CSS.

Here is what I came up with for consuming a number:

// https://www.w3.org/TR/css-syntax-3/#consume-number
// Consumes the textual representation of a number that starts at `iStart`:
// an optional sign, integer digits, an optional fraction (".5") and an
// optional exponent ("e3", "E-3", "e+3").
// Returns `[iEnd, token]`, where `iEnd` is the index of the first character
// after the number and `token` is `{ t: 'numeric-token', v: <text consumed> }`.
const consumeNumber = (cssStr, iStart) => {
	let repr = '';
	let i = iStart;
	const sign = cssStr[i];
	if (sign === '+' || sign === '-') {
		repr += sign;
		i++;
	}
	let next = cssStr[i];
	while (digitPattern.test(next)) {
		repr += next;
		next = cssStr[++i];
	}
	// A dot only belongs to the number when a digit follows it.
	if (next === '.' && digitPattern.test(cssStr[i + 1])) {
		repr += next + cssStr[i + 1];
		i += 2;
		next = cssStr[i];
		while (digitPattern.test(next)) {
			repr += next;
			next = cssStr[++i];
		}
	}
	if (next === 'e' || next === 'E') {
		const nPlus1 = cssStr[i + 1];
		// Length of the exponent prefix including its first digit:
		// 2 for "e5", 3 for "e+5" / "e-5", 0 when this is not an exponent.
		let exponentPrefixSize = 0;
		if (digitPattern.test(nPlus1)) {
			exponentPrefixSize = 2;
		} else if (
			(nPlus1 === '-' || nPlus1 === '+') &&
			digitPattern.test(cssStr[i + 2])
		) {
			exponentPrefixSize = 3;
		}
		if (exponentPrefixSize) {
			// Copy exactly the prefix; the remaining exponent digits are
			// appended by the loop below, so nothing is copied twice.
			repr += cssStr.substring(i, i + exponentPrefixSize);
			i += exponentPrefixSize;
			next = cssStr[i];
			while (digitPattern.test(next)) {
				repr += next;
				next = cssStr[++i];
			}
		}
	}
	// The spec converts the representation to a numeric value here, but this
	// parser only highlights, so the value of the number is irrelevant.
	return [
		i,
		{
			t: 'numeric-token',
			v: repr,
		},
	];
};

Here is what I came up with for consuming a numeric token:

// https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
// Consumes a number and whatever qualifies it. Returns `[iEnd, tokens]`,
// where `tokens` always starts with the numeric token and is followed by a
// 'dimension-token' (a unit name such as "px") or a 'percentage-token' when
// one directly follows the number.
const consumeNumericToken = (cssStr, iStart) => {
	const [iAfterNumber, numberToken] = consumeNumber(cssStr, iStart);
	const tokens = [numberToken];
	// charAt yields '' past the end of the input (indexing yields undefined),
	// so a number at the very end of the input hands startsIdentifier strings
	// only.
	const lookahead = [0, 1, 2].map(offset =>
		cssStr.charAt(iAfterNumber + offset)
	);
	if (startsIdentifier(...lookahead)) {
		const [iAfterUnit, unit] = consumeName(cssStr, iAfterNumber);
		tokens.push({
			t: 'dimension-token',
			v: unit,
		});
		return [iAfterUnit, tokens];
	}
	if (lookahead[0] === '%') {
		tokens.push({
			t: 'percentage-token',
			v: '%',
		});
		return [iAfterNumber + 1, tokens];
	}
	return [iAfterNumber, tokens];
};

The full code now looks like this:

// Matches exactly one ASCII letter. The pattern is anchored because
// RegExp.prototype.test coerces its argument to a string: unanchored, an
// `undefined` read past the end of the input would match via the text
// "undefined".
const letterPattern = /^[a-zA-Z]$/;

// True when `char` is a non-ASCII character. CSS Syntax Level 3 defines a
// non-ASCII code point as U+0080 or greater, so U+0080 itself is included.
// Anything that is not a string (such as `undefined` lookahead at the end of
// the input) is reported as not non-ASCII rather than throwing.
const isNonAscii = char =>
	typeof char === 'string' && char.charCodeAt(0) >= 0x80;

// https://www.w3.org/TR/css-syntax-3/#name-start-code-point
// True when `char` may begin a name: a letter, an underscore, or a non-ASCII
// character. Lookahead past the end of the input arrives here as `undefined`;
// that is answered with `false` up front, without consulting the helpers.
const isNameStartCodePoint = char => {
	if (typeof char !== 'string') {
		return false;
	}
	return char === '_' || isNonAscii(char) || letterPattern.test(char);
};

// Matches any string that contains an ASCII digit. The functions in this file
// test it against one character at a time.
const digitPattern = /[0-9]/;

// https://www.w3.org/TR/css-syntax-3/#name-code-point
// A name code point is a hyphen, a digit, or anything that may start a name.
const isNameCodePoint = char => {
	if (char === '-') {
		return true;
	}
	if (digitPattern.test(char)) {
		return true;
	}
	return isNameStartCodePoint(char);
};

// Two characters form a valid escape when the first is a backslash and the
// second is anything other than a newline.
const isValidEscape = (next, nPlus1) => next === '\\' && nPlus1 !== '\n';

// https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
// Decides from three consecutive characters whether an identifier begins at
// the first of them.
const startsIdentifier = (n, nPlus1, nPlus2) => {
	if (n !== '-') {
		// Either a plain name start, or a backslash that opens a valid escape.
		return Boolean(
			isNameStartCodePoint(n) || (n === '\\' && isValidEscape(n, nPlus1))
		);
	}
	// After a leading hyphen: a name start, a second hyphen, or an escape.
	return (
		isNameStartCodePoint(nPlus1) ||
		nPlus1 === '-' ||
		isValidEscape(nPlus1, nPlus2)
	);
};

// https://www.w3.org/TR/css-syntax-3/#starts-with-a-number
// Decides from three consecutive characters whether a number begins at the
// first of them.
const startsNumber = (n, nPlus1, nPlus2) => {
	switch (n) {
		case '+':
		case '-':
			// Signed: the sign must be followed by a digit or by ".<digit>".
			return (
				digitPattern.test(nPlus1) ||
				(nPlus1 === '.' && digitPattern.test(nPlus2))
			);
		case '.':
			return digitPattern.test(nPlus1);
		default:
			return digitPattern.test(n);
	}
};

// Consumes a comment. `iStart` is expected to be the index of the "/" of the
// opening "/*". Returns `[iEnd, token]`, where `iEnd` is the index just past
// the closing "*/" (or the end of the input when the comment is never
// closed) and `token` is `{ t: 'comment', v: <comment text with delimiters> }`.
const consumeComment = (cssStr, iStart) => {
	// Search for the terminator after the two opening characters, so the "*"
	// of the opener cannot double as the "*" of the terminator ("/*/" is
	// still open) and characters before `iStart` are never inspected.
	const closeAt = cssStr.indexOf('*/', iStart + 2);
	const iEnd = closeAt === -1 ? cssStr.length : closeAt + 2;
	const commentToken = {
		t: 'comment',
		v: cssStr.substring(iStart, iEnd),
	};
	return [iEnd, commentToken];
};

// Consumes the character at `iStart` plus every space, newline or tab that
// directly follows it. Returns `[iEnd, token]` with `iEnd` pointing just past
// the run and `token` being `{ t: 'whitespace', v: <text consumed> }`.
const consumeWhitespace = (cssStr, iStart) => {
	const isBlank = ch => ch === ' ' || ch === '\n' || ch === '\t';
	let text = '';
	let end = iStart;
	while (end < cssStr.length) {
		// The character under the cursor is always taken; only the one after
		// it decides whether the run continues.
		text += cssStr[end];
		end += 1;
		if (!isBlank(cssStr[end])) {
			break;
		}
	}
	return [end, { t: 'whitespace', v: text }];
};

// Consumes a quoted string. `iStart` is the index of the opening quote; the
// same character closes the string. Returns `[iEnd, token]`, where `iEnd` is
// the index just past the closing quote (or the end of the input when the
// string is never closed) and `token` is `{ t: 'string', v: <text with quotes> }`.
const consumeString = (cssStr, iStart) => {
	const endStringChar = cssStr[iStart];
	let cache = endStringChar;
	let i = iStart + 1;
	let reachedEnd = false;
	while (i < cssStr.length && !reachedEnd) {
		const current = cssStr[i];
		if (current === '\\') {
			// Copy the backslash together with the character it escapes, so an
			// escaped quote does not close the string. substring stops at the
			// end of the input, so a trailing backslash is copied on its own.
			const escapePair = cssStr.substring(i, i + 2);
			cache += escapePair;
			i += escapePair.length;
		} else {
			cache += current;
			reachedEnd = current === endStringChar;
			++i;
		}
	}
	const stringToken = {
		t: 'string',
		v: cache,
	};
	return [i, stringToken];
};

// Consumes a name (a run of name code points and escapes) starting at
// `iStart`. Returns `[iEnd, name]`, where `iEnd` is the index of the first
// character that is not part of the name and `name` is the text consumed.
const consumeName = (cssStr, iStart) => {
	let nameString = '';
	let i = iStart;
	// Looping on the index keeps every read inside the input.
	while (i < cssStr.length) {
		const next = cssStr[i];
		if (isNameCodePoint(next)) {
			nameString += next;
			i += 1;
		} else if (isValidEscape(next, cssStr[i + 1])) {
			// Copy the backslash and the escaped character. substring stops at
			// the end of the input, so a trailing backslash is copied alone.
			const escapePair = cssStr.substring(i, i + 2);
			nameString += escapePair;
			i += escapePair.length;
		} else {
			break;
		}
	}
	return [i, nameString];
};

// Wraps the single character `char` in a token. Parentheses get their own
// token types; every other character is tagged 'plain'. Returns
// `[i + 1, token]`.
const consumeSingleChar = (cssStr, i, char) => {
	let t = 'plain';
	if (char === '(') {
		t = '(-token';
	} else if (char === ')') {
		t = ')-token';
	}
	return [i + 1, { t, v: char }];
};

// https://www.w3.org/TR/css-syntax-3/#consume-number
// Consumes the textual representation of a number that starts at `iStart`:
// an optional sign, integer digits, an optional fraction (".5") and an
// optional exponent ("e3", "E-3", "e+3").
// Returns `[iEnd, token]`, where `iEnd` is the index of the first character
// after the number and `token` is `{ t: 'numeric-token', v: <text consumed> }`.
const consumeNumber = (cssStr, iStart) => {
	let repr = '';
	let i = iStart;
	const sign = cssStr[i];
	if (sign === '+' || sign === '-') {
		repr += sign;
		i++;
	}
	let next = cssStr[i];
	while (digitPattern.test(next)) {
		repr += next;
		next = cssStr[++i];
	}
	// A dot only belongs to the number when a digit follows it.
	if (next === '.' && digitPattern.test(cssStr[i + 1])) {
		repr += next + cssStr[i + 1];
		i += 2;
		next = cssStr[i];
		while (digitPattern.test(next)) {
			repr += next;
			next = cssStr[++i];
		}
	}
	if (next === 'e' || next === 'E') {
		const nPlus1 = cssStr[i + 1];
		// Length of the exponent prefix including its first digit:
		// 2 for "e5", 3 for "e+5" / "e-5", 0 when this is not an exponent.
		let exponentPrefixSize = 0;
		if (digitPattern.test(nPlus1)) {
			exponentPrefixSize = 2;
		} else if (
			(nPlus1 === '-' || nPlus1 === '+') &&
			digitPattern.test(cssStr[i + 2])
		) {
			exponentPrefixSize = 3;
		}
		if (exponentPrefixSize) {
			// Copy exactly the prefix; the remaining exponent digits are
			// appended by the loop below, so nothing is copied twice.
			repr += cssStr.substring(i, i + exponentPrefixSize);
			i += exponentPrefixSize;
			next = cssStr[i];
			while (digitPattern.test(next)) {
				repr += next;
				next = cssStr[++i];
			}
		}
	}
	// The spec converts the representation to a numeric value here, but this
	// parser only highlights, so the value of the number is irrelevant.
	return [
		i,
		{
			t: 'numeric-token',
			v: repr,
		},
	];
};

// https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
// Consumes a number and whatever qualifies it. Returns `[iEnd, tokens]`,
// where `tokens` always starts with the numeric token and is followed by a
// 'dimension-token' (a unit name such as "px") or a 'percentage-token' when
// one directly follows the number.
const consumeNumericToken = (cssStr, iStart) => {
	const [iAfterNumber, numberToken] = consumeNumber(cssStr, iStart);
	const tokens = [numberToken];
	// charAt yields '' past the end of the input (indexing yields undefined),
	// so a number at the very end of the input hands startsIdentifier strings
	// only.
	const lookahead = [0, 1, 2].map(offset =>
		cssStr.charAt(iAfterNumber + offset)
	);
	if (startsIdentifier(...lookahead)) {
		const [iAfterUnit, unit] = consumeName(cssStr, iAfterNumber);
		tokens.push({
			t: 'dimension-token',
			v: unit,
		});
		return [iAfterUnit, tokens];
	}
	if (lookahead[0] === '%') {
		tokens.push({
			t: 'percentage-token',
			v: '%',
		});
		return [iAfterNumber + 1, tokens];
	}
	return [iAfterNumber, tokens];
};

// Consumes a hash token. `iStart` is the index of the "#". Returns
// `[iEnd, token]`, where `iEnd` is the index just past the name and `token.v`
// is the "#" plus that name. `token.t` is 'hashId' when the characters after
// the "#" would start an identifier (for example "#w" or "#-x") and 'hash'
// otherwise (for example "#123").
const consumeHashToken = (cssStr, iStart) => {
	// The identifier check applies to the three characters that follow the
	// "#", not to the "#" itself. charAt yields '' past the end of the input,
	// so startsIdentifier only ever receives strings.
	const lookahead = [1, 2, 3].map(offset => cssStr.charAt(iStart + offset));
	const isId = startsIdentifier(...lookahead);
	const [iEnd, name] = consumeName(cssStr, iStart + 1);
	const hashToken = {
		t: `hash${isId ? 'Id' : ''}`,
		v: `${cssStr[iStart]}${name}`,
	};
	return [iEnd, hashToken];
};

// Splits `cssStr` into a flat list of `{ t, v }` tokens for highlighting.
// Comments, whitespace, strings, hash tokens, parentheses and numbers that
// start with a plus sign get their own tokens; all other text is collected
// into 'plain' tokens. Concatenating every `v` in order gives back the input.
const parse = cssStr => {
	const tokens = [];
	let cache = '';

	// Emits the text collected so far as one 'plain' token.
	const flushCache = () => {
		if (cache) {
			tokens.push({ t: 'plain', v: cache });
			cache = '';
		}
	};

	// Runs one consume* function at `iStart` and stores the token(s) it
	// produces. Returns the index of the last consumed character, because the
	// main loop's own `i++` then moves on to the first unconsumed one.
	const consumeToken = (consumeFunction, iStart, ...args) => {
		flushCache();
		const [iResult, tokenResult] = consumeFunction(cssStr, iStart, ...args);
		if (Array.isArray(tokenResult)) {
			tokens.push(...tokenResult);
		} else {
			tokens.push(tokenResult);
		}
		return iResult - 1;
	};

	for (let i = 0; i < cssStr.length; i++) {
		const current = cssStr[i];
		// charAt yields '' past the end of the input (indexing yields
		// undefined), so the lookahead checks below only ever see strings,
		// even when `current` is the final character.
		const next = cssStr.charAt(i + 1);
		const nPlus2 = cssStr.charAt(i + 2);
		if (current === '/' && next === '*') {
			i = consumeToken(consumeComment, i);
		} else if (current === ' ' || current === '\n' || current === '\t') {
			i = consumeToken(consumeWhitespace, i);
		} else if (current === '"' || current === "'") {
			i = consumeToken(consumeString, i);
		} else if (
			current === '#' &&
			(isNameCodePoint(next) || isValidEscape(next, nPlus2))
		) {
			i = consumeToken(consumeHashToken, i);
		} else if (current === '(' || current === ')') {
			i = consumeToken(consumeSingleChar, i, current);
		} else if (current === '+' && startsNumber(current, next, nPlus2)) {
			i = consumeToken(consumeNumericToken, i);
		} else {
			cache += current;
		}
	}
	flushCache();
	return tokens;
};

// Demo: tokenize a small stylesheet and print the token list as JSON.
const sampleCss = `/* c */
#w{width:calc(+100%);}`;
console.log(JSON.stringify(parse(sampleCss)));

// Result
// [{"t":"comment","v":"/* c */"},{"t":"whitespace","v":"\n"},{"t":"hash","v":"#w"},{"t":"plain","v":"{width:calc"},{"t":"(-token","v":"("},{"t":"numeric-token","v":"+100"},{"t":"percentage-token","v":"%"},{"t":")-token","v":")"},{"t":"plain","v":";}"}]
Color scheme: