A naïve attempt at parsing CSS in JavaScript – Part 8: using a css stream object

While working through tokenization from the specification, I’ve noticed some recurring patterns in how the input stream is read. I believe I can refactor these out into a fake stream object which uses the vocabulary of the specification.

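This CssFakeStream object needs to be able to:

- look at the next character without consuming it
- look ahead at the three characters after the next one
- report whether any input is left
- consume the next character and return it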

In code, that could look like this:

/**
 * A synchronous, in-memory stand-in for the input stream described by the CSS
 * syntax specification. The unread input is kept in `css`; consuming a
 * character removes it from the front of that string.
 */
class CssFakeStream {
	css = '';

	/**
	 * @param {string} css The text to read from. Defaults to the empty string.
	 */
	constructor(css = '') {
		this.css = css;
	}

	/** The next unread character, or '' when none is left. */
	get next() {
		const { css } = this;
		return css.charAt(0);
	}

	/** The character one position after `next`, or '' when out of range. */
	get nextPlus1() {
		const { css } = this;
		return css.charAt(1);
	}

	/** The character two positions after `next`, or '' when out of range. */
	get nextPlus2() {
		const { css } = this;
		return css.charAt(2);
	}

	/** The character three positions after `next`, or '' when out of range. */
	get nextPlus3() {
		const { css } = this;
		return css.charAt(3);
	}

	/**
	 * @returns {boolean} true while at least one unread character remains.
	 */
	hasNext() {
		// charCodeAt yields NaN only when index 0 does not exist.
		const firstUnit = this.css.charCodeAt(0);
		return !Number.isNaN(firstUnit);
	}

	/**
	 * Removes the next character from the stream.
	 * @returns {string} the removed character, or '' when the stream is empty.
	 */
	consume() {
		const { css } = this;
		this.css = css.slice(1);
		return css.charAt(0);
	}
}

I like this better because it reduces the number of variables floating around in the main parse function (don’t need to set a next character variable anymore, for example). It also means copies of the CSS string are no longer being thrown around into all the functions. Instead, just the reference to this object is passed around. Further, every time a character is consumed now, the CSS string gets smaller. Once the parsing is complete, the string is completely gone.

I called it a fake stream, because it’s not doing any asynchronous streaming operations and lacks many streaming concepts. It is only a stream in the sense that its functionality resembles the functionality required by the CSS specification when it talks about interacting with a stream.

Only the whitespace and comment parsing operations have been migrated to use this new system for now. Here is how it looks:

// Single-character classifiers shared by the tokenizer helpers below.
// Both patterns are unanchored and are only meant to be tested against one
// character at a time.
const letterPattern = /[a-zA-Z]/;
const digitPattern = /[0-9]/;

// https://www.w3.org/TR/css-syntax-3/#non-ascii-code-point
// A non-ASCII code point is U+0080 or greater, so the comparison must be
// inclusive. For the empty string charCodeAt(0) is NaN and this returns false.
const isNonAscii = char => char.charCodeAt(0) >= 0x80;

// https://www.w3.org/TR/css-syntax-3/#whitespace
// The spec folds carriage return and form feed into newline during input
// preprocessing. No preprocessing step is visible in this file, so both are
// accepted here directly; otherwise CRLF line endings would end up inside
// plain tokens.
const isWhitespace = char =>
	char === ' ' ||
	char === '\n' ||
	char === '\t' ||
	char === '\r' ||
	char === '\f';

// https://www.w3.org/TR/css-syntax-3/#name-start-code-point
// A name-start code point is an underscore, a non-ASCII character or a letter.
const isNameStartCodePoint = char => {
	if (char === '_') {
		return true;
	}
	return isNonAscii(char) || letterPattern.test(char);
};

// https://www.w3.org/TR/css-syntax-3/#name-code-point
// A name code point is a hyphen, a digit or any name-start code point.
const isNameCodePoint = char =>
	char === '-' || digitPattern.test(char) || isNameStartCodePoint(char);

// https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape
// Two characters form a valid escape when the first is a backslash and the
// second is anything other than a newline.
const isValidEscape = (next, nPlus1) => next === '\\' && nPlus1 !== '\n';

// https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
// Decides, from three characters of lookahead, whether they would begin an
// identifier.
const startsIdentifier = (n, nPlus1, nPlus2) => {
	if (n !== '-') {
		// Either a name-start character, or a backslash opening a valid escape.
		return isNameStartCodePoint(n) || (n === '\\' && isValidEscape(n, nPlus1));
	}
	// Leading hyphen: the decision rests on what follows it.
	return (
		isNameStartCodePoint(nPlus1) ||
		nPlus1 === '-' ||
		isValidEscape(nPlus1, nPlus2)
	);
};

// https://www.w3.org/TR/css-syntax-3/#starts-with-a-number
// Decides, from three characters of lookahead, whether they would begin a
// number.
const startsNumber = (n, nPlus1, nPlus2) => {
	switch (n) {
		case '-':
		case '+':
			// A sign must be followed by a digit, or by "." and then a digit.
			return (
				digitPattern.test(nPlus1) ||
				(nPlus1 === '.' && digitPattern.test(nPlus2))
			);
		case '.':
			// A leading dot needs a digit straight after it.
			return digitPattern.test(nPlus1);
		default:
			return digitPattern.test(n);
	}
};

/**
 * A synchronous, in-memory stand-in for the input stream described by the CSS
 * syntax specification. The unread input is kept in `css`; consuming a
 * character removes it from the front of that string.
 */
class CssFakeStream {
	css = '';

	/**
	 * @param {string} css The text to read from. Defaults to the empty string.
	 */
	constructor(css = '') {
		this.css = css;
	}

	/** The next unread character, or '' when none is left. */
	get next() {
		const { css } = this;
		return css.charAt(0);
	}

	/** The character one position after `next`, or '' when out of range. */
	get nextPlus1() {
		const { css } = this;
		return css.charAt(1);
	}

	/** The character two positions after `next`, or '' when out of range. */
	get nextPlus2() {
		const { css } = this;
		return css.charAt(2);
	}

	/** The character three positions after `next`, or '' when out of range. */
	get nextPlus3() {
		const { css } = this;
		return css.charAt(3);
	}

	/**
	 * @returns {boolean} true while at least one unread character remains.
	 */
	hasNext() {
		// charCodeAt yields NaN only when index 0 does not exist.
		const firstUnit = this.css.charCodeAt(0);
		return !Number.isNaN(firstUnit);
	}

	/**
	 * Removes the next character from the stream.
	 * @returns {string} the removed character, or '' when the stream is empty.
	 */
	consume() {
		const { css } = this;
		this.css = css.slice(1);
		return css.charAt(0);
	}
}

/**
 * https://www.w3.org/TR/css-syntax-3/#consume-comment
 * This returns a token so the comment can be highlighted and rendered. In the
 * spec, consuming comments would not return anything.
 *
 * Characters are consumed up to and including the first closing `*` `/` pair,
 * or to the end of the input when the comment is never closed.
 * @param {CssFakeStream} stream Expected to be positioned on the opening `/*`.
 * @returns {{t: string, v: string}} a 'comment-token' whose `v` is the
 *   consumed text, delimiters included.
 */
const consumeComment = stream => {
	let comment = '';
	let last = '';
	if (stream.next === '/' && stream.nextPlus1 === '*') {
		// Take the opener as one unit and leave `last` empty, so that the `*` of
		// the opener can never pair with a directly following `/` (as in `/*/`).
		comment += stream.consume();
		comment += stream.consume();
	} else {
		// Not positioned on an opener: still consume one character so a caller
		// looping on the stream always makes progress.
		last = stream.consume();
		comment += last;
	}
	let reachedCommentEnd = false;
	while (stream.hasNext() && !reachedCommentEnd) {
		const char = stream.consume();
		comment += char;
		reachedCommentEnd = last === '*' && char === '/';
		last = char;
	}
	return {
		t: 'comment-token',
		v: comment,
	};
};

/**
 * https://www.w3.org/TR/css-syntax-3/#consume-token
 * Consumes a run of whitespace and returns it as a single token.
 *
 * The first character is consumed unconditionally; the caller is expected to
 * have checked that it is whitespace. Consumption then continues for as long
 * as the next character is whitespace.
 * @param {CssFakeStream} stream
 * @returns {{t: string, v: string}} a 'whitespace-token' whose `v` is the
 *   consumed text.
 */
const consumeWhitespace = stream => {
	let whitespace = '';
	do {
		whitespace += stream.consume();
	} while (stream.hasNext() && isWhitespace(stream.next));
	// Return the literal directly: assigning it to an undeclared name would
	// create an implicit global, and throws in strict mode and ES modules.
	return {
		t: 'whitespace-token',
		v: whitespace,
	};
};

// https://www.w3.org/TR/css-syntax-3/#tokenizer-algorithms
/**
 * Splits a CSS string into comment, whitespace and plain tokens.
 *
 * Everything that is neither a comment nor whitespace is accumulated and
 * emitted as a 'plain-token'. Joining the `v` of every token in order gives
 * back the input.
 * @param {string} cssStr The CSS text to tokenize.
 * @returns {Array<{t: string, v: string}>} the tokens in source order.
 */
const parse = cssStr => {
	const tokens = [];
	const stream = new CssFakeStream(cssStr);
	let cache = '';

	// Emits the accumulated plain text, if any, as one token and resets it.
	const consumePlainToken = () => {
		if (cache) {
			tokens.push({
				t: 'plain-token',
				v: cache,
			});
			cache = '';
		}
	};

	while (stream.hasNext()) {
		if (stream.next === '/' && stream.nextPlus1 === '*') {
			// A comment starts only with "/" immediately followed by "*". A lone
			// "/" (as in `1/2`) or "*" (as in `* {}`) is ordinary content.
			consumePlainToken();
			tokens.push(consumeComment(stream));
		} else if (isWhitespace(stream.next)) {
			consumePlainToken();
			tokens.push(consumeWhitespace(stream));
		} else {
			cache += stream.consume();
		}
	}
	consumePlainToken();
	return tokens;
};

// Demo: tokenize a small stylesheet (a comment, a newline and one rule) and
// print the resulting token list as JSON.
const demoTokens = parse(`/* c */
#w{width:calc(+100%);}`);
console.log(JSON.stringify(demoTokens));

Leave a Reply

Your email address will not be published.

Color scheme: