blob: ebced5b301a679ca434028615084750746d2016f [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
/**
* @unrestricted
*/
export class HTMLFormatter {
/**
* @param {!FormatterWorker.FormattedContentBuilder} builder
*/
constructor(builder) {
this._builder = builder;
this._jsFormatter = new FormatterWorker.JavaScriptFormatter(builder);
this._cssFormatter = new FormatterWorker.CSSFormatter(builder);
}
/**
* @param {string} text
* @param {!Array<number>} lineEndings
*/
format(text, lineEndings) {
this._text = text;
this._lineEndings = lineEndings;
this._model = new FormatterWorker.HTMLModel(text);
this._walk(this._model.document());
}
/**
* @param {!Element} element
* @param {number} offset
*/
_formatTokensTill(element, offset) {
while (this._model.peekToken() && this._model.peekToken().startOffset < offset) {
const token = this._model.nextToken();
this._formatToken(element, token);
}
}
/**
* @param {!Element} element
*/
_walk(element) {
if (element.parent) {
this._formatTokensTill(element.parent, element.openTag.startOffset);
}
this._beforeOpenTag(element);
this._formatTokensTill(element, element.openTag.endOffset);
this._afterOpenTag(element);
for (let i = 0; i < element.children.length; ++i) {
this._walk(element.children[i]);
}
this._formatTokensTill(element, element.closeTag.startOffset);
this._beforeCloseTag(element);
this._formatTokensTill(element, element.closeTag.endOffset);
this._afterCloseTag(element);
}
/**
* @param {!Element} element
*/
_beforeOpenTag(element) {
if (!element.children.length || element === this._model.document()) {
return;
}
this._builder.addNewLine();
}
/**
* @param {!Element} element
*/
_afterOpenTag(element) {
if (!element.children.length || element === this._model.document()) {
return;
}
this._builder.increaseNestingLevel();
this._builder.addNewLine();
}
/**
* @param {!Element} element
*/
_beforeCloseTag(element) {
if (!element.children.length || element === this._model.document()) {
return;
}
this._builder.decreaseNestingLevel();
this._builder.addNewLine();
}
/**
* @param {!Element} element
*/
_afterCloseTag(element) {
this._builder.addNewLine();
}
/**
* @param {!Element} element
* @param {!Token} token
*/
_formatToken(element, token) {
if (token.value.isWhitespace()) {
return;
}
if (token.type.has('comment') || token.type.has('meta')) {
this._builder.addNewLine();
this._builder.addToken(token.value.trim(), token.startOffset);
this._builder.addNewLine();
return;
}
const isBodyToken =
element.openTag.endOffset <= token.startOffset && token.startOffset < element.closeTag.startOffset;
if (isBodyToken && element.name === 'style') {
this._builder.addNewLine();
this._builder.increaseNestingLevel();
this._cssFormatter.format(this._text, this._lineEndings, token.startOffset, token.endOffset);
this._builder.decreaseNestingLevel();
return;
}
if (isBodyToken && element.name === 'script') {
this._builder.addNewLine();
this._builder.increaseNestingLevel();
if (this._scriptTagIsJavaScript(element)) {
this._jsFormatter.format(this._text, this._lineEndings, token.startOffset, token.endOffset);
} else {
this._builder.addToken(token.value, token.startOffset);
this._builder.addNewLine();
}
this._builder.decreaseNestingLevel();
return;
}
if (!isBodyToken && token.type.has('attribute')) {
this._builder.addSoftSpace();
}
this._builder.addToken(token.value, token.startOffset);
}
/**
* @param {!Element} element
* @return {boolean}
*/
_scriptTagIsJavaScript(element) {
if (!element.openTag.attributes.has('type')) {
return true;
}
let type = element.openTag.attributes.get('type').toLowerCase();
if (!type) {
return true;
}
const isWrappedInQuotes = /^(["\'])(.*)\1$/.exec(type.trim());
if (isWrappedInQuotes) {
type = isWrappedInQuotes[2];
}
return FormatterWorker.HTMLFormatter.SupportedJavaScriptMimeTypes.has(type.trim());
}
}
HTMLFormatter.SupportedJavaScriptMimeTypes = new Set([
'application/ecmascript', 'application/javascript', 'application/x-ecmascript', 'application/x-javascript',
'text/ecmascript', 'text/javascript', 'text/javascript1.0', 'text/javascript1.1', 'text/javascript1.2',
'text/javascript1.3', 'text/javascript1.4', 'text/javascript1.5', 'text/jscript', 'text/livescript',
'text/x-ecmascript', 'text/x-javascript'
]);
/**
* @unrestricted
*/
export class HTMLModel {
/**
* @param {string} text
*/
constructor(text) {
this._state = ParseState.Initial;
this._document = new Element('document');
this._document.openTag = new Tag('document', 0, 0, new Map(), true, false);
this._document.closeTag = new Tag('document', text.length, text.length, new Map(), false, false);
this._stack = [this._document];
this._tokens = [];
this._tokenIndex = 0;
this._build(text);
}
/**
* @param {string} text
*/
_build(text) {
const tokenizer = FormatterWorker.createTokenizer('text/html');
let lastOffset = 0;
const lowerCaseText = text.toLowerCase();
while (true) {
tokenizer(text.substring(lastOffset), processToken.bind(this, lastOffset));
if (lastOffset >= text.length) {
break;
}
const element = this._stack.peekLast();
lastOffset = lowerCaseText.indexOf('</' + element.name, lastOffset);
if (lastOffset === -1) {
lastOffset = text.length;
}
const tokenStart = element.openTag.endOffset;
const tokenEnd = lastOffset;
const tokenValue = text.substring(tokenStart, tokenEnd);
this._tokens.push(new Token(tokenValue, new Set(), tokenStart, tokenEnd));
}
while (this._stack.length > 1) {
const element = this._stack.peekLast();
this._popElement(new Tag(element.name, text.length, text.length, new Map(), false, false));
}
/**
* @param {number} baseOffset
* @param {string} tokenValue
* @param {?string} type
* @param {number} tokenStart
* @param {number} tokenEnd
* @return {(!Object|undefined)}
* @this {FormatterWorker.HTMLModel}
*/
function processToken(baseOffset, tokenValue, type, tokenStart, tokenEnd) {
tokenStart += baseOffset;
tokenEnd += baseOffset;
lastOffset = tokenEnd;
const tokenType = type ? new Set(type.split(' ')) : new Set();
const token = new Token(tokenValue, tokenType, tokenStart, tokenEnd);
this._tokens.push(token);
this._updateDOM(token);
const element = this._stack.peekLast();
if (element && (element.name === 'script' || element.name === 'style') &&
element.openTag.endOffset === lastOffset) {
return FormatterWorker.AbortTokenization;
}
}
}
/**
* @param {!Token} token
*/
_updateDOM(token) {
const S = ParseState;
const value = token.value;
const type = token.type;
switch (this._state) {
case S.Initial:
if (type.has('bracket') && (value === '<' || value === '</')) {
this._onStartTag(token);
this._state = S.Tag;
}
return;
case S.Tag:
if (type.has('tag') && !type.has('bracket')) {
this._tagName = value.trim().toLowerCase();
} else if (type.has('attribute')) {
this._attributeName = value.trim().toLowerCase();
this._attributes.set(this._attributeName, '');
this._state = S.AttributeName;
} else if (type.has('bracket') && (value === '>' || value === '/>')) {
this._onEndTag(token);
this._state = S.Initial;
}
return;
case S.AttributeName:
if (!type.size && value === '=') {
this._state = S.AttributeValue;
} else if (type.has('bracket') && (value === '>' || value === '/>')) {
this._onEndTag(token);
this._state = S.Initial;
}
return;
case S.AttributeValue:
if (type.has('string')) {
this._attributes.set(this._attributeName, value);
this._state = S.Tag;
} else if (type.has('bracket') && (value === '>' || value === '/>')) {
this._onEndTag(token);
this._state = S.Initial;
}
return;
}
}
/**
* @param {!Token} token
*/
_onStartTag(token) {
this._tagName = '';
this._tagStartOffset = token.startOffset;
this._tagEndOffset = null;
this._attributes = new Map();
this._attributeName = '';
this._isOpenTag = token.value === '<';
}
/**
* @param {!Token} token
*/
_onEndTag(token) {
this._tagEndOffset = token.endOffset;
const selfClosingTag = token.value === '/>' || SelfClosingTags.has(this._tagName);
const tag = new Tag(
this._tagName, this._tagStartOffset, this._tagEndOffset, this._attributes, this._isOpenTag, selfClosingTag);
this._onTagComplete(tag);
}
/**
* @param {!Tag} tag
*/
_onTagComplete(tag) {
if (tag.isOpenTag) {
const topElement = this._stack.peekLast();
if (topElement !== this._document && topElement.openTag.selfClosingTag) {
this._popElement(autocloseTag(topElement, topElement.openTag.endOffset));
} else if ((topElement.name in AutoClosingTags) && AutoClosingTags[topElement.name].has(tag.name)) {
this._popElement(autocloseTag(topElement, tag.startOffset));
}
this._pushElement(tag);
return;
}
while (this._stack.length > 1 && this._stack.peekLast().name !== tag.name) {
this._popElement(autocloseTag(this._stack.peekLast(), tag.startOffset));
}
if (this._stack.length === 1) {
return;
}
this._popElement(tag);
/**
* @param {!Element} element
* @param {number} offset
* @return {!Tag}
*/
function autocloseTag(element, offset) {
return new Tag(element.name, offset, offset, new Map(), false, false);
}
}
/**
* @param {!Tag} closeTag
*/
_popElement(closeTag) {
const element = this._stack.pop();
element.closeTag = closeTag;
}
/**
* @param {!Tag} openTag
*/
_pushElement(openTag) {
const topElement = this._stack.peekLast();
const newElement = new Element(openTag.name);
newElement.parent = topElement;
topElement.children.push(newElement);
newElement.openTag = openTag;
this._stack.push(newElement);
}
/**
* @return {?Token}
*/
peekToken() {
return this._tokenIndex < this._tokens.length ? this._tokens[this._tokenIndex] : null;
}
/**
* @return {?Token}
*/
nextToken() {
return this._tokens[this._tokenIndex++];
}
/**
* @return {!Element}
*/
document() {
return this._document;
}
}
const SelfClosingTags = new Set([
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source',
'track', 'wbr'
]);
// @see https://www.w3.org/TR/html/syntax.html 8.1.2.4 Optional tags
const AutoClosingTags = {
'head': new Set(['body']),
'li': new Set(['li']),
'dt': new Set(['dt', 'dd']),
'dd': new Set(['dt', 'dd']),
'p': new Set([
'address', 'article', 'aside', 'blockquote', 'div', 'dl', 'fieldset', 'footer', 'form',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
'main', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'
]),
'rb': new Set(['rb', 'rt', 'rtc', 'rp']),
'rt': new Set(['rb', 'rt', 'rtc', 'rp']),
'rtc': new Set(['rb', 'rtc', 'rp']),
'rp': new Set(['rb', 'rt', 'rtc', 'rp']),
'optgroup': new Set(['optgroup']),
'option': new Set(['option', 'optgroup']),
'colgroup': new Set(['colgroup']),
'thead': new Set(['tbody', 'tfoot']),
'tbody': new Set(['tbody', 'tfoot']),
'tfoot': new Set(['tbody']),
'tr': new Set(['tr']),
'td': new Set(['td', 'th']),
'th': new Set(['td', 'th']),
};
/** @enum {string} */
const ParseState = {
Initial: 'Initial',
Tag: 'Tag',
AttributeName: 'AttributeName',
AttributeValue: 'AttributeValue'
};
/**
* @unrestricted
*/
const Token = class {
/**
* @param {string} value
* @param {!Set<string>} type
* @param {number} startOffset
* @param {number} endOffset
*/
constructor(value, type, startOffset, endOffset) {
this.value = value;
this.type = type;
this.startOffset = startOffset;
this.endOffset = endOffset;
}
};
/**
* @unrestricted
*/
const Tag = class {
/**
* @param {string} name
* @param {number} startOffset
* @param {number} endOffset
* @param {!Map<string, string>} attributes
* @param {boolean} isOpenTag
* @param {boolean} selfClosingTag
*/
constructor(name, startOffset, endOffset, attributes, isOpenTag, selfClosingTag) {
this.name = name;
this.startOffset = startOffset;
this.endOffset = endOffset;
this.attributes = attributes;
this.isOpenTag = isOpenTag;
this.selfClosingTag = selfClosingTag;
}
};
/**
* @unrestricted
*/
const Element = class {
/**
* @param {string} name
*/
constructor(name) {
this.name = name;
this.children = [];
this.parent = null;
this.openTag = null;
this.closeTag = null;
}
};
/* Legacy exported object */
self.FormatterWorker = self.FormatterWorker || {};
/* Legacy exported object */
FormatterWorker = FormatterWorker || {};
/** @constructor */
FormatterWorker.HTMLFormatter = HTMLFormatter;
/** @constructor */
FormatterWorker.HTMLModel = HTMLModel;