diff --git a/grammars/html.cson b/grammars/html.cson
index 215afe1..273d5df 100644
--- a/grammars/html.cson
+++ b/grammars/html.cson
@@ -398,7 +398,7 @@
]
}
{
- 'include': '#entities'
+ 'include': '#text-entities'
}
{
'match': '<>'
@@ -415,21 +415,61 @@
'include': '#python'
}
]
- 'entities':
+ 'text-entities':
+ # https://www.w3.org/TR/html51/syntax.html#consume-a-character-reference
'patterns': [
{
- 'match': '(&)([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+)(;)'
- 'captures':
+ 'begin': '(&)([a-zA-Z0-9]+|#\\d+|#[xX][0-9a-fA-F]+)'
+ 'beginCaptures':
'1':
'name': 'punctuation.definition.entity.begin.html'
'2':
'name': 'entity.name.entity.other.html'
- '3':
+ 'end': ';'
+ 'endCaptures':
+ '0':
+ 'name': 'punctuation.definition.entity.end.html'
+ 'name': 'constant.character.entity.html'
+ }
+ {
+ 'match': '&(?!\\s|<|&)'
+ 'name': 'invalid.illegal.bad-ampersand.html'
+ }
+ ]
+ 'attribute-entities':
+ # https://www.w3.org/TR/html51/syntax.html#consume-a-character-reference
+ # Because it would be infeasible to include the entire list of allowed entities,
+ # make sure that an equals sign or the end of a string does not follow a potential reference.
+ 'patterns': [
+ {
+ 'begin': '(&)(#\\d+|#[xX][0-9a-fA-F]+)'
+ 'beginCaptures':
+ '1':
+ 'name': 'punctuation.definition.entity.begin.html'
+ '2':
+ 'name': 'entity.name.entity.other.html'
+ 'end': ';'
+ 'endCaptures':
+ '0':
+ 'name': 'punctuation.definition.entity.end.html'
+ 'name': 'constant.character.entity.html'
+ }
+ {
+ 'begin': '(&)([a-zA-Z0-9]++)(?!["\'=])'
+ 'beginCaptures':
+ '1':
+ 'name': 'punctuation.definition.entity.begin.html'
+ '2':
+ 'name': 'entity.name.entity.other.html'
+ 'end': ';'
+ 'endCaptures':
+ '0':
'name': 'punctuation.definition.entity.end.html'
'name': 'constant.character.entity.html'
}
{
- 'match': '&'
+ # In attributes, potential references that end with an equals sign are fine
+ 'match': '&(?!\\s|<|&|[a-zA-Z0-9]+=)'
'name': 'invalid.illegal.bad-ampersand.html'
}
]
@@ -480,7 +520,7 @@
'include': '#embedded-code'
}
{
- 'include': '#entities'
+ 'include': '#attribute-entities'
}
]
'string-single-quoted':
@@ -498,7 +538,7 @@
'include': '#embedded-code'
}
{
- 'include': '#entities'
+ 'include': '#attribute-entities'
}
]
'tag-generic-attribute':
@@ -559,7 +599,7 @@
'include': '#embedded-code'
}
{
- 'include': '#entities'
+ 'include': '#attribute-entities'
}
]
}
@@ -579,7 +619,7 @@
'include': '#embedded-code'
}
{
- 'include': '#entities'
+ 'include': '#attribute-entities'
}
]
}
@@ -608,7 +648,7 @@
'unquoted-attribute':
'patterns': [
{
- 'include': '#entities'
+ 'include': '#attribute-entities'
}
{
# https://www.w3.org/TR/html51/syntax.html#attribute-value-unquoted-state
diff --git a/spec/html-spec.coffee b/spec/html-spec.coffee
index a6f7754..4e44b4d 100644
--- a/spec/html-spec.coffee
+++ b/spec/html-spec.coffee
@@ -376,14 +376,82 @@ describe 'HTML grammar', ->
expect(lines[2][1]).toEqual value: 'disabled', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-without-value.html', 'entity.other.attribute-name.html']
expect(lines[2][2]).toEqual value: '>', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'punctuation.definition.tag.end.html']
- describe "entities", ->
+ describe "entities in text", ->
it "tokenizes & and characters after it", ->
{tokens} = grammar.tokenizeLine '& &amp; &a'
- expect(tokens[0]).toEqual value: '&', scopes: ['text.html.basic', 'invalid.illegal.bad-ampersand.html']
- expect(tokens[3]).toEqual value: 'amp', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
- expect(tokens[4]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
- expect(tokens[7]).toEqual value: 'a', scopes: ['text.html.basic']
+ expect(tokens[0]).toEqual value: '& ', scopes: ['text.html.basic']
+ expect(tokens[1]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
+ expect(tokens[2]).toEqual value: 'amp', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
+ expect(tokens[3]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
+ expect(tokens[5]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
+ expect(tokens[6]).toEqual value: 'a', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
+
+ lines = grammar.tokenizeLines '&\n'
+ expect(lines[0][0]).toEqual value: '&', scopes: ['text.html.basic']
+
+ it "tokenizes hexadecimal and digit entities", ->
+ {tokens} = grammar.tokenizeLine '" " "'
+
+ expect(tokens[0]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
+ expect(tokens[1]).toEqual value: '#x00022', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
+ expect(tokens[2]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
+ expect(tokens[4]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
+ expect(tokens[5]).toEqual value: '#X00022', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
+ expect(tokens[6]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
+ expect(tokens[8]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
+ expect(tokens[9]).toEqual value: '#34', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
+ expect(tokens[10]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
+
+ it "tokenizes invalid ampersands", ->
+ {tokens} = grammar.tokenizeLine 'PSE&>'
+ expect(tokens[0]).toEqual value: 'PSE', scopes: ['text.html.basic']
+ expect(tokens[1]).toEqual value: '&', scopes: ['text.html.basic', 'invalid.illegal.bad-ampersand.html']
+ expect(tokens[2]).toEqual value: '>', scopes: ['text.html.basic']
+
+ {tokens} = grammar.tokenizeLine 'PSE&'
+ expect(tokens[0]).toEqual value: 'PSE&', scopes: ['text.html.basic']
+
+ {tokens} = grammar.tokenizeLine '&<'
+ expect(tokens[0]).toEqual value: '&<', scopes: ['text.html.basic']
+
+ {tokens} = grammar.tokenizeLine '& '
+ expect(tokens[0]).toEqual value: '& ', scopes: ['text.html.basic']
+
+ {tokens} = grammar.tokenizeLine '&'
+ expect(tokens[0]).toEqual value: '&', scopes: ['text.html.basic']
+
+ {tokens} = grammar.tokenizeLine '&&'
+ expect(tokens[0]).toEqual value: '&&', scopes: ['text.html.basic']
+
+ describe "entities in attributes", ->
+ it "tokenizes entities", ->
+ {tokens} = grammar.tokenizeLine '<a href="http://example.com?&amp;">'
+ expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
+ expect(tokens[8]).toEqual value: 'amp', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'constant.character.entity.html', 'entity.name.entity.other.html']
+ expect(tokens[9]).toEqual value: ';', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
+
+ it "does not tokenize query parameters as entities", ->
+ {tokens} = grammar.tokenizeLine '<a href="http://example.com?one=1&type=json&topic=css">'
+ expect(tokens[6]).toEqual value: 'http://example.com?one=1&type=json&topic=css', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']
+
+ it "tokenizes invalid ampersands", ->
+ # Note: in order to replicate the following tests' behaviors, make sure you have language-hyperlink disabled
+ {tokens} = grammar.tokenizeLine '<a href="http://example.com?&">'
+ expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'invalid.illegal.bad-ampersand.html']
+
+ {tokens} = grammar.tokenizeLine '<a href="http://example.com?&=">'
+ expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'invalid.illegal.bad-ampersand.html']
+
+ {tokens} = grammar.tokenizeLine '<a href="http://example.com?& ">'
+ expect(tokens[6]).toEqual value: 'http://example.com?& ', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']
+
+ lines = grammar.tokenizeLines '<a href="http://example.com?&\n">'
+ expect(lines[0][6]).toEqual value: 'http://example.com?&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']
+
+ {tokens} = grammar.tokenizeLine '<a href="http://example.com?&&">'
+ expect(tokens[6]).toEqual value: 'http://example.com?&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']
+ expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'invalid.illegal.bad-ampersand.html']
describe "firstLineMatch", ->
it "recognises HTML5 doctypes", ->