upgrade to most recent bluemonday (#11007)

* upgrade to most recent bluemonday

* make vendor

* update tests for bluemonday

* update tests for bluemonday

* update tests for bluemonday
mj
techknowlogick 4 years ago committed by GitHub
parent 4c54477bb5
commit d00ebf445b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -75,7 +75,7 @@ require (
github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75
github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81
github.com/mgechev/revive v1.0.2
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a
github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912
github.com/mitchellh/go-homedir v1.1.0
github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc
github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5
@ -108,9 +108,9 @@ require (
github.com/yuin/goldmark v1.1.25
go.etcd.io/bbolt v1.3.3 // indirect
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd
golang.org/x/text v0.3.2
golang.org/x/tools v0.0.0-20200325010219-a49f79bcc224
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect

@ -75,6 +75,8 @@ github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:l
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA=
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
github.com/aws/aws-sdk-go v1.25.25/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@ -94,6 +96,8 @@ github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMc
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA=
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668/go.mod h1:H0wQNHz2YrLsuXOZozoeDmnHXkNCRmMW0gwFWDfEZDA=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/chris-ramon/douceur v0.2.0 h1:IDMEdxlEUUBYBKE4z/mJnFyVXox+MjuEVDJNN27glkU=
github.com/chris-ramon/douceur v0.2.0/go.mod h1:wDW5xjJdeoMm1mRt4sD4c/LbF/mWdEpRXQKjTR8nIBE=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/corbym/gocrest v1.0.3 h1:gwEdq6RkTmq+09CTuM29DfKOCtZ7G7bcyxs3IZ6EVdU=
github.com/corbym/gocrest v1.0.3/go.mod h1:maVFL5lbdS2PgfOQgGRWDYTeunSWQeiEgoNdTABShCs=
@ -312,6 +316,8 @@ github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKp
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg=
github.com/gorilla/handlers v1.4.2/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ=
github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk=
@ -426,8 +432,8 @@ github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81 h1:QASJXOGm2RZ5Ardbc8
github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg=
github.com/mgechev/revive v1.0.2 h1:v0NxxQ7fSFz/u1NQydPo6EGdq7va0J1BtsZmae6kzUg=
github.com/mgechev/revive v1.0.2/go.mod h1:rb0dQy1LVAxW9SWy5R3LPUjevzUbUS316U5MFySA2lo=
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a h1:d18LCO3ctH2kugUqt0pEyKKP8L+IYrocaPqGFilhTKk=
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a/go.mod h1:hsXNsILzKxV+sX77C5b8FSuKF00vh2OMYv+xgHpAMF4=
github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912 h1:hJde9rA24hlTcAYSwJoXpDUyGtfKQ/jsofw+WaDqGrI=
github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912/go.mod h1:8iwZnFn2CDDNZ0r6UXhF4xawGvzaqzCRa1n3/lO3W2w=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE=
@ -679,6 +685,8 @@ golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/oauth2 v0.0.0-20180620175406-ef147856a6dd/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -718,6 +726,8 @@ golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 h1:uYVVQ9WP/Ds2ROhcaGPeIdVq0RIXVLwsHlnvJ+cT1So=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=

@ -117,13 +117,13 @@ func TestRender_links(t *testing.T) {
`<p><a href="http://www.example.com/wpstyle/?p=364" rel="nofollow">http://www.example.com/wpstyle/?p=364</a></p>`)
test(
"https://www.example.com/foo/?bar=baz&inga=42&quux",
`<p><a href="https://www.example.com/foo/?bar=baz&amp;inga=42&amp;quux" rel="nofollow">https://www.example.com/foo/?bar=baz&amp;inga=42&amp;quux</a></p>`)
`<p><a href="https://www.example.com/foo/?bar=baz&inga=42&quux=" rel="nofollow">https://www.example.com/foo/?bar=baz&amp;inga=42&amp;quux</a></p>`)
test(
"http://142.42.1.1/",
`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`)
test(
"https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd",
`<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`)
`<p><a href="https://github.com/go-gitea/gitea/?p=aaa%2Fbbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`)
test(
"https://en.wikipedia.org/wiki/URL_(disambiguation)",
`<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`)
@ -141,7 +141,7 @@ func TestRender_links(t *testing.T) {
`<p><a href="ftp://gitea.com/file.txt" rel="nofollow">ftp://gitea.com/file.txt</a></p>`)
test(
"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download",
`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download</a></p>`)
`<p><a href="magnet:?dn=download&xt=urn%3Abtih%3A5dee65101db281ac9c46344cd6b175cdcadabcde" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download</a></p>`)
// Test that should *not* be turned into URL
test(

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 Aymerick JEHANNE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,60 @@
package css
import "fmt"
// Declaration represents a parsed style property.
type Declaration struct {
	// Property is the text printed before the colon.
	Property string
	// Value is the text printed after the colon.
	Value string
	// Important controls whether " !important" is appended when printing.
	Important bool
}

// NewDeclaration instantiates a new, zero-valued Declaration.
func NewDeclaration() *Declaration {
	return &Declaration{}
}

// String returns the string representation of the Declaration, including
// the " !important" suffix when Important is set.
func (decl *Declaration) String() string {
	return decl.StringWithImportant(true)
}

// StringWithImportant returns "<Property>: <Value>;". The " !important"
// suffix is inserted before the semicolon only when option is true and the
// declaration is marked Important.
func (decl *Declaration) StringWithImportant(option bool) string {
	result := fmt.Sprintf("%s: %s", decl.Property, decl.Value)

	if option && decl.Important {
		result += " !important"
	}

	return result + ";"
}

// Equal returns true if both Declarations have the same Property, Value and
// Important flag. Two nil declarations are equal; a nil declaration never
// equals a non-nil one.
func (decl *Declaration) Equal(other *Declaration) bool {
	// Guard against nil on either side instead of panicking on dereference.
	if decl == nil || other == nil {
		return decl == other
	}

	return decl.Property == other.Property &&
		decl.Value == other.Value &&
		decl.Important == other.Important
}
//
// DeclarationsByProperty
//

// DeclarationsByProperty is a list of declarations that can be sorted by
// property name. It provides the Len, Swap and Less methods required by
// sort.Interface.
type DeclarationsByProperty []*Declaration

// Len returns the number of declarations in the list.
func (d DeclarationsByProperty) Len() int { return len(d) }

// Swap exchanges the declarations at indexes i and j.
func (d DeclarationsByProperty) Swap(i, j int) { d[j], d[i] = d[i], d[j] }

// Less reports whether the property name at index i sorts before the one at
// index j.
func (d DeclarationsByProperty) Less(i, j int) bool {
	left, right := d[i], d[j]
	return left.Property < right.Property
}

@ -0,0 +1,230 @@
package css
import (
"fmt"
"strings"
)
// indentSpace is the number of spaces emitted per embedding level when a
// rule is printed.
const indentSpace = 2
// RuleKind represents a Rule kind
type RuleKind int

// Rule kinds
const (
	// QualifiedRule is a rule that is printed from its list of selectors.
	QualifiedRule RuleKind = iota
	// AtRule is a rule that is printed from its name (eg: "@media") and prelude.
	AtRule
)
// atRulesWithRulesBlock lists the At Rules that have Rules inside their block
// instead of Declarations.
var atRulesWithRulesBlock = []string{
	"@document", "@font-feature-values", "@keyframes", "@media", "@supports",
}
// Rule represents a parsed CSS rule
type Rule struct {
	// Kind tells whether this is a Qualified Rule or an At Rule
	Kind RuleKind
	// At Rule name (eg: "@media")
	Name string
	// Raw prelude
	Prelude string
	// Qualified Rule selectors parsed from prelude
	Selectors []string
	// Style properties
	Declarations []*Declaration
	// At Rule embedded rules
	Rules []*Rule
	// Current rule embedding level; it drives the indentation used when the
	// rule is printed
	EmbedLevel int
}
// NewRule instantiates a new Rule of the given kind; every other field is
// left at its zero value.
func NewRule(kind RuleKind) *Rule {
	rule := new(Rule)
	rule.Kind = kind
	return rule
}
// String returns a human readable name for the rule kind, or "WAT" for a
// value that is not one of the declared kinds.
func (kind RuleKind) String() string {
	if kind == QualifiedRule {
		return "Qualified Rule"
	}
	if kind == AtRule {
		return "At Rule"
	}
	return "WAT"
}
// EmbedsRules returns true if this rule is an At Rule whose name is listed in
// atRulesWithRulesBlock, i.e. one whose block holds rules rather than
// declarations.
func (rule *Rule) EmbedsRules() bool {
	if rule.Kind != AtRule {
		return false
	}

	for i := range atRulesWithRulesBlock {
		if atRulesWithRulesBlock[i] == rule.Name {
			return true
		}
	}

	return false
}
// Equal returns true if both rules have the same kind, prelude and name, and
// their selectors, declarations and embedded rules are pairwise equal, in
// order. EmbedLevel is not compared.
func (rule *Rule) Equal(other *Rule) bool {
	switch {
	case rule.Kind != other.Kind,
		rule.Prelude != other.Prelude,
		rule.Name != other.Name:
		return false
	case len(rule.Selectors) != len(other.Selectors),
		len(rule.Declarations) != len(other.Declarations),
		len(rule.Rules) != len(other.Rules):
		return false
	}

	// Lengths match from here on, so indexing into other is safe.
	for idx := range rule.Selectors {
		if rule.Selectors[idx] != other.Selectors[idx] {
			return false
		}
	}

	for idx, declaration := range rule.Declarations {
		if !declaration.Equal(other.Declarations[idx]) {
			return false
		}
	}

	for idx, subRule := range rule.Rules {
		if !subRule.Equal(other.Rules[idx]) {
			return false
		}
	}

	return true
}
// Diff returns one human readable line per difference found between rule and
// other. The returned slice is empty, but never nil, when nothing differs.
func (rule *Rule) Diff(other *Rule) []string {
	diffs := []string{}
	report := func(format string, args ...interface{}) {
		diffs = append(diffs, fmt.Sprintf(format, args...))
	}

	if rule.Kind != other.Kind {
		report("Kind: %s | %s", rule.Kind.String(), other.Kind.String())
	}

	if rule.Prelude != other.Prelude {
		report("Prelude: \"%s\" | \"%s\"", rule.Prelude, other.Prelude)
	}

	if rule.Name != other.Name {
		report("Name: \"%s\" | \"%s\"", rule.Name, other.Name)
	}

	// Lists are compared element by element only when their lengths match;
	// otherwise a single summary line is reported.
	if len(rule.Selectors) == len(other.Selectors) {
		for idx, selector := range rule.Selectors {
			if selector != other.Selectors[idx] {
				report("Selector: \"%s\" | \"%s\"", selector, other.Selectors[idx])
			}
		}
	} else {
		report("Selectors: %v | %v", strings.Join(rule.Selectors, ", "), strings.Join(other.Selectors, ", "))
	}

	if len(rule.Declarations) == len(other.Declarations) {
		for idx, declaration := range rule.Declarations {
			if !declaration.Equal(other.Declarations[idx]) {
				report("Declaration: \"%s\" | \"%s\"", declaration.String(), other.Declarations[idx].String())
			}
		}
	} else {
		report("Declarations Nb: %d | %d", len(rule.Declarations), len(other.Declarations))
	}

	if len(rule.Rules) == len(other.Rules) {
		for idx, subRule := range rule.Rules {
			if !subRule.Equal(other.Rules[idx]) {
				report("Rule: \"%s\" | \"%s\"", subRule.String(), other.Rules[idx].String())
			}
		}
	} else {
		report("Rules Nb: %d | %d", len(rule.Rules), len(other.Rules))
	}

	return diffs
}
// String returns the string representation of a rule.
//
// A Qualified Rule starts with its selectors joined by ", "; an At Rule
// starts with its name followed by its prelude. A rule with neither
// declarations nor embedded rules is terminated by ";". Otherwise a block is
// printed, holding the embedded rules when EmbedsRules reports true and the
// declarations when it does not, one per line, indented according to
// EmbedLevel.
func (rule *Rule) String() string {
	var b strings.Builder

	if rule.Kind == QualifiedRule {
		b.WriteString(strings.Join(rule.Selectors, ", "))
	} else {
		// AtRule
		b.WriteString(rule.Name)

		if rule.Prelude != "" {
			// Only separate the prelude from the name when a name was written.
			if b.Len() > 0 {
				b.WriteString(" ")
			}
			b.WriteString(rule.Prelude)
		}
	}

	if len(rule.Declarations) == 0 && len(rule.Rules) == 0 {
		b.WriteString(";")
		return b.String()
	}

	b.WriteString(" {\n")

	// The indentation is the same for every line of the block.
	pad := rule.indent()
	if rule.EmbedsRules() {
		for _, subRule := range rule.Rules {
			b.WriteString(pad)
			b.WriteString(subRule.String())
			b.WriteString("\n")
		}
	} else {
		for _, decl := range rule.Declarations {
			b.WriteString(pad)
			b.WriteString(decl.String())
			b.WriteString("\n")
		}
	}

	b.WriteString(rule.indentEndBlock())
	b.WriteString("}")

	return b.String()
}
// indent returns the indentation spaces for declarations and rules printed
// inside this rule's block: one level deeper than the rule itself.
func (rule *Rule) indent() string {
	width := (rule.EmbedLevel + 1) * indentSpace
	if width <= 0 {
		// strings.Repeat panics on a negative count; a non-positive width
		// simply means no indentation.
		return ""
	}

	return strings.Repeat(" ", width)
}
// indentEndBlock returns the indentation spaces for the end of block
// character: the same depth as the rule itself.
func (rule *Rule) indentEndBlock() string {
	width := rule.EmbedLevel * indentSpace
	if width <= 0 {
		// strings.Repeat panics on a negative count; a non-positive width
		// simply means no indentation.
		return ""
	}

	return strings.Repeat(" ", width)
}

@ -0,0 +1,25 @@
package css
// Stylesheet represents a parsed stylesheet: an ordered list of rules.
type Stylesheet struct {
	Rules []*Rule
}

// NewStylesheet instantiates a new, empty Stylesheet.
func NewStylesheet() *Stylesheet {
	return new(Stylesheet)
}

// String returns the string representation of the Stylesheet: the string
// form of every rule, in order, separated by a newline.
func (sheet *Stylesheet) String() string {
	var out string

	for idx := range sheet.Rules {
		// The separator is only needed once something has been written.
		if len(out) > 0 {
			out += "\n"
		}
		out += sheet.Rules[idx].String()
	}

	return out
}

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 Aymerick JEHANNE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,409 @@
package parser
import (
"errors"
"fmt"
"regexp"
"strings"
"github.com/gorilla/css/scanner"
"github.com/aymerick/douceur/css"
)
// importantSuffixRegexp matches a trailing "!important" marker, in any letter
// case, together with the whitespace around it.
const importantSuffixRegexp = `(?i)\s*!important\s*$`

// importantRegexp is the compiled form of importantSuffixRegexp. It is
// compiled once when the package variables are initialized, so no init
// function is needed.
var importantRegexp = regexp.MustCompile(importantSuffixRegexp)

// Parser represents a CSS parser
type Parser struct {
	scan *scanner.Scanner // Tokenizer

	// Tokens parsed but not consumed yet
	tokens []*scanner.Token

	// Rule embedding level
	embedLevel int
}
// NewParser instantiates a new parser that reads its tokens from a scanner
// created over txt.
func NewParser(txt string) *Parser {
	p := new(Parser)
	p.scan = scanner.New(txt)
	return p
}
// Parse parses a whole stylesheet. On failure it returns a nil stylesheet
// together with the error.
func Parse(text string) (*css.Stylesheet, error) {
	sheet, err := NewParser(text).ParseStylesheet()
	if err == nil {
		return sheet, nil
	}

	return nil, err
}
// ParseDeclarations parses CSS declarations. On failure it returns a nil
// slice together with the error.
func ParseDeclarations(text string) ([]*css.Declaration, error) {
	declarations, err := NewParser(text).ParseDeclarations()
	if err == nil {
		return declarations, nil
	}

	return nil, err
}
// ParseStylesheet parses a stylesheet: an optional BOM followed by a list of
// rules. The stylesheet returned alongside an error has no rules set.
func (parser *Parser) ParseStylesheet() (*css.Stylesheet, error) {
	sheet := css.NewStylesheet()

	// Skip the BOM, if any.
	_, err := parser.parseBOM()
	if err != nil {
		return sheet, err
	}

	// Everything else is a list of rules.
	parsed, err := parser.ParseRules()
	if err != nil {
		return sheet, err
	}
	sheet.Rules = parsed

	return sheet, nil
}
// ParseRules parses a list of rules.
//
// When the next token is "{" the list is treated as a block: the embedding
// level is raised while it is parsed and the matching "}" ends the list. A
// "}" met outside of a block is reported as an error. Whitespace, comments
// and CDO/CDC tokens between rules are skipped.
func (parser *Parser) ParseRules() ([]*css.Rule, error) {
	rules := []*css.Rule{}

	inBlock := parser.tokenChar("{")
	if inBlock {
		// parsing a block of rules
		parser.embedLevel++
		parser.shiftToken()
	}

	for parser.tokenParsable() {
		if parser.tokenIgnorable() {
			parser.shiftToken()
			continue
		}

		if parser.tokenChar("}") {
			if !inBlock {
				msg := fmt.Sprintf("Unexpected } character: %s", parser.nextToken().String())
				return rules, errors.New(msg)
			}

			// end of block
			parser.shiftToken()
			parser.embedLevel--

			return rules, parser.err()
		}

		rule, err := parser.ParseRule()
		if err != nil {
			return rules, err
		}

		rule.EmbedLevel = parser.embedLevel
		rules = append(rules, rule)
	}

	return rules, parser.err()
}
// ParseRule parses a rule: an At Rule when the next token is an at-keyword,
// a Qualified Rule otherwise.
func (parser *Parser) ParseRule() (*css.Rule, error) {
	if !parser.tokenAtKeyword() {
		return parser.parseQualifiedRule()
	}

	return parser.parseAtRule()
}
// ParseDeclarations parses a list of declarations.
//
// A leading "{" is consumed when present. Parsing stops after a "}" or when
// no parsable token is left. Whitespace, comments and CDO/CDC tokens between
// declarations are skipped.
func (parser *Parser) ParseDeclarations() ([]*css.Declaration, error) {
	declarations := []*css.Declaration{}

	if parser.tokenChar("{") {
		parser.shiftToken()
	}

	for parser.tokenParsable() {
		if parser.tokenIgnorable() {
			parser.shiftToken()
			continue
		}

		if parser.tokenChar("}") {
			// end of block
			parser.shiftToken()
			return declarations, parser.err()
		}

		declaration, err := parser.ParseDeclaration()
		if err != nil {
			return declarations, err
		}
		declarations = append(declarations, declaration)
	}

	return declarations, parser.err()
}
// ParseDeclaration parses a declaration.
//
// Tokens are accumulated until ":" (what was read becomes the property name)
// and then until ";" or "}" (what was read becomes the value). A ";" is
// consumed, a "}" is left for the caller. A trailing "!important" marker is
// removed from the value and recorded in the Important flag.
//
// A declaration that has a property name and is ended by the end of the
// input rather than by ";" or "}" keeps the value read so far, so that
// "color: red" and "color: red;" yield the same declaration.
//
// An error is returned when ";" or "}" is met before any property name, or
// when the tokenizer reports an error.
func (parser *Parser) ParseDeclaration() (*css.Declaration, error) {
	result := css.NewDeclaration()
	curValue := ""
	terminated := false

	// finishValue strips a trailing "!important" marker from the text
	// accumulated so far and stores what is left as the declaration value.
	finishValue := func() {
		if importantRegexp.MatchString(curValue) {
			result.Important = true
			curValue = importantRegexp.ReplaceAllString(curValue, "")
		}

		result.Value = strings.TrimSpace(curValue)
	}

	for parser.tokenParsable() {
		if parser.tokenChar(":") {
			result.Property = strings.TrimSpace(curValue)
			curValue = ""

			parser.shiftToken()
		} else if parser.tokenChar(";") || parser.tokenChar("}") {
			if result.Property == "" {
				// Name the character that was actually met: it may be "}".
				token := parser.nextToken()
				errMsg := fmt.Sprintf("Unexpected %s character: %s", token.Value, token.String())
				return result, errors.New(errMsg)
			}

			finishValue()

			if parser.tokenChar(";") {
				parser.shiftToken()
			}

			// finished
			terminated = true
			break
		} else {
			token := parser.shiftToken()
			curValue += token.Value
		}
	}

	// The input ended before any terminator: keep the value instead of
	// silently dropping it.
	if !terminated && result.Property != "" && parser.tokenEOF() {
		finishValue()
	}

	return result, parser.err()
}
// parseAtRule parses an At Rule.
//
// The next token is taken as the rule name (eg: "@import"). What follows is
// read as the prelude until either ";" ends the rule or "{" opens its block.
// The block is parsed as a list of rules when the rule embeds rules, and as a
// list of declarations otherwise.
func (parser *Parser) parseAtRule() (*css.Rule, error) {
	nameToken := parser.shiftToken()

	rule := css.NewRule(css.AtRule)
	rule.Name = nameToken.Value

	for parser.tokenParsable() {
		if parser.tokenChar(";") {
			parser.shiftToken()

			// rule without a block
			break
		}

		if parser.tokenChar("{") {
			if rule.EmbedsRules() {
				embedded, err := parser.ParseRules()
				if err != nil {
					return rule, err
				}
				rule.Rules = embedded
			} else {
				declarations, err := parser.ParseDeclarations()
				if err != nil {
					return rule, err
				}
				rule.Declarations = declarations
			}

			// the block ends the rule
			break
		}

		prelude, err := parser.parsePrelude()
		if err != nil {
			return rule, err
		}
		rule.Prelude = prelude
	}

	return rule, parser.err()
}
// parseQualifiedRule parses a Qualified Rule: a prelude made of comma
// separated selectors, followed by a block of declarations.
//
// An error is returned when "{" is met before any prelude, when ";" is met
// where a prelude or a block is expected, or when the tokenizer reports an
// error.
func (parser *Parser) parseQualifiedRule() (*css.Rule, error) {
	result := css.NewRule(css.QualifiedRule)

	for parser.tokenParsable() {
		if parser.tokenChar("{") {
			if result.Prelude == "" {
				errMsg := fmt.Sprintf("Unexpected { character: %s", parser.nextToken().String())
				return result, errors.New(errMsg)
			}

			// parse declarations block
			declarations, err := parser.ParseDeclarations()
			if err != nil {
				return result, err
			}

			result.Declarations = declarations

			// finished
			break
		}

		if parser.tokenChar(";") {
			// parsePrelude stops at ";" without consuming it, so asking for
			// a prelude here would never make progress and loop forever.
			errMsg := fmt.Sprintf("Unexpected ; character: %s", parser.nextToken().String())
			return result, errors.New(errMsg)
		}

		// parse prelude
		prelude, err := parser.parsePrelude()
		if err != nil {
			return result, err
		}

		result.Prelude = prelude
	}

	result.Selectors = strings.Split(result.Prelude, ",")
	for i, sel := range result.Selectors {
		result.Selectors[i] = strings.TrimSpace(sel)
	}

	return result, parser.err()
}
// parsePrelude reads a rule prelude: the concatenated values of every token
// up to, but not including, the next ";" or "{", with surrounding whitespace
// trimmed.
func (parser *Parser) parsePrelude() (string, error) {
	prelude := ""

	for {
		if !parser.tokenParsable() || parser.tokenEndOfPrelude() {
			break
		}
		prelude += parser.shiftToken().Value
	}

	prelude = strings.TrimSpace(prelude)

	return prelude, parser.err()
}
// parseBOM consumes the next token when it is a BOM and reports whether it
// did. When there is no BOM, the pending tokenizer error, if any, is returned.
func (parser *Parser) parseBOM() (bool, error) {
	if next := parser.nextToken(); next.Type != scanner.TokenBOM {
		return false, parser.err()
	}

	parser.shiftToken()

	return true, nil
}
// nextToken returns the next token without removing it from the tokens
// buffer, fetching one from the scanner first when the buffer is empty.
func (parser *Parser) nextToken() *scanner.Token {
	if len(parser.tokens) > 0 {
		return parser.tokens[0]
	}

	// Nothing buffered yet: fetch a token and queue it.
	fetched := parser.scan.Next()
	parser.tokens = append(parser.tokens, fetched)

	return parser.tokens[0]
}
// shiftToken returns the next token and removes it from the tokens buffer.
func (parser *Parser) shiftToken() *scanner.Token {
	// Peek first: nextToken fills the buffer when it is empty, so slicing
	// below can never go out of range.
	result := parser.nextToken()
	parser.tokens = parser.tokens[1:]

	return result
}
// err returns an error describing the next token when that token is a
// tokenizer error, or nil otherwise.
func (parser *Parser) err() error {
	if !parser.tokenError() {
		return nil
	}

	return fmt.Errorf("Tokenizer error: %s", parser.nextToken().String())
}
// tokenError reports whether the next token is a tokenizer error.
func (parser *Parser) tokenError() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenError
}
// tokenEOF reports whether the next token marks the end of the input.
func (parser *Parser) tokenEOF() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenEOF
}
// tokenWS reports whether the next token is whitespace.
func (parser *Parser) tokenWS() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenS
}
// tokenComment reports whether the next token is a comment.
func (parser *Parser) tokenComment() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenComment
}
// tokenCDOorCDC reports whether the next token is a CDO or a CDC.
func (parser *Parser) tokenCDOorCDC() bool {
	kind := parser.nextToken().Type
	return kind == scanner.TokenCDO || kind == scanner.TokenCDC
}
// tokenIgnorable reports whether the next token can be skipped: whitespace,
// a comment, or a CDO/CDC.
func (parser *Parser) tokenIgnorable() bool {
	if parser.tokenWS() {
		return true
	}
	if parser.tokenComment() {
		return true
	}
	return parser.tokenCDOorCDC()
}
// tokenParsable reports whether the next token can be parsed, i.e. it is
// neither the end of the input nor a tokenizer error.
func (parser *Parser) tokenParsable() bool {
	return !(parser.tokenEOF() || parser.tokenError())
}
// tokenAtKeyword reports whether the next token is an At Rule keyword.
func (parser *Parser) tokenAtKeyword() bool {
	next := parser.nextToken()
	return next.Type == scanner.TokenAtKeyword
}
// tokenChar reports whether the next token is a character token whose value
// is exactly the given string.
func (parser *Parser) tokenChar(value string) bool {
	next := parser.nextToken()
	if next.Type != scanner.TokenChar {
		return false
	}
	return next.Value == value
}
// tokenEndOfPrelude reports whether the next token marks the end of a
// prelude, i.e. it is ";" or "{".
func (parser *Parser) tokenEndOfPrelude() bool {
	if parser.tokenChar(";") {
		return true
	}
	return parser.tokenChar("{")
}

@ -0,0 +1,27 @@
Copyright (c) 2013, Gorilla web toolkit
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,33 @@
// Copyright 2012 The Gorilla Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package scanner generates tokens for a CSS3 input.
It follows the CSS3 specification located at:
http://www.w3.org/TR/css3-syntax/
To use it, create a new scanner for a given CSS string and call Next() until
the token returned has type TokenEOF or TokenError:
s := scanner.New(myCSS)
for {
token := s.Next()
if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError {
break
}
// Do something with the token...
}
Following the CSS3 specification, an error can only occur when the scanner
finds an unclosed quote or unclosed comment. In these cases the text becomes
"untokenizable". Everything else is tokenizable and it is up to a parser
to make sense of the token stream (or ignore nonsensical token sequences).
Note: the scanner doesn't perform lexical analysis or, in other words, it
doesn't care about the token context. It is intended to be used by a
lexer or parser.
*/
package scanner

@ -0,0 +1,356 @@
// Copyright 2012 The Gorilla Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package scanner
import (
"fmt"
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// tokenType identifies the type of lexical tokens.
type tokenType int

// String returns the name registered for the token type in tokenNames, or an
// empty string when the type has no registered name.
func (t tokenType) String() string {
	name := tokenNames[t]
	return name
}
// Token represents a token and the corresponding string, together with the
// line and column recorded for it.
type Token struct {
	Type   tokenType
	Value  string
	Line   int
	Column int
}

// String returns a string representation of the token: its type, position
// and quoted value. A value longer than 10 bytes is cut to its first 10
// characters and followed by "...".
func (t *Token) String() string {
	if len(t.Value) <= 10 {
		return fmt.Sprintf("%s (line: %d, column: %d): %q",
			t.Type, t.Line, t.Column, t.Value)
	}

	return fmt.Sprintf("%s (line: %d, column: %d): %.10q...",
		t.Type, t.Line, t.Column, t.Value)
}
// All tokens -----------------------------------------------------------------
// The complete list of tokens in CSS3.
//
// The values are assigned with iota, so the order of this list defines the
// numeric value of every token type.
const (
	// Scanner flags.
	TokenError tokenType = iota
	TokenEOF
	// From now on, only tokens from the CSS specification.
	TokenIdent
	TokenAtKeyword
	TokenString
	TokenHash
	TokenNumber
	TokenPercentage
	TokenDimension
	TokenURI
	TokenUnicodeRange
	TokenCDO
	TokenCDC
	TokenS
	TokenComment
	TokenFunction
	TokenIncludes
	TokenDashMatch
	TokenPrefixMatch
	TokenSuffixMatch
	TokenSubstringMatch
	TokenChar
	TokenBOM
)
// tokenNames maps each tokenType to its name. It is used for conversion to
// string: tokenType.String looks the name up here.
var tokenNames = map[tokenType]string{
	TokenError: "error",
	TokenEOF: "EOF",
	TokenIdent: "IDENT",
	TokenAtKeyword: "ATKEYWORD",
	TokenString: "STRING",
	TokenHash: "HASH",
	TokenNumber: "NUMBER",
	TokenPercentage: "PERCENTAGE",
	TokenDimension: "DIMENSION",
	TokenURI: "URI",
	TokenUnicodeRange: "UNICODE-RANGE",
	TokenCDO: "CDO",
	TokenCDC: "CDC",
	TokenS: "S",
	TokenComment: "COMMENT",
	TokenFunction: "FUNCTION",
	TokenIncludes: "INCLUDES",
	TokenDashMatch: "DASHMATCH",
	TokenPrefixMatch: "PREFIXMATCH",
	TokenSuffixMatch: "SUFFIXMATCH",
	TokenSubstringMatch: "SUBSTRINGMATCH",
	TokenChar: "CHAR",
	TokenBOM: "BOM",
}
// Macros and productions -----------------------------------------------------
// http://www.w3.org/TR/css3-syntax/#tokenization

// macroRegexp matches a macro reference of the form {name}, where name is
// made of lowercase letters only. Quantifiers such as {1,6} do not match.
var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)
// macros maps macro names to patterns to be expanded. A pattern may refer to
// other macros with the {name} syntax.
var macros = map[string]string{
	// must be escaped: `\.+*?()|[]{}^$`
	"ident": `-?{nmstart}{nmchar}*`,
	"name": `{nmchar}+`,
	"nmstart": `[a-zA-Z_]|{nonascii}|{escape}`,
	"nonascii": "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"unicode": `\\[0-9a-fA-F]{1,6}{wc}?`,
	"escape": "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"nmchar": `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
	"num": `[0-9]*\.[0-9]+|[0-9]+`,
	"string": `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
	"stringchar": `{urlchar}|[ ]|\\{nl}`,
	"nl": `[\n\r\f]|\r\n`,
	"w": `{wc}*`,
	"wc": `[\t\n\f\r ]`,
	// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
	// ASCII characters range = `[\u0020-\u007e]`
	// Skip space \u0020 = `[\u0021-\u007e]`
	// Skip quotation mark \u0022 = `[\u0021\u0023-\u007e]`
	// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
	// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d-\u007e]`
	// Finally, the left square bracket (\u005b) and right (\u005d) need escaping themselves
	"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
}
// productions maps the list of tokens to patterns to be expanded. The
// patterns refer to the entries of macros with the {name} syntax.
var productions = map[tokenType]string{
	// Unused regexps (matched using other methods) are commented out.
	TokenIdent: `{ident}`,
	TokenAtKeyword: `@{ident}`,
	TokenString: `{string}`,
	TokenHash: `#{name}`,
	TokenNumber: `{num}`,
	TokenPercentage: `{num}%`,
	TokenDimension: `{num}{ident}`,
	TokenURI: `url\({w}(?:{string}|{urlchar}*?){w}\)`,
	TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
	//TokenCDO: `<!--`,
	TokenCDC: `-->`,
	TokenS: `{wc}+`,
	TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
	TokenFunction: `{ident}\(`,
	//TokenIncludes: `~=`,
	//TokenDashMatch: `\|=`,
	//TokenPrefixMatch: `\^=`,
	//TokenSuffixMatch: `\$=`,
	//TokenSubstringMatch: `\*=`,
	//TokenChar: `[^"']`,
	//TokenBOM: "\uFEFF",
}
// matchers maps the list of tokens to compiled regular expressions.
//
// The map is filled on init() using the macros and productions defined in
// the CSS specification. Every expression is compiled with a leading "^" so
// it can only match at the beginning of the string it is given.
var matchers = map[tokenType]*regexp.Regexp{}
// matchOrder is the order to test regexps when first-char shortcuts
// can't be used.
//
// The first matching entry wins, so the order matters wherever one
// production is a prefix of another: "url(" would also match as a function,
// a function name or the "U" of a unicode range would also match as an
// identifier, and dimensions and percentages both begin with a number.
var matchOrder = []tokenType{
TokenURI,
TokenFunction,
TokenUnicodeRange,
TokenIdent,
TokenDimension,
TokenPercentage,
TokenNumber,
TokenCDC,
}
// init expands the macro references in every production and stores the
// compiled, start-anchored regular expression in matchers.
func init() {
	// expand turns a "{name}" reference into the pattern registered under
	// name, wrapped in a non-capturing group so alternations stay contained.
	expand := func(ref string) string {
		name := ref[1 : len(ref)-1]
		return "(?:" + macros[name] + ")"
	}
	for tok, pattern := range productions {
		// Macros can reference other macros, so keep substituting until no
		// reference is left in the pattern.
		for macroRegexp.MatchString(pattern) {
			pattern = macroRegexp.ReplaceAllStringFunc(pattern, expand)
		}
		anchored := "^(?:" + pattern + ")"
		matchers[tok] = regexp.MustCompile(anchored)
	}
}
// Scanner --------------------------------------------------------------------
// New creates a CSS scanner positioned at the first row and column of input.
//
// Windows-style line endings ("\r\n") are converted to "\n" before scanning.
func New(input string) *Scanner {
	normalized := strings.Replace(input, "\r\n", "\n", -1)
	scanner := &Scanner{input: normalized}
	scanner.row, scanner.col = 1, 1
	return scanner
}
// Scanner scans an input and emits tokens following the CSS3 specification.
//
// Create one with New; the scanner keeps its own cursor and is advanced by
// calling Next.
type Scanner struct {
// input is the text being scanned, after newline normalization in New.
input string
// pos is the byte offset into input of the next unread character.
pos int
// row is the line number of the next token; New starts it at 1.
row int
// col is the column of the next token, counted in runes; New starts it at 1.
col int
// err holds the EOF or error token once one has been produced; Next
// returns it on every later call.
err *Token
}
// Next returns the next token from the input.
//
// At the end of the input the token type is TokenEOF.
//
// If the input can't be tokenized the token type is TokenError. This occurs
// in case of unclosed quotation marks or comments.
//
// TokenEOF and TokenError are sticky: once either has been returned, every
// later call returns that same token.
//
// The row and column stored in each token refer to the token's first
// character; the column is counted in runes, not bytes.
func (s *Scanner) Next() *Token {
	if s.err != nil {
		return s.err
	}
	if s.pos >= len(s.input) {
		s.err = &Token{TokenEOF, "", s.row, s.col}
		return s.err
	}
	if s.pos == 0 {
		// Test BOM only once, at the beginning of the file.
		// The BOM is one rune encoded as three bytes, so it goes through
		// emitToken to advance the column by runes rather than bytes.
		if strings.HasPrefix(s.input, "\uFEFF") {
			return s.emitToken(TokenBOM, "\uFEFF")
		}
	}
	// There's a lot we can guess based on the first byte so we'll take a
	// shortcut before testing multiple regexps.
	input := s.input[s.pos:]
	switch input[0] {
	case '\t', '\n', '\f', '\r', ' ':
		// Whitespace.
		return s.emitToken(TokenS, matchers[TokenS].FindString(input))
	case '.':
		// Dot is too common to not have a quick check.
		// We'll test if this is a Char; if it is followed by a number it is a
		// dimension/percentage/number, and this will be matched later.
		if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
			return s.emitSimple(TokenChar, ".")
		}
	case '#':
		// Another common one: Hash or Char.
		if match := matchers[TokenHash].FindString(input); match != "" {
			return s.emitToken(TokenHash, match)
		}
		return s.emitSimple(TokenChar, "#")
	case '@':
		// Another common one: AtKeyword or Char.
		// An identifier may contain non-ASCII runes, and an escape may be
		// followed by a newline, so the full position update is required.
		if match := matchers[TokenAtKeyword].FindString(input); match != "" {
			return s.emitToken(TokenAtKeyword, match)
		}
		return s.emitSimple(TokenChar, "@")
	case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
		// More common chars.
		return s.emitSimple(TokenChar, string(input[0]))
	case '"', '\'':
		// String or error.
		if match := matchers[TokenString].FindString(input); match != "" {
			return s.emitToken(TokenString, match)
		}
		s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
		return s.err
	case '/':
		// Comment, error or Char.
		if len(input) > 1 && input[1] == '*' {
			if match := matchers[TokenComment].FindString(input); match != "" {
				return s.emitToken(TokenComment, match)
			}
			s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
			return s.err
		}
		return s.emitSimple(TokenChar, "/")
	case '~':
		// Includes or Char.
		return s.emitPrefixOrChar(TokenIncludes, "~=")
	case '|':
		// DashMatch or Char.
		return s.emitPrefixOrChar(TokenDashMatch, "|=")
	case '^':
		// PrefixMatch or Char.
		return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
	case '$':
		// SuffixMatch or Char.
		return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
	case '*':
		// SubstringMatch or Char.
		return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
	case '<':
		// CDO or Char.
		return s.emitPrefixOrChar(TokenCDO, "<!--")
	}
	// Test all regexps, in order.
	for _, token := range matchOrder {
		if match := matchers[token].FindString(input); match != "" {
			return s.emitToken(token, match)
		}
	}
	// We already handled unclosed quotation marks and comments,
	// so this can only be a Char.
	r, width := utf8.DecodeRuneInString(input)
	token := &Token{TokenChar, string(r), s.row, s.col}
	s.col++          // exactly one rune was consumed, whatever its encoded width
	s.pos += width   // pos is a byte index
	return token
}
// updatePosition advances the scanner past text, keeping the byte offset,
// the row and the rune-based column in sync with what was consumed.
func (s *Scanner) updatePosition(text string) {
	// pos is a byte index, unlike col which counts runes.
	s.pos += len(text)

	newlines := strings.Count(text, "\n")
	if newlines == 0 {
		s.col += utf8.RuneCountInString(text)
		return
	}
	s.row += newlines
	// Count from the last newline inclusive: the newline itself accounts for
	// the column being 1-based.
	tail := text[strings.LastIndex(text, "\n"):]
	s.col = utf8.RuneCountInString(tail)
}
// emitToken builds a Token of type t holding v, stamped with the position
// the scanner had before v was consumed, and then moves the scanner past v.
func (s *Scanner) emitToken(t tokenType, v string) *Token {
	row, col := s.row, s.col
	s.updatePosition(v)
	return &Token{t, v, row, col}
}
// emitSimple builds a Token of type t holding v and moves the scanner past
// it using a cheap position update.
//
// The caller guarantees that v is pure ASCII and contains no newline, so the
// byte length of v equals its width in columns and the row never changes.
func (s *Scanner) emitSimple(t tokenType, v string) *Token {
	row, col := s.row, s.col
	width := len(v)
	s.pos += width
	s.col += width
	return &Token{t, v, row, col}
}
// emitPrefixOrChar emits a token of type t when the unread input starts with
// prefix; failing that, it emits a Char token made of the first character of
// prefix.
//
// The caller guarantees that prefix is pure ASCII and contains no newline.
func (s *Scanner) emitPrefixOrChar(t tokenType, prefix string) *Token {
	rest := s.input[s.pos:]
	if !strings.HasPrefix(rest, prefix) {
		return s.emitSimple(TokenChar, string(prefix[0]))
	}
	return s.emitSimple(t, prefix)
}

@ -1,22 +1,15 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
# Test binary, built with `go test -c`
*.test
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
_testmain.go
*.exe
# goland idea folder
*.idea

@ -1,18 +1,22 @@
language: go
go:
- 1.1
- 1.2
- 1.3
- 1.4
- 1.5
- 1.6
- 1.7
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- tip
matrix:
allow_failures:
- go: tip
fast_finish: true
install:
- go get golang.org/x/net/html
- go get .
script:
- go test -v ./...

@ -6,6 +6,10 @@ Third-party patches are essential for keeping bluemonday secure and offering the
* Make sure you have a [Github account](https://github.com/signup/free)
## Guidelines
1. Do not vendor dependencies. As a security package, were we to vendor dependencies the projects that then vendor bluemonday may not receive the latest security updates to the dependencies. By not vendoring dependencies the project that implements bluemonday will vendor the latest version of any dependent packages. Vendoring is a project problem, not a package problem. bluemonday will be tested against the latest version of dependencies periodically and during any PR/merge.
## Submitting an Issue
* Submit a ticket for your issue, assuming one does not already exist

@ -1,6 +1,7 @@
1. John Graham-Cumming http://jgc.org/
1. Mohammad Gufran https://github.com/Gufran
1. Steven Gutzwiller https://github.com/StevenGutzwiller
1. Andrew Krasichkov @buglloc https://github.com/buglloc
1. Mike Samuel mikesamuel@gmail.com
1. Dmitri Shuralyov shurcooL@gmail.com
1. https://github.com/opennota
1. https://github.com/Gufran
1. https://github.com/opennota

@ -1,4 +1,4 @@
# bluemonday [![Build Status](https://travis-ci.org/microcosm-cc/bluemonday.svg?branch=master)](https://travis-ci.org/microcosm-cc/bluemonday) [![GoDoc](https://godoc.org/github.com/microcosm-cc/bluemonday?status.png)](https://godoc.org/github.com/microcosm-cc/bluemonday)
# bluemonday [![Build Status](https://travis-ci.org/microcosm-cc/bluemonday.svg?branch=master)](https://travis-ci.org/microcosm-cc/bluemonday) [![GoDoc](https://godoc.org/github.com/microcosm-cc/bluemonday?status.png)](https://godoc.org/github.com/microcosm-cc/bluemonday) [![Sourcegraph](https://sourcegraph.com/github.com/microcosm-cc/bluemonday/-/badge.svg)](https://sourcegraph.com/github.com/microcosm-cc/bluemonday?badge)
bluemonday is a HTML sanitizer implemented in Go. It is fast and highly configurable.
@ -58,10 +58,12 @@ We expect to be supplied with well-formatted HTML (closing elements for every ap
### Supported Go Versions
bluemonday is regularly tested against Go 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7 and tip.
bluemonday is tested against Go 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11, 1.12, and tip.
We do not support Go 1.0 as we depend on `golang.org/x/net/html` which includes a reference to `io.ErrNoProgress` which did not exist in Go 1.0.
We support Go 1.1 but Travis no longer tests against it.
## Is it production ready?
*Yes*
@ -87,7 +89,11 @@ import (
)
func main() {
// Do this once for each unique policy, and use the policy for the life of the program
// Policy creation/editing is not safe to use in multiple goroutines
p := bluemonday.UGCPolicy()
// The policy can then be used to sanitize lots of input and it is safe to use the policy in multiple goroutines
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)
@ -140,7 +146,7 @@ func main() {
We ship two default policies:
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on it's whitelist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its whitelist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* whitelist iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.
## Policy Building
@ -163,12 +169,26 @@ To add elements to a policy either add just the elements:
p.AllowElements("b", "strong")
```
Or using a regex:
_Note: if an element is added by name as shown above, any matching regex will be ignored_
It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed.
```go
p.AllowElementsMatching(regexp.MustCompile(`^my-element-`))
```
Or add elements by virtue of adding an attribute:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("nowrap").OnElements("td", "th")
```
Again, this also supports a regex pattern match alternative:
```go
p.AllowAttrs("nowrap").OnElementsMatching(regexp.MustCompile(`^my-element-`))
```
Attributes can either be added to all elements:
```go
p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally()
@ -198,6 +218,49 @@ p := bluemonday.UGCPolicy()
p.AllowElements("fieldset", "select", "option")
```
### Inline CSS
Although it's possible to handle inline CSS using `AllowAttrs` with a `Matching` rule, writing a single monolithic regular expression to safely process all inline CSS which you wish to allow is not a trivial task. Instead of attempting to do so, you can whitelist the `style` attribute on whichever element(s) you desire and use style policies to control and sanitize inline styles.
It is suggested that you use `Matching` (with a suitable regular expression),
`MatchingEnum`, or `MatchingHandler` to ensure each style matches your needs,
but default handlers are supplied for most widely used styles.
Similar to attributes, you can allow specific CSS properties to be set inline:
```go
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'color' property with valid RGB(A) hex values only (on any element allowed a 'style' attribute)
p.AllowStyles("color").Matching(regexp.MustCompile("(?i)^#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$")).Globally()
```
Additionally, you can allow a CSS property to be set only to an allowed value:
```go
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span")
```