// Package util provides utility functions for the goldmark.
package util

import (
	"bytes"
	"io"
	"net/url"
	"regexp"
	"sort"
	"strconv"
	"unicode"
	"unicode/utf8"
)

// A CopyOnWriteBuffer is a byte buffer that never modifies the slice it was
// created with: the first mutating call switches it to a newly allocated
// buffer.
type CopyOnWriteBuffer struct {
	buffer []byte
	copied bool
}

// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer that wraps the given
// slice without copying it.
func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
	return CopyOnWriteBuffer{
		buffer: buffer,
		copied: false,
	}
}

// Write writes the given bytes to the buffer.
// On the first mutating call Write allocates a new, empty buffer, so the
// wrapped contents are dropped rather than copied. Use Append to keep them.
func (b *CopyOnWriteBuffer) Write(value []byte) {
	if !b.copied {
		// Start from an empty buffer with room for the old length plus a
		// little slack.
		b.buffer = make([]byte, 0, len(b.buffer)+20)
		b.copied = true
	}
	b.buffer = append(b.buffer, value...)
}

// Append appends the given bytes to the buffer.
// On the first mutating call Append copies the wrapped contents into a new
// buffer before appending.
func (b *CopyOnWriteBuffer) Append(value []byte) {
	if !b.copied {
		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
		copy(tmp, b.buffer)
		b.buffer = tmp
		b.copied = true
	}
	b.buffer = append(b.buffer, value...)
}

// WriteByte writes the given byte to the buffer.
// On the first mutating call WriteByte allocates a new, empty buffer, so the
// wrapped contents are dropped rather than copied.
func (b *CopyOnWriteBuffer) WriteByte(c byte) {
	if !b.copied {
		b.buffer = make([]byte, 0, len(b.buffer)+20)
		b.copied = true
	}
	b.buffer = append(b.buffer, c)
}

// AppendByte appends the given byte to the buffer.
// On the first mutating call AppendByte copies the wrapped contents into a
// new buffer before appending.
func (b *CopyOnWriteBuffer) AppendByte(c byte) {
	if !b.copied {
		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
		copy(tmp, b.buffer)
		b.buffer = tmp
		b.copied = true
	}
	b.buffer = append(b.buffer, c)
}

// Bytes returns the current contents: the wrapped slice itself while nothing
// has been written, otherwise the private buffer.
func (b *CopyOnWriteBuffer) Bytes() []byte {
	return b.buffer
}

// IsCopied returns true if buffer has been copied, otherwise false.
func (b *CopyOnWriteBuffer) IsCopied() bool {
	return b.copied
}

// IsEscapedPunctuation returns true if character at a given index i
// is an escaped punctuation, otherwise false.
func IsEscapedPunctuation(source []byte, i int) bool {
	if source[i] != '\\' {
		return false
	}
	next := i + 1
	return next < len(source) && IsPunct(source[next])
}

// ReadWhile scans source from index[0] for as long as pred accepts the
// current byte and the position stays below index[1]. It returns the position
// where the scan stopped and whether at least one byte was accepted.
func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
	begin, end := index[0], index[1]
	pos := begin
	for pos < end && pred(source[pos]) {
		pos++
	}
	return pos, pos > begin
}

// IsBlank returns true if the given bytes consist only of space characters.
// An empty slice is blank.
func IsBlank(bs []byte) bool {
	pos := 0
	for pos < len(bs) && IsSpace(bs[pos]) {
		pos++
	}
	return pos == len(bs)
}

// VisualizeSpaces returns a copy of bs in which invisible space characters
// are replaced with readable markers.
func VisualizeSpaces(bs []byte) []byte {
	// Applied in order; a newline keeps its line break after the marker.
	replacements := [...][2]string{
		{" ", "[SPACE]"},
		{"\t", "[TAB]"},
		{"\n", "[NEWLINE]\n"},
		{"\r", "[CR]"},
	}
	for _, r := range replacements {
		bs = bytes.Replace(bs, []byte(r[0]), []byte(r[1]), -1)
	}
	return bs
}

// TabWidth calculates the actual width of a tab at the given position, using
// tab stops every 4 columns.
func TabWidth(currentPos int) int {
	const tabStop = 4
	return tabStop - currentPos%tabStop
}

// IndentPosition searches an indent position with the given width for the given line.
// If the line contains tab characters, paddings may be not zero.
// currentPos==0 and width==2:
//
//	position: 0    1
//	          [TAB]aaaa
//	width:    1234 5678
//
// width=2 is in the tab character. In this case, IndentPosition returns
// (pos=1, padding=2).
// It returns (-1, -1) when the line is indented by less than width.
func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
	if width == 0 {
		return 0, 0
	}
	var (
		indent int  // visual width of the leading whitespace seen so far
		idx    int  // byte offset of the first non-indent character
		sawTab bool // whether the leading whitespace contains a tab
	)
scan:
	for ; idx < len(bs); idx++ {
		switch bs[idx] {
		case '\t':
			indent += TabWidth(currentPos + indent)
			sawTab = true
		case ' ':
			indent++
		default:
			break scan
		}
	}
	if indent < width {
		return -1, -1
	}
	if sawTab {
		return idx, indent - width
	}
	// Spaces only: one byte per column, so the position equals the width.
	return width, 0
}

// IndentPositionPadding searches an indent position with the given width for the given line.
// This function is mostly same as IndentPosition except this function
// takes account into additional paddings.
func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) { if width == 0 { return 0, paddingv } w := 0 i := 0 l := len(bs) for ; i < l; i++ { if bs[i] == '\t' { w += TabWidth(currentPos + w) } else if bs[i] == ' ' { w++ } else { break } } if w >= width { return i - paddingv, w - width } return -1, -1 } // DedentPosition dedents lines by the given width. func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) { if width == 0 { return 0, 0 } w := 0 l := len(bs) i := 0 for ; i < l; i++ { if bs[i] == '\t' { w += TabWidth(currentPos + w) } else if bs[i] == ' ' { w++ } else { break } } if w >= width { return i, w - width } return i, 0 } // DedentPositionPadding dedents lines by the given width. // This function is mostly same as DedentPosition except this function // takes account into additional paddings. func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) { if width == 0 { return 0, paddingv } w := 0 i := 0 l := len(bs) for ; i < l; i++ { if bs[i] == '\t' { w += TabWidth(currentPos + w) } else if bs[i] == ' ' { w++ } else { break } } if w >= width { return i - paddingv, w - width } return i - paddingv, 0 } // IndentWidth calculate an indent width for the given line. func IndentWidth(bs []byte, currentPos int) (width, pos int) { l := len(bs) for i := 0; i < l; i++ { b := bs[i] if b == ' ' { width++ pos++ } else if b == '\t' { width += TabWidth(currentPos + width) pos++ } else { break } } return } // FirstNonSpacePosition returns a position line that is a first nonspace // character. func FirstNonSpacePosition(bs []byte) int { i := 0 for ; i < len(bs); i++ { c := bs[i] if c == ' ' || c == '\t' { continue } if c == '\n' { return -1 } return i } return -1 } // FindClosure returns a position that closes the given opener. // If codeSpan is set true, it ignores characters in code spans. // If allowNesting is set true, closures correspond to nested opener will be // ignored. 
func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int { i := 0 opened := 1 codeSpanOpener := 0 for i < len(bs) { c := bs[i] if codeSpan && codeSpanOpener != 0 && c == '`' { codeSpanCloser := 0 for ; i < len(bs); i++ { if bs[i] == '`' { codeSpanCloser++ } else { i-- break } } if codeSpanCloser == codeSpanOpener { codeSpanOpener = 0 } } else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) { i += 2 continue } else if codeSpan && codeSpanOpener == 0 && c == '`' { for ; i < len(bs); i++ { if bs[i] == '`' { codeSpanOpener++ } else { i-- break } } } else if (codeSpan && codeSpanOpener == 0) || !codeSpan { if c == closure { opened-- if opened == 0 { return i } } else if c == opener { if !allowNesting { return -1 } opened++ } } i++ } return -1 } // TrimLeft trims characters in the given s from head of the source. // bytes.TrimLeft offers same functionalities, but bytes.TrimLeft // allocates new buffer for the result. func TrimLeft(source, b []byte) []byte { i := 0 for ; i < len(source); i++ { c := source[i] found := false for j := 0; j < len(b); j++ { if c == b[j] { found = true break } } if !found { break } } return source[i:] } // TrimRight trims characters in the given s from tail of the source. func TrimRight(source, b []byte) []byte { i := len(source) - 1 for ; i >= 0; i-- { c := source[i] found := false for j := 0; j < len(b); j++ { if c == b[j] { found = true break } } if !found { break } } return source[:i+1] } // TrimLeftLength returns a length of leading specified characters. func TrimLeftLength(source, s []byte) int { return len(source) - len(TrimLeft(source, s)) } // TrimRightLength returns a length of trailing specified characters. func TrimRightLength(source, s []byte) int { return len(source) - len(TrimRight(source, s)) } // TrimLeftSpaceLength returns a length of leading space characters. 
func TrimLeftSpaceLength(source []byte) int { i := 0 for ; i < len(source); i++ { if !IsSpace(source[i]) { break } } return i } // TrimRightSpaceLength returns a length of trailing space characters. func TrimRightSpaceLength(source []byte) int { l := len(source) i := l - 1 for ; i >= 0; i-- { if !IsSpace(source[i]) { break } } if i < 0 { return l } return l - 1 - i } // TrimLeftSpace returns a subslice of the given string by slicing off all leading // space characters. func TrimLeftSpace(source []byte) []byte { return TrimLeft(source, spaces) } // TrimRightSpace returns a subslice of the given string by slicing off all trailing // space characters. func TrimRightSpace(source []byte) []byte { return TrimRight(source, spaces) } // DoFullUnicodeCaseFolding performs full unicode case folding to given bytes. func DoFullUnicodeCaseFolding(v []byte) []byte { var rbuf []byte cob := NewCopyOnWriteBuffer(v) n := 0 for i := 0; i < len(v); i++ { c := v[i] if c < 0xb5 { if c >= 0x41 && c <= 0x5a { // A-Z to a-z cob.Write(v[n:i]) cob.WriteByte(c + 32) n = i + 1 } continue } if !utf8.RuneStart(c) { continue } r, length := utf8.DecodeRune(v[i:]) if r == utf8.RuneError { continue } folded, ok := unicodeCaseFoldings[r] if !ok { continue } cob.Write(v[n:i]) if rbuf == nil { rbuf = make([]byte, 4) } for _, f := range folded { l := utf8.EncodeRune(rbuf, f) cob.Write(rbuf[:l]) } i += length - 1 n = i + 1 } if cob.IsCopied() { cob.Write(v[n:]) } return cob.Bytes() } // ReplaceSpaces replaces sequence of spaces with the given repl. func ReplaceSpaces(source []byte, repl byte) []byte { var ret []byte start := -1 for i, c := range source { iss := IsSpace(c) if start < 0 && iss { start = i continue } else if start >= 0 && iss { continue } else if start >= 0 { if ret == nil { ret = make([]byte, 0, len(source)) ret = append(ret, source[:start]...) 
} ret = append(ret, repl) start = -1 } if ret != nil { ret = append(ret, c) } } if start >= 0 && ret != nil { ret = append(ret, repl) } if ret == nil { return source } return ret } // ToRune decode given bytes start at pos and returns a rune. func ToRune(source []byte, pos int) rune { i := pos for ; i >= 0; i-- { if utf8.RuneStart(source[i]) { break } } r, _ := utf8.DecodeRune(source[i:]) return r } // ToValidRune returns 0xFFFD if the given rune is invalid, otherwise v. func ToValidRune(v rune) rune { if v == 0 || !utf8.ValidRune(v) { return rune(0xFFFD) } return v } // ToLinkReference converts given bytes into a valid link reference string. // ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower // case and replace spaces with a single space character. func ToLinkReference(v []byte) string { v = TrimLeftSpace(v) v = TrimRightSpace(v) v = DoFullUnicodeCaseFolding(v) return string(ReplaceSpaces(v, ' ')) } var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} // EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped, // otherwise nil. func EscapeHTMLByte(b byte) []byte { return htmlEscapeTable[b] } // EscapeHTML escapes characters that should be escaped in HTML text. func EscapeHTML(v []byte) []byte { cob := NewCopyOnWriteBuffer(v) n := 0 for i := 0; i < len(v); i++ { c := v[i] escaped := htmlEscapeTable[c] if escaped != nil { cob.Write(v[n:i]) cob.Write(escaped) n = i + 1 } } if cob.IsCopied() { cob.Write(v[n:]) } return cob.Bytes() } // UnescapePunctuations unescapes blackslash escaped punctuations. func UnescapePunctuations(source []byte) []byte { cob := NewCopyOnWriteBuffer(source) limit := len(source) n := 0 for i := 0; i < limit; { c := source[i] if i < limit-1 && c == '\\' && IsPunct(source[i+1]) { cob.Write(source[n:i]) cob.WriteByte(source[i+1]) i += 2 n = i continue } i++ } if cob.IsCopied() { cob.Write(source[n:]) } return cob.Bytes() } // ResolveNumericReferences resolve numeric references like 'Ӓ" . 
func ResolveNumericReferences(source []byte) []byte { cob := NewCopyOnWriteBuffer(source) buf := make([]byte, 6, 6) limit := len(source) ok := false n := 0 for i := 0; i < limit; i++ { if source[i] == '&' { pos := i next := i + 1 if next < limit && source[next] == '#' { nnext := next + 1 if nnext < limit { nc := source[nnext] // code point like #x22; if nnext < limit && nc == 'x' || nc == 'X' { start := nnext + 1 i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal) if ok && i < limit && source[i] == ';' { v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32) cob.Write(source[n:pos]) n = i + 1 runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) cob.Write(buf[:runeSize]) continue } // code point like #1234; } else if nc >= '0' && nc <= '9' { start := nnext i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric) if ok && i < limit && i-start < 8 && source[i] == ';' { v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32) cob.Write(source[n:pos]) n = i + 1 runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) cob.Write(buf[:runeSize]) continue } } } } i = next - 1 } } if cob.IsCopied() { cob.Write(source[n:]) } return cob.Bytes() } // ResolveEntityNames resolve entity references like 'ö" . func ResolveEntityNames(source []byte) []byte { cob := NewCopyOnWriteBuffer(source) limit := len(source) ok := false n := 0 for i := 0; i < limit; i++ { if source[i] == '&' { pos := i next := i + 1 if !(next < limit && source[next] == '#') { start := next i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric) if ok && i < limit && source[i] == ';' { name := BytesToReadOnlyString(source[start:i]) entity, ok := LookUpHTML5EntityByName(name) if ok { cob.Write(source[n:pos]) n = i + 1 cob.Write(entity.Characters) continue } } } i = next - 1 } } if cob.IsCopied() { cob.Write(source[n:]) } return cob.Bytes() } var htmlSpace = []byte("%20") // URLEscape escape the given URL. // If resolveReference is set true: // 1. 
unescape punctuations // 2. resolve numeric references // 3. resolve entity references // // URL encoded values (%xx) are kept as is. func URLEscape(v []byte, resolveReference bool) []byte { if resolveReference { v = UnescapePunctuations(v) v = ResolveNumericReferences(v) v = ResolveEntityNames(v) } cob := NewCopyOnWriteBuffer(v) limit := len(v) n := 0 for i := 0; i < limit; { c := v[i] if urlEscapeTable[c] == 1 { i++ continue } if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) { i += 3 continue } u8len := utf8lenTable[c] if u8len == 99 { // invalid utf8 leading byte, skip it i++ continue } if c == ' ' { cob.Write(v[n:i]) cob.Write(htmlSpace) i++ n = i continue } if int(u8len) >= len(v) { u8len = int8(len(v) - 1) } if u8len == 0 { i++ n = i continue } cob.Write(v[n:i]) stop := i + int(u8len) if stop > len(v) { i++ n = i continue } cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop])))) i += int(u8len) n = i } if cob.IsCopied() && n < limit { cob.Write(v[n:]) } return cob.Bytes() } // FindURLIndex returns a stop index value if the given bytes seem an URL. // This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* . func FindURLIndex(b []byte) int { i := 0 if !(len(b) > 0 && urlTable[b[i]]&7 == 7) { return -1 } i++ for ; i < len(b); i++ { c := b[i] if urlTable[c]&4 != 4 { break } } if i == 1 || i > 33 || i >= len(b) { return -1 } if b[i] != ':' { return -1 } i++ for ; i < len(b); i++ { c := b[i] if urlTable[c]&1 != 1 { break } } return i } var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`) // FindEmailIndex returns a stop index value if the given bytes seem an email address. 
func FindEmailIndex(b []byte) int { // TODO: eliminate regexps i := 0 for ; i < len(b); i++ { c := b[i] if emailTable[c]&1 != 1 { break } } if i == 0 { return -1 } if i >= len(b) || b[i] != '@' { return -1 } i++ if i >= len(b) { return -1 } match := emailDomainRegexp.FindSubmatchIndex(b[i:]) if match == nil { return -1 } return i + match[1] } var spaces = []byte(" \t\n\x0b\x0c\x0d") var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // a-zA-Z0-9, 
;/?:@&=+$,-_.!~*'()# var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99} var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 
1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // UTF8Len returns a byte length of the utf-8 character. func UTF8Len(b byte) int8 { return utf8lenTable[b] } // IsPunct returns true if the given character is a punctuation, otherwise false. func IsPunct(c byte) bool { return punctTable[c] == 1 } // IsPunct returns true if the given rune is a punctuation, otherwise false. func IsPunctRune(r rune) bool { return int32(r) <= 256 && IsPunct(byte(r)) || unicode.IsPunct(r) } // IsSpace returns true if the given character is a space, otherwise false. func IsSpace(c byte) bool { return spaceTable[c] == 1 } // IsSpace returns true if the given rune is a space, otherwise false. 
func IsSpaceRune(r rune) bool {
	// Only runes that fit into a byte may use the table. Converting any other
	// rune (256 and above, or negative) to a byte would wrap around and test
	// the wrong character.
	if r >= 0 && r < 256 && IsSpace(byte(r)) {
		return true
	}
	return unicode.IsSpace(r)
}

// IsNumeric returns true if the given character is a numeric, otherwise false.
func IsNumeric(c byte) bool {
	return c >= '0' && c <= '9'
}

// IsHexDecimal returns true if the given character is a hexdecimal, otherwise false.
func IsHexDecimal(c byte) bool {
	return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
}

// IsAlphaNumeric returns true if the given character is a alphabet or a numeric, otherwise false.
func IsAlphaNumeric(c byte) bool {
	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
}

// A BufWriter is a subset of the bufio.Writer .
type BufWriter interface {
	io.Writer
	Available() int
	Buffered() int
	Flush() error
	WriteByte(c byte) error
	WriteRune(r rune) (size int, err error)
	WriteString(s string) (int, error)
}

// A PrioritizedValue struct holds pair of an arbitrary value and a priority.
type PrioritizedValue struct {
	// Value is an arbitrary value that you want to prioritize.
	Value interface{}
	// Priority is a priority of the value.
	Priority int
}

// PrioritizedSlice is a slice of the PrioritizedValues.
type PrioritizedSlice []PrioritizedValue

// Sort sorts the PrioritizedSlice in ascending order of Priority.
// The sort is not stable: values with equal priorities may be reordered.
func (s PrioritizedSlice) Sort() {
	sort.Slice(s, func(i, j int) bool {
		return s[i].Priority < s[j].Priority
	})
}

// Remove removes the first element whose Value equals v and returns the
// shortened slice. The elements are shifted in place, so the receiver's
// backing array is modified. The slice is returned unchanged if v is not
// found.
func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
	for i := range s {
		if s[i].Value == v {
			return append(s[:i], s[i+1:]...)
		}
	}
	return s
}

// Prioritized returns a new PrioritizedValue.
func Prioritized(v interface{}, priority int) PrioritizedValue { return PrioritizedValue{v, priority} } func bytesHash(b []byte) uint64 { var hash uint64 = 5381 for _, c := range b { hash = ((hash << 5) + hash) + uint64(c) } return hash } // BytesFilter is a efficient data structure for checking whether bytes exist or not. // BytesFilter is thread-safe. type BytesFilter interface { // Add adds given bytes to this set. Add([]byte) // Contains return true if this set contains given bytes, otherwise false. Contains([]byte) bool // Extend copies this filter and adds given bytes to new filter. Extend(...[]byte) BytesFilter } type bytesFilter struct { chars [256]uint8 threshold int slots [][][]byte } // NewBytesFilter returns a new BytesFilter. func NewBytesFilter(elements ...[]byte) BytesFilter { s := &bytesFilter{ threshold: 3, slots: make([][][]byte, 64), } for _, element := range elements { s.Add(element) } return s } func (s *bytesFilter) Add(b []byte) { l := len(b) m := s.threshold if l < s.threshold { m = l } for i := 0; i < m; i++ { s.chars[b[i]] |= 1 << uint8(i) } h := bytesHash(b) % uint64(len(s.slots)) slot := s.slots[h] if slot == nil { slot = [][]byte{} } s.slots[h] = append(slot, b) } func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter { newFilter := NewBytesFilter().(*bytesFilter) newFilter.chars = s.chars newFilter.threshold = s.threshold for k, v := range s.slots { newSlot := make([][]byte, len(v)) copy(newSlot, v) newFilter.slots[k] = v } for _, b := range bs { newFilter.Add(b) } return newFilter } func (s *bytesFilter) Contains(b []byte) bool { l := len(b) m := s.threshold if l < s.threshold { m = l } for i := 0; i < m; i++ { if (s.chars[b[i]] & (1 << uint8(i))) == 0 { return false } } h := bytesHash(b) % uint64(len(s.slots)) slot := s.slots[h] if slot == nil || len(slot) == 0 { return false } for _, element := range slot { if bytes.Equal(element, b) { return true } } return false }