330 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			330 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			Go
		
	
	
	
// Copyright 2015 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
package mime
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"encoding/base64"
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"strings"
 | 
						|
	"sync"
 | 
						|
	"unicode"
 | 
						|
	"unicode/utf8"
 | 
						|
)
 | 
						|
 | 
						|
// A WordEncoder is an RFC 2047 encoded-word encoder.
// Its underlying byte is the encoding letter that is placed between the
// charset and the encoded text of every encoded-word it produces.
type WordEncoder byte

// Encoding schemes available to a WordEncoder.
const (
	// BEncoding represents the Base64 encoding scheme as defined by RFC 2045.
	BEncoding WordEncoder = 'b'
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding WordEncoder = 'q'
)
 | 
						|
 | 
						|
// errInvalidWord reports that the input is not a well-formed RFC 2047
// encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
 | 
						|
 | 
						|
// Encode returns the encoded-word form of s. If s is ASCII without special
 | 
						|
// characters, it is returned unchanged. The provided charset is the IANA
 | 
						|
// charset name of s. It is case insensitive.
 | 
						|
func (e WordEncoder) Encode(charset, s string) string {
 | 
						|
	if !needsEncoding(s) {
 | 
						|
		return s
 | 
						|
	}
 | 
						|
	return e.encodeWord(charset, s)
 | 
						|
}
 | 
						|
 | 
						|
// needsEncoding reports whether s contains any rune other than a tab or a
// printable ASCII character (' ' through '~').
func needsEncoding(s string) bool {
	for _, r := range s {
		if r == '\t' {
			continue
		}
		if r < ' ' || r > '~' {
			return true
		}
	}
	return false
}
 | 
						|
 | 
						|
// encodeWord encodes a string into an encoded-word.
 | 
						|
func (e WordEncoder) encodeWord(charset, s string) string {
 | 
						|
	buf := getBuffer()
 | 
						|
	defer putBuffer(buf)
 | 
						|
 | 
						|
	buf.WriteString("=?")
 | 
						|
	buf.WriteString(charset)
 | 
						|
	buf.WriteByte('?')
 | 
						|
	buf.WriteByte(byte(e))
 | 
						|
	buf.WriteByte('?')
 | 
						|
 | 
						|
	if e == BEncoding {
 | 
						|
		w := base64.NewEncoder(base64.StdEncoding, buf)
 | 
						|
		io.WriteString(w, s)
 | 
						|
		w.Close()
 | 
						|
	} else {
 | 
						|
		enc := make([]byte, 3)
 | 
						|
		for i := 0; i < len(s); i++ {
 | 
						|
			b := s[i]
 | 
						|
			switch {
 | 
						|
			case b == ' ':
 | 
						|
				buf.WriteByte('_')
 | 
						|
			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
 | 
						|
				buf.WriteByte(b)
 | 
						|
			default:
 | 
						|
				enc[0] = '='
 | 
						|
				enc[1] = upperhex[b>>4]
 | 
						|
				enc[2] = upperhex[b&0x0f]
 | 
						|
				buf.Write(enc)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	buf.WriteString("?=")
 | 
						|
	return buf.String()
 | 
						|
}
 | 
						|
 | 
						|
// upperhex maps a nibble value (0-15) to its upper-case hexadecimal digit.
const upperhex = "0123456789ABCDEF"
 | 
						|
 | 
						|
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
 | 
						|
 | 
						|
// Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
 | 
						|
// word is returned unchanged.
 | 
						|
func (d *WordDecoder) Decode(word string) (string, error) {
 | 
						|
	fields := strings.Split(word, "?") // TODO: remove allocation?
 | 
						|
	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
 | 
						|
		return "", errInvalidWord
 | 
						|
	}
 | 
						|
 | 
						|
	content, err := decode(fields[2][0], fields[3])
 | 
						|
	if err != nil {
 | 
						|
		return "", err
 | 
						|
	}
 | 
						|
 | 
						|
	buf := getBuffer()
 | 
						|
	defer putBuffer(buf)
 | 
						|
 | 
						|
	if err := d.convert(buf, fields[1], content); err != nil {
 | 
						|
		return "", err
 | 
						|
	}
 | 
						|
 | 
						|
	return buf.String(), nil
 | 
						|
}
 | 
						|
 | 
						|
// DecodeHeader decodes all encoded-words of the given string. It returns an
 | 
						|
// error if and only if CharsetReader of d returns an error.
 | 
						|
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
 | 
						|
	// If there is no encoded-word, returns before creating a buffer.
 | 
						|
	i := strings.Index(header, "=?")
 | 
						|
	if i == -1 {
 | 
						|
		return header, nil
 | 
						|
	}
 | 
						|
 | 
						|
	buf := getBuffer()
 | 
						|
	defer putBuffer(buf)
 | 
						|
 | 
						|
	buf.WriteString(header[:i])
 | 
						|
	header = header[i:]
 | 
						|
 | 
						|
	betweenWords := false
 | 
						|
	for {
 | 
						|
		start := strings.Index(header, "=?")
 | 
						|
		if start == -1 {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		cur := start + len("=?")
 | 
						|
 | 
						|
		i := strings.Index(header[cur:], "?")
 | 
						|
		if i == -1 {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		charset := header[cur : cur+i]
 | 
						|
		cur += i + len("?")
 | 
						|
 | 
						|
		if len(header) < cur+len("Q??=") {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		encoding := header[cur]
 | 
						|
		cur++
 | 
						|
 | 
						|
		if header[cur] != '?' {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		cur++
 | 
						|
 | 
						|
		j := strings.Index(header[cur:], "?=")
 | 
						|
		if j == -1 {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		text := header[cur : cur+j]
 | 
						|
		end := cur + j + len("?=")
 | 
						|
 | 
						|
		content, err := decode(encoding, text)
 | 
						|
		if err != nil {
 | 
						|
			betweenWords = false
 | 
						|
			buf.WriteString(header[:start+2])
 | 
						|
			header = header[start+2:]
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// Write characters before the encoded-word. White-space and newline
 | 
						|
		// characters separating two encoded-words must be deleted.
 | 
						|
		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
 | 
						|
			buf.WriteString(header[:start])
 | 
						|
		}
 | 
						|
 | 
						|
		if err := d.convert(buf, charset, content); err != nil {
 | 
						|
			return "", err
 | 
						|
		}
 | 
						|
 | 
						|
		header = header[end:]
 | 
						|
		betweenWords = true
 | 
						|
	}
 | 
						|
 | 
						|
	if len(header) > 0 {
 | 
						|
		buf.WriteString(header)
 | 
						|
	}
 | 
						|
 | 
						|
	return buf.String(), nil
 | 
						|
}
 | 
						|
 | 
						|
func decode(encoding byte, text string) ([]byte, error) {
 | 
						|
	switch encoding {
 | 
						|
	case 'B', 'b':
 | 
						|
		return base64.StdEncoding.DecodeString(text)
 | 
						|
	case 'Q', 'q':
 | 
						|
		return qDecode(text)
 | 
						|
	default:
 | 
						|
		return nil, errInvalidWord
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
 | 
						|
	switch {
 | 
						|
	case strings.EqualFold("utf-8", charset):
 | 
						|
		buf.Write(content)
 | 
						|
	case strings.EqualFold("iso-8859-1", charset):
 | 
						|
		for _, c := range content {
 | 
						|
			buf.WriteRune(rune(c))
 | 
						|
		}
 | 
						|
	case strings.EqualFold("us-ascii", charset):
 | 
						|
		for _, c := range content {
 | 
						|
			if c >= utf8.RuneSelf {
 | 
						|
				buf.WriteRune(unicode.ReplacementChar)
 | 
						|
			} else {
 | 
						|
				buf.WriteByte(c)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	default:
 | 
						|
		if d.CharsetReader == nil {
 | 
						|
			return fmt.Errorf("mime: unhandled charset %q", charset)
 | 
						|
		}
 | 
						|
		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		if _, err = buf.ReadFrom(r); err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one character that is not a space, tab, line feed or carriage return.
//
// Encoded-words can only be separated by linear white space, which does not
// include vertical tabs (\v), so \v counts as non-whitespace here.
func hasNonWhitespace(s string) bool {
	for _, r := range s {
		if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
			return true
		}
	}
	return false
}
 | 
						|
 | 
						|
// qDecode decodes a Q encoded string.
 | 
						|
func qDecode(s string) ([]byte, error) {
 | 
						|
	dec := make([]byte, len(s))
 | 
						|
	n := 0
 | 
						|
	for i := 0; i < len(s); i++ {
 | 
						|
		switch c := s[i]; {
 | 
						|
		case c == '_':
 | 
						|
			dec[n] = ' '
 | 
						|
		case c == '=':
 | 
						|
			if i+2 >= len(s) {
 | 
						|
				return nil, errInvalidWord
 | 
						|
			}
 | 
						|
			b, err := readHexByte(s[i+1], s[i+2])
 | 
						|
			if err != nil {
 | 
						|
				return nil, err
 | 
						|
			}
 | 
						|
			dec[n] = b
 | 
						|
			i += 2
 | 
						|
		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
 | 
						|
			dec[n] = c
 | 
						|
		default:
 | 
						|
			return nil, errInvalidWord
 | 
						|
		}
 | 
						|
		n++
 | 
						|
	}
 | 
						|
 | 
						|
	return dec[:n], nil
 | 
						|
}
 | 
						|
 | 
						|
// readHexByte returns the byte from its quoted-printable representation.
 | 
						|
func readHexByte(a, b byte) (byte, error) {
 | 
						|
	var hb, lb byte
 | 
						|
	var err error
 | 
						|
	if hb, err = fromHex(a); err != nil {
 | 
						|
		return 0, err
 | 
						|
	}
 | 
						|
	if lb, err = fromHex(b); err != nil {
 | 
						|
		return 0, err
 | 
						|
	}
 | 
						|
	return hb<<4 | lb, nil
 | 
						|
}
 | 
						|
 | 
						|
// fromHex returns the value (0-15) of the hexadecimal digit b. Upper-case
// digits are the expected form; lower-case digits are accepted as well so
// that badly encoded input can still be decoded. Any other byte is an error.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return b - 'A' + 10, nil
	}
	if 'a' <= b && b <= 'f' {
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
 | 
						|
 | 
						|
// bufPool holds reusable *bytes.Buffer values shared by the encoder and the
// decoder. New supplies a fresh, empty buffer.
var bufPool = sync.Pool{
	New: func() interface{} {
		return &bytes.Buffer{}
	},
}
 | 
						|
 | 
						|
func getBuffer() *bytes.Buffer {
 | 
						|
	return bufPool.Get().(*bytes.Buffer)
 | 
						|
}
 | 
						|
 | 
						|
func putBuffer(buf *bytes.Buffer) {
 | 
						|
	if buf.Len() > 1024 {
 | 
						|
		return
 | 
						|
	}
 | 
						|
	buf.Reset()
 | 
						|
	bufPool.Put(buf)
 | 
						|
}
 |