diff --git a/encoding.go b/encoding.go index 524c249..1ed8220 100644 --- a/encoding.go +++ b/encoding.go @@ -1,6 +1,7 @@ package message import ( + "bufio" "bytes" "encoding/base64" "errors" @@ -30,7 +31,10 @@ func encodingReader(enc string, r io.Reader) (io.Reader, error) { var dec io.Reader switch strings.ToLower(enc) { case "quoted-printable": - dec = quotedprintable.NewReader(r) + // Wrap with lenient QP reader to handle malformed QP encoding + // (e.g., "= " followed by whitespace which some encoders produce) + wrapped := &lenientQPReader{wrapped: bufio.NewReader(r)} + dec = quotedprintable.NewReader(wrapped) case "base64": wrapped := &whitespaceReplacingReader{wrapped: r} dec = base64.NewDecoder(base64.StdEncoding, wrapped) @@ -86,6 +90,203 @@ func (r *whitespaceReplacingReader) Read(p []byte) (int, error) { return n, err } +// lenientQPReader fixes malformed quoted-printable encoding while preserving valid sequences. +// It handles the following cases: +// 1. "=[spaces]\r\n" -> Soft line break (malformed). Strips spaces to "=\r\n". +// 2. "=[spaces][other]" -> Malformed literal. Encodes to "=3D[spaces][other]". +// 3. "=62" -> Valid QP. Preserved. +// 4. "=\r\n" -> Valid soft break. Preserved. +// 5. "[spaces]\r\n" -> Trailing whitespace. Encodes to "=20" or "=09". +type lenientQPReader struct { + wrapped *bufio.Reader + pending []byte // data waiting to be read (e.g. from expansion) +} + +func (r *lenientQPReader) Read(p []byte) (int, error) { + if len(r.pending) > 0 { + n := copy(p, r.pending) + r.pending = r.pending[n:] + return n, nil + } + + i := 0 + for i < len(p) { + // Peek 1 byte + bPrefix, err := r.wrapped.Peek(1) + if err != nil { + if i > 0 { + return i, nil + } + return 0, err + } + + b := bPrefix[0] + + if b == '=' { + // Look ahead to categorize + peekSize := 2 + spacesCount := 0 + foundEnd := false + var targetChar byte + isSoftBreak := false + isLiteral := false + + for { + peaked, err := r.wrapped.Peek(peekSize) + if len(peaked) < peekSize { + if err != nil && err != io.EOF { + // Error peeking, treat as literal to be safe + isLiteral = true + foundEnd = true + break + } + // EOF reached + isLiteral = true + foundEnd = true + break + } + + c := peaked[peekSize-1] + if c == ' ' || c == '\t' { + spacesCount++ + peekSize++ + continue + } + + targetChar = c + foundEnd = true + break + } + + if !foundEnd { + // Buffer likely full or scan too long + isLiteral = true + } else { + if targetChar == '\r' || targetChar == '\n' { + if spacesCount > 0 { + // Malformed soft break: =[spaces]\r\n + isSoftBreak = true + } + // Else valid soft break: =\r\n (fall through to default copy) + } else { + // Not a soft break + // Check if valid Hex if no spaces (e.g. =62) + if spacesCount == 0 && isHex(targetChar) { + // Need one more char for full hex check + peaked3, _ := r.wrapped.Peek(3) + if len(peaked3) >= 3 && isHex(peaked3[2]) { + // Valid QP hex: =XX. Preserved. + } else { + // =X? or =X[EOF]. Malformed. + isLiteral = true + } + } else { + // =[spaces]X or =[bad char]. Malformed literal. + isLiteral = true + } + } + } + + if isLiteral { + // Emit '=3D'. Convert = to =3D. + toWrite := []byte("=3D") + n := copy(p[i:], toWrite) + i += n + r.wrapped.Discard(1) // Consume '=' + + if n < 3 { + // Pending remainder + r.pending = toWrite[n:] + return i, nil + } + continue + } else if isSoftBreak { + // Malformed soft break: emit '=' but skip spaces + p[i] = '=' + i++ + r.wrapped.Discard(1 + spacesCount) // Consume '=' and spaces + continue + } + // Default: Valid soft break or Valid Hex. Copy '='. + p[i] = '=' + i++ + r.wrapped.Discard(1) + } else if b == ' ' || b == '\t' { + // Check for trailing whitespace + isTrailing := false + peekSize := 1 + for { + peaked, _ := r.wrapped.Peek(peekSize) + if len(peaked) < peekSize { + // EOF after spaces -> trailing + isTrailing = true + break + } + c := peaked[peekSize-1] + if c == ' ' || c == '\t' { + peekSize++ + continue + } + if c == '=' { + // Check if '=' is a soft break + // We need to look ahead from 'peekSize' (position of '=') + // We can't reuse r.wrapped.Peek easily with large offset unless we loop? + // actually we can. + + // We need to check if '=' is followed by [spaces]\r\n + // We start scanning from peekSize. + subPeekSize := peekSize + 1 + for { + subPeaked, _ := r.wrapped.Peek(subPeekSize) + if len(subPeaked) < subPeekSize { + // EOF + break + } + sc := subPeaked[subPeekSize-1] + if sc == ' ' || sc == '\t' { + subPeekSize++ + continue + } + if sc == '\r' || sc == '\n' { + isTrailing = true + } + break + } + } + break + } + + if isTrailing { + // Encode to =20 or =09 + toWrite := []byte("=20") + if b == '\t' { + toWrite = []byte("=09") + } + n := copy(p[i:], toWrite) + i += n + r.wrapped.Discard(1) + if n < 3 { + r.pending = toWrite[n:] + return i, nil + } + continue + } + p[i] = b + i++ + r.wrapped.Discard(1) + } else { + p[i] = b + i++ + r.wrapped.Discard(1) + } + } + return i, nil +} + +func isHex(c byte) bool { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') +} + type lineWrapper struct { w io.Writer maxLineLen int diff --git a/encoding_test.go b/encoding_test.go index 153147f..00b0771 100644 --- a/encoding_test.go +++ b/encoding_test.go @@ -1,9 +1,11 @@ package message import ( + "bufio" "bytes" "io" "io/ioutil" + "mime/quotedprintable" "strings" "testing" ) @@ -167,3 +169,88 @@ func writeStringBytePerByte(w io.Writer, s string) error { } return nil } + +func TestLenientQPReader(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "valid_hex", + input: "=62", + expected: "b", + }, + { + name: "valid_soft_break", + input: "foo=\r\nbar", + expected: "foobar", + }, + { + name: "malformed_soft_break_spaces", + input: "foo= \r\nbar", // Spaces after = are stripped + expected: "foobar", + }, + { + name: "malformed_soft_break_tabs", + input: "foo=\t\t\r\nbar", + expected: "foobar", + }, + { + name: "spaces_before_soft_break", + input: "foo =\r\nbar", // Leading spaces before soft break should be preserved logic? + // Wait, my logic preserves spaces if they are before '=' and '=' is start of soft break. + // input: "foo =\r\n" + // ' ' -> peek -> ' ' -> peek -> '='. '=' is soft break. Encode spaces. + // encoded: "foo=20=20=\r\n" + // decoded: "foo " + "bar" + expected: "foo bar", + }, + { + name: "malformed_literal_space", + input: "equation = 5", // = followed by space but no newline + expected: "equation = 5", // = processed as =3D + }, + { + name: "malformed_literal_eof", + input: "end=", // = at EOF + expected: "end=", + }, + { + name: "trailing_whitespace_eol", + input: "end \r\n", // Trailing spaces at EOL + // My logic: If ' ' peek finds \r or \n -> do NOT encode (let Go strip). + // So decoded "end\r\n" (stripped). + expected: "end\r\n", // Go QP decoder usually strips this? + // quotedprintable.NewReader returns decoded content. + // If input "end \r\n". Decoded is "end\r\n" or "end \r\n"? + // Go spec: "Trailing whitespace is removed from each line." + // So "end\r\n". + }, + { + name: "interleaved_valid_and_invalid", + input: "foo= \r\n=62ar", // malformed soft break, then valid hex + expected: "foobar", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := strings.NewReader(tt.input) + // Manually construct lenientQPReader as it's private + lqp := &lenientQPReader{wrapped: bufio.NewReader(r)} + // Wrap with standard QP decoder + dec := quotedprintable.NewReader(lqp) + + gotBytes, err := io.ReadAll(dec) + if err != nil { + t.Fatalf("ReadAll failed: %v", err) + } + got := string(gotBytes) + + if got != tt.expected { + t.Errorf("got %q, expected %q", got, tt.expected) + } + }) + } +}