Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 202 additions & 1 deletion encoding.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package message

import (
"bufio"
"bytes"
"encoding/base64"
"errors"
Expand Down Expand Up @@ -30,7 +31,10 @@ func encodingReader(enc string, r io.Reader) (io.Reader, error) {
var dec io.Reader
switch strings.ToLower(enc) {
case "quoted-printable":
dec = quotedprintable.NewReader(r)
// Wrap with lenient QP reader to handle malformed QP encoding
// (e.g., "= " followed by whitespace which some encoders produce)
wrapped := &lenientQPReader{wrapped: bufio.NewReader(r)}
dec = quotedprintable.NewReader(wrapped)
case "base64":
wrapped := &whitespaceReplacingReader{wrapped: r}
dec = base64.NewDecoder(base64.StdEncoding, wrapped)
Expand Down Expand Up @@ -86,6 +90,203 @@ func (r *whitespaceReplacingReader) Read(p []byte) (int, error) {
return n, err
}

// lenientQPReader rewrites malformed quoted-printable input before it reaches
// the quoted-printable decoder, while passing valid sequences through
// untouched. "Blank" below means a space or a tab.
//
//  1. "=[blanks]" followed by CR or LF: malformed soft line break. The blanks
//     are dropped so that the decoder sees "=" directly before the line break.
//  2. "=[blanks]X", "=X?" (second character not hex) or "=" with nothing
//     after it: a literal '='. It is rewritten to "=3D".
//  3. "=62": valid escape. Preserved.
//  4. "=" directly followed by CR or LF: valid soft break. Preserved.
//  5. Blanks directly in front of a soft line break (cases 1 and 4), or blanks
//     that run up to the end of the input: encoded as "=20" / "=09".
//
// Blanks in front of a hard line break are passed through unchanged.
type lenientQPReader struct {
	wrapped *bufio.Reader
	pending []byte // tail of an expansion ("=3D", "=20", "=09") that did not fit into the caller's buffer
}

// Read implements io.Reader. It fills p with the rewritten stream and returns
// the number of bytes written.
//
// Read errors other than io.EOF that occur while looking ahead are returned to
// the caller together with the bytes produced so far. The byte being
// classified is not consumed in that case, so a later Read can classify it
// correctly once the data is available.
func (r *lenientQPReader) Read(p []byte) (int, error) {
	// Flush what is left of an expansion that did not fit last time.
	if len(r.pending) > 0 {
		n := copy(p, r.pending)
		r.pending = r.pending[n:]
		return n, nil
	}

	i := 0
	for i < len(p) {
		head, err := r.wrapped.Peek(1)
		if err != nil {
			if i > 0 && errors.Is(err, io.EOF) {
				// Hand out the data first; the next call reports EOF.
				return i, nil
			}
			return i, err
		}

		switch b := head[0]; b {
		case '=':
			escape, strip, err := r.inspectEquals()
			if err != nil {
				return i, err
			}
			if escape {
				r.discard(1)
				i += r.emit(p[i:], "=3D")
				if len(r.pending) > 0 {
					return i, nil
				}
				continue
			}
			// Valid escape or soft break: keep '=' and drop any blanks
			// that separate it from the line break.
			p[i] = '='
			i++
			r.discard(1 + strip)
		case ' ', '\t':
			n, err := r.readBlanks(p[i:])
			i += n
			if err != nil || len(r.pending) > 0 {
				return i, err
			}
		default:
			p[i] = b
			i++
			r.discard(1)
		}
	}
	return i, nil
}

// inspectEquals classifies the '=' at the current read position. escape is
// true when the '=' has to be rewritten as "=3D". Otherwise the '=' is passed
// through and strip is the number of blanks directly after it that must be
// dropped (non-zero only for a malformed soft line break).
func (r *lenientQPReader) inspectEquals() (escape bool, strip int, err error) {
	end, next, found, err := r.skipBlanks(1)
	if err != nil {
		return false, 0, err
	}
	blanks := end - 1

	switch {
	case !found:
		// Nothing but blanks up to the end of the input (or of the
		// lookahead window): '=' cannot start an escape or a soft break.
		return true, 0, nil
	case next == '\r' || next == '\n':
		return false, blanks, nil
	case blanks == 0 && isHex(next):
		second, ok, err := r.peekAt(2)
		if err != nil {
			return false, 0, err
		}
		return !(ok && isHex(second)), 0, nil
	default:
		return true, 0, nil
	}
}

// readBlanks handles the run of blanks that starts at the current read
// position. The run is classified once and then written to dst as a whole (as
// far as dst has room), so a long run is not rescanned for every byte. It
// returns the number of bytes written to dst.
func (r *lenientQPReader) readBlanks(dst []byte) (int, error) {
	// Offset 0 is known to be a blank, so scanning starts at offset 1.
	n, next, found, err := r.skipBlanks(1)
	if err != nil {
		return 0, err
	}

	// A run that reaches the end of the input or of the lookahead window is
	// encoded; "=20" / "=09" always decode back to the same blank.
	encode := !found
	if found && next == '=' {
		// Blanks in front of '=' are encoded only when that '=' turns out
		// to be a soft line break, possibly a malformed one.
		_, after, ok, err := r.skipBlanks(n + 1)
		if err != nil {
			return 0, err
		}
		encode = ok && (after == '\r' || after == '\n')
	}

	// The n bytes were buffered by the scan above, so this Peek cannot fail.
	run, _ := r.wrapped.Peek(n)

	if !encode {
		w := copy(dst, run)
		r.discard(w)
		return w, nil
	}

	written, consumed := 0, 0
	for _, c := range run {
		enc := "=20"
		if c == '\t' {
			enc = "=09"
		}
		written += r.emit(dst[written:], enc)
		consumed++
		if len(r.pending) > 0 || written == len(dst) {
			break
		}
	}
	r.discard(consumed)
	return written, nil
}

// skipBlanks scans forward from offset off (relative to the current read
// position) over spaces and tabs without consuming anything. It returns the
// offset of the first byte that is not a blank together with that byte. found
// is false when the input or the lookahead window ended first; end is then the
// offset at which the scan stopped.
func (r *lenientQPReader) skipBlanks(off int) (end int, next byte, found bool, err error) {
	for {
		c, ok, err := r.peekAt(off)
		if err != nil || !ok {
			return off, 0, false, err
		}
		if c != ' ' && c != '\t' {
			return off, c, true, nil
		}
		off++
	}
}

// peekAt returns the byte at offset off from the current read position without
// consuming it. ok is false when that byte is not available because the input
// ended or because off lies beyond what the bufio.Reader can buffer. Any other
// read error is returned so that it is not mistaken for the end of the input.
func (r *lenientQPReader) peekAt(off int) (b byte, ok bool, err error) {
	buf, err := r.wrapped.Peek(off + 1)
	if len(buf) > off {
		return buf[off], true, nil
	}
	if err == nil || errors.Is(err, io.EOF) || errors.Is(err, bufio.ErrBufferFull) {
		return 0, false, nil
	}
	return 0, false, err
}

// emit copies s into dst and keeps whatever does not fit in r.pending for the
// next Read. It returns the number of bytes copied into dst.
func (r *lenientQPReader) emit(dst []byte, s string) int {
	n := copy(dst, s)
	if n < len(s) {
		r.pending = []byte(s[n:])
	}
	return n
}

// discard drops n bytes from the wrapped reader. It is only called for bytes
// that a preceding Peek has already buffered, so Discard cannot fail here and
// its result is deliberately ignored.
func (r *lenientQPReader) discard(n int) {
	_, _ = r.wrapped.Discard(n)
}

// isHex reports whether c is an ASCII hexadecimal digit (0-9, A-F or a-f).
func isHex(c byte) bool {
	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')
}

type lineWrapper struct {
w io.Writer
maxLineLen int
Expand Down
87 changes: 87 additions & 0 deletions encoding_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package message

import (
"bufio"
"bytes"
"io"
"io/ioutil"
"mime/quotedprintable"
"strings"
"testing"
)
Expand Down Expand Up @@ -167,3 +169,88 @@ func writeStringBytePerByte(w io.Writer, s string) error {
}
return nil
}

// TestLenientQPReader feeds each input through lenientQPReader followed by the
// standard quoted-printable decoder and compares the decoded text with the
// expected result.
func TestLenientQPReader(t *testing.T) {
	type testCase struct {
		name string
		in   string
		want string
	}

	cases := []testCase{
		{name: "valid_hex", in: "=62", want: "b"},
		{name: "valid_soft_break", in: "foo=\r\nbar", want: "foobar"},
		// Blanks between '=' and the line break are stripped.
		{name: "malformed_soft_break_spaces", in: "foo=  \r\nbar", want: "foobar"},
		{name: "malformed_soft_break_tabs", in: "foo=\t\t\r\nbar", want: "foobar"},
		// Spaces in front of a soft break are rewritten to "=20", so the
		// decoder sees "foo=20=20=\r\nbar" and keeps them.
		{name: "spaces_before_soft_break", in: "foo  =\r\nbar", want: "foo  bar"},
		// '=' followed by a space but no line break is treated as "=3D".
		{name: "malformed_literal_space", in: "equation = 5", want: "equation = 5"},
		// '=' as the very last byte of the input.
		{name: "malformed_literal_eof", in: "end=", want: "end="},
		// Spaces in front of a hard line break are passed through as they
		// are; the expected value has them removed by the decoder.
		{name: "trailing_whitespace_eol", in: "end  \r\n", want: "end\r\n"},
		// Malformed soft break directly followed by a valid escape.
		{name: "interleaved_valid_and_invalid", in: "foo=  \r\n=62ar", want: "foobar"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// lenientQPReader is unexported, so it is built by hand and
			// placed in front of the standard decoder.
			src := bufio.NewReader(strings.NewReader(tc.in))
			dec := quotedprintable.NewReader(&lenientQPReader{wrapped: src})

			raw, err := io.ReadAll(dec)
			if err != nil {
				t.Fatalf("ReadAll failed: %v", err)
			}

			if got := string(raw); got != tc.want {
				t.Errorf("got %q, expected %q", got, tc.want)
			}
		})
	}
}