From b74f0f98c8cc014fdc5dcca76da8f60253ae5a37 Mon Sep 17 00:00:00 2001 From: arkahood Date: Sun, 2 Nov 2025 03:05:33 +0530 Subject: [PATCH] request line and http headers parsing completed --- Makefile | 2 +- README.md | 19 ++++ cmd/tcplistener/main.go | 61 +++--------- go.mod | 7 ++ go.sum | 9 ++ internal/headers/headers.go | 88 +++++++++++++++++ internal/headers/headers_test.go | 57 +++++++++++ internal/request/request.go | 165 +++++++++++++++++++++++++++++++ internal/request/request_test.go | 91 +++++++++++++++++ messages.txt | 5 - 10 files changed, 450 insertions(+), 54 deletions(-) create mode 100644 go.sum create mode 100644 internal/headers/headers.go create mode 100644 internal/headers/headers_test.go create mode 100644 internal/request/request.go create mode 100644 internal/request/request_test.go delete mode 100644 messages.txt diff --git a/Makefile b/Makefile index f0643d5..8a63f88 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,2 @@ run: - go run ./cmd/tcplistener/ \ No newline at end of file + go run ./cmd/tcplistener/ | tee tcplistener.log \ No newline at end of file diff --git a/README.md b/README.md index 966d488..f737733 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,21 @@ # RawHTTP HTTP 1.1 server from scratch + +## HTTP Message Structure + +According to RFC 7230, HTTP messages follow this structure: + +``` +start-line CRLF +*( field-line CRLF ) +*( field-line CRLF ) +... +CRLF +[ message-body ] +``` + +Where: +- **start-line**: Request line (method, URI, version) or status line +- **field-line**: HTTP headers (key-value pairs) (The RFC uses the term) +- **CRLF**: Carriage return + line feed (`\r\n`) +- **message-body**: Optional request/response body \ No newline at end of file diff --git a/cmd/tcplistener/main.go b/cmd/tcplistener/main.go index db05024..1381913 100644 --- a/cmd/tcplistener/main.go +++ b/cmd/tcplistener/main.go @@ -1,55 +1,11 @@ package main import ( + "RAWHTTP/internal/request" "fmt" - "io" "net" - "strings" ) -func getLinesChannel(f io.ReadCloser) <-chan string { - lines := make(chan string) - - go func() { - defer f.Close() - defer close(lines) - - currentLine := "" - - for { - read := make([]byte, 8) - _, err := f.Read(read) - - if err == io.EOF { - // Send the last line if it has content - if currentLine != "" { - lines <- currentLine - } - return - } - - if err != nil { - fmt.Println("Some Error Happened While Putting in Slice") - return - } - - parts := strings.Split(string(read), "\n") - - // Process all parts except the last one, which may be incomplete - if len(parts)-1 > 0 { - currentLine += parts[0] - lines <- currentLine - currentLine = "" // Reset for next line - } - - // The last part, which may be incomplete gets added to currentLine - currentLine += parts[len(parts)-1] - } - }() - - return lines -} - func main() { listener, err := net.Listen("tcp", ":42069") @@ -66,10 +22,19 @@ func main() { } go func(c net.Conn) { - lines := getLinesChannel(conn) + req, err := request.RequestFromReader(conn) + + if err != nil { + fmt.Println("error happened", err.Error()) + } - for line := range lines { - fmt.Println(line) + fmt.Println("Request line:") + fmt.Println("Method: ", req.RequestLine.Method) + fmt.Println("Http Version: ", req.RequestLine.HttpVersion) + fmt.Println("Target: ", req.RequestLine.RequestTarget) + fmt.Println("Headers") + for key, val := range req.Headers { + fmt.Println(key, ": ", val) } }(conn) } diff --git a/go.mod b/go.mod index 61c39ab..9bd3c22 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,10 @@ module RAWHTTP go 1.25.3 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/testify v1.11.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..cc8b3f4 --- /dev/null +++ b/go.sum @@ -0,0 +1,9 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/headers/headers.go b/internal/headers/headers.go new file mode 100644 index 0000000..6ada13c --- /dev/null +++ b/internal/headers/headers.go @@ -0,0 +1,88 @@ +package headers + +import ( + "errors" + "strings" + "unicode" +) + +const FieldLineSeperator = ":" +const crlf = "\r\n" + +type Headers map[string]string + +// there can be an unlimited amount of whitespace +// before and after the field-value (Header value). However, when parsing a field-name, +// there must be no spaces betwixt the colon and the field-name. In other words, +// these are valid: + +// 'Host: localhost:42069' +// ' Host: localhost:42069 ' + +// But this is not: + +// Host : localhost:42069 + +func (h Headers) Parse(data []byte) (n int, done bool, err error) { + endIdx := strings.Index(string(data), crlf) + if endIdx == -1 { + return 0, false, nil + } + // if encounter \r\n in front of line that means we consumed all the headers/fieldLines + if endIdx == 0 { + return endIdx + 2, true, nil + } + + currentLine := string(data[:endIdx]) + fieldParts := strings.SplitN(currentLine, FieldLineSeperator, 2) + + if len(fieldParts) != 2 { + return 0, false, errors.New("field-line have wrong number of parts") + } + + fieldName := strings.TrimLeft(fieldParts[0], " ") + if len(fieldName) != len(strings.TrimSpace(fieldName)) { + return 0, false, errors.New("error in field-name syntax, whitespace unexpected") + } + if !isValidFieldName(fieldName) { + return 0, false, errors.New("invalid characters in field-name") + } + + fieldValue := strings.TrimSpace(strings.Trim(fieldParts[1], crlf)) + // lowercase the fieldname while adding to the map + val, exists := h[strings.ToLower(fieldName)] + if exists { + h[strings.ToLower(fieldName)] = val + "," + fieldValue + } else { + h[strings.ToLower(fieldName)] = fieldValue + } + + return endIdx + 2, false, nil +} + +func isValidFieldName(fieldName string) bool { + allowedSpecials := map[rune]bool{ + '!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, + '*': true, '+': true, '-': true, '.': true, '^': true, '_': true, + '`': true, '|': true, '~': true, + } + + if len(fieldName) == 0 { + return false + } + + for _, ch := range fieldName { + switch { + case unicode.IsLetter(ch): + continue + case unicode.IsDigit(ch): + continue + case allowedSpecials[ch]: + continue + default: + return false + } + } + + return true +} diff --git a/internal/headers/headers_test.go b/internal/headers/headers_test.go new file mode 100644 index 0000000..49781f5 --- /dev/null +++ b/internal/headers/headers_test.go @@ -0,0 +1,57 @@ +package headers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHeadersParser(t *testing.T) { + // Test: Valid single header + headers := make(Headers) + data := []byte("host: localhost:42069\r\n\r\n") + n, done, err := headers.Parse(data) + require.NoError(t, err) + require.NotNil(t, headers) + assert.Equal(t, "localhost:42069", headers["host"]) + assert.Equal(t, 23, n) + assert.False(t, done) + + // Test: Invalid spacing header + headers = make(Headers) + data = []byte(" Host : localhost:42069 \r\n\r\n") + n, done, err = headers.Parse(data) + require.Error(t, err) + assert.Equal(t, 0, n) + assert.False(t, done) + + // Test: Invalid character in header + headers = make(Headers) + data = []byte(" H(st : localhost:42069 \r\n\r\n") + n, done, err = headers.Parse(data) + require.Error(t, err) + assert.Equal(t, 0, n) + assert.False(t, done) + + // Test: Uppercase FieldName should add as a lowercase key + headers = make(Headers) + data = []byte("Host: localhost:42069\r\n\r\n") + n, done, err = headers.Parse(data) + require.NoError(t, err) + require.NotNil(t, headers) + assert.Equal(t, "localhost:42069", headers["host"]) + assert.Equal(t, 23, n) + assert.False(t, done) + + // Test: multipe same fieldname should have values comma separated + headers = make(Headers) + data = []byte("Host: localhost:42069\r\nHost: localhost:42070\r\n\r\n") + bytesConsumed0, _, _ := headers.Parse(data) + bytesConsumed1, done, err := headers.Parse(data[bytesConsumed0:]) + require.NoError(t, err) + require.NotNil(t, headers) + assert.Equal(t, "localhost:42069,localhost:42070", headers["host"]) + assert.Equal(t, bytesConsumed0+bytesConsumed1, len(data)-2) + assert.False(t, done) +} diff --git a/internal/request/request.go b/internal/request/request.go new file mode 100644 index 0000000..17b0458 --- /dev/null +++ b/internal/request/request.go @@ -0,0 +1,165 @@ +package request + +import ( + "RAWHTTP/internal/headers" + "errors" + "fmt" + "io" + "strings" +) + +const crlf = "\r\n" +const bufferSize int = 8 + +type RequestState int + +const ( + RequestStateInitialized RequestState = iota + RequestStateDone + RequestStateParsingHeaders +) + +type Request struct { + RequestLine RequestLine + Headers headers.Headers + State RequestState +} + +type RequestLine struct { + HttpVersion string + RequestTarget string + Method string +} + +func RequestFromReader(reader io.Reader) (*Request, error) { + buf := make([]byte, bufferSize) + readToIndex := 0 + + req := &Request{ + State: RequestStateInitialized, + Headers: make(headers.Headers), + } + + for req.State != RequestStateDone { + // If the buffer is full, grow it + if readToIndex == cap(buf) { + newBuf := make([]byte, cap(buf)*2) + copy(newBuf, buf[:readToIndex]) + buf = newBuf + } + + // Read from the io.Reader into the buffer starting at readToIndex + n, err := reader.Read(buf[readToIndex:]) + + if err != nil { + if errors.Is(err, io.EOF) { + req.State = RequestStateDone + break + } + return nil, err + } + + // Update readToIndex with the number of bytes actually read + readToIndex += n + + // Call parse with the slice of buffer that has data we've read so far + bytesConsumed, err := req.parse(buf[:readToIndex]) + if err != nil { + return nil, err + } + + // Remove the data that was parsed successfully from the buffer + if bytesConsumed > 0 { + remainingData := buf[bytesConsumed:readToIndex] + newBuf := make([]byte, cap(buf)) + copy(newBuf, remainingData) + buf = newBuf + + // Decrement readToIndex by the number of bytes that were parsed + readToIndex -= bytesConsumed + } + } + + return req, nil +} + +func parseRequestLine(data string) (*RequestLine, int, error) { + // Look for the CRLF that marks the end of the request line + endIdx := strings.Index(data, crlf) + if endIdx == -1 { + // Not enough data yet; no CRLF found + return nil, 0, nil + } + + // Extract the request line (without the trailing CRLF) + reqLine := data[:endIdx] + + parts := strings.Split(reqLine, " ") + if len(parts) != 3 { + return nil, endIdx + 2, errors.New("invalid number of parts in request line") + } + // "method" part only contains capital alphabetic characters. + if strings.ToUpper(parts[0]) != parts[0] { + return nil, endIdx + 2, errors.New("http method is not capitalized") + } + + httpVersion := strings.Replace(parts[2], "HTTP/", "", 1) + + if httpVersion != "1.1" { + return nil, endIdx + 2, errors.New("http/1.1 only supported") + } + + return &RequestLine{ + Method: parts[0], + HttpVersion: httpVersion, + RequestTarget: parts[1], + }, endIdx + 2, nil +} + +func (r *Request) parse(data []byte) (int, error) { + totalBytesParsed := 0 + for r.State != RequestStateDone { + n, err := r.parseSingle(data[totalBytesParsed:]) + if err != nil { + return totalBytesParsed, err + } + if n == 0 { + break + } + totalBytesParsed += n + } + return totalBytesParsed, nil +} + +func (r *Request) parseSingle(data []byte) (int, error) { + if r.State == RequestStateDone { + return 0, errors.New("trying to read from done state") + } + if r.State == RequestStateInitialized { + requestLine, numOfByteConsumed, err := parseRequestLine(string(data)) + if err != nil { + return 0, err + } + // if zero bytes are parsed and no error + if numOfByteConsumed == 0 { + return 0, nil + } + r.RequestLine = *requestLine + r.State = RequestStateParsingHeaders + return numOfByteConsumed, nil + } + + if r.State == RequestStateParsingHeaders { + numberOfBytes, done, err := r.Headers.Parse(data) + if err != nil { + fmt.Println("error occured while parsing headers") + return 0, err + } + if done { + r.State = RequestStateDone + } + return numberOfBytes, nil + } + + return 0, errors.New("unknown request status") +} diff --git a/internal/request/request_test.go b/internal/request/request_test.go new file mode 100644 index 0000000..766dd87 --- /dev/null +++ b/internal/request/request_test.go @@ -0,0 +1,91 @@ +package request + +import ( + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type chunkReader struct { + data string + numBytesPerRead int + pos int +} + +// Read reads up to len(p) or numBytesPerRead bytes from the string per call +// its useful for simulating reading a variable number of bytes per chunk from a network connection +func (cr *chunkReader) Read(p []byte) (n int, err error) { + if cr.pos >= len(cr.data) { + return 0, io.EOF + } + endIndex := cr.pos + cr.numBytesPerRead + if endIndex > len(cr.data) { + endIndex = len(cr.data) + } + n = copy(p, cr.data[cr.pos:endIndex]) + cr.pos += n + + return n, nil +} + +func TestRequestLineParser(t *testing.T) { + // Test: Good GET Request line + r, err := RequestFromReader(&chunkReader{ + data: "GET / HTTP/1.1\r\nHost: localhost:42069\r\nUser-Agent: curl/7.81.0\r\nAccept: */*\r\n\r\n", + numBytesPerRead: 1, + }) + require.NoError(t, err) + require.NotNil(t, r) + assert.Equal(t, "GET", r.RequestLine.Method) + assert.Equal(t, "/", r.RequestLine.RequestTarget) + assert.Equal(t, "1.1", r.RequestLine.HttpVersion) + + // Test: Good GET Request line with path + r, err = RequestFromReader(strings.NewReader("GET /coffee HTTP/1.1\r\nHost: localhost:42069\r\nUser-Agent: curl/7.81.0\r\nAccept: */*\r\n\r\n")) + require.NoError(t, err) + require.NotNil(t, r) + assert.Equal(t, "GET", r.RequestLine.Method) + assert.Equal(t, "/coffee", r.RequestLine.RequestTarget) + assert.Equal(t, "1.1", r.RequestLine.HttpVersion) + + // Test: Invalid number of parts in request line + _, err = RequestFromReader(strings.NewReader("/coffee HTTP/1.1\r\nHost: localhost:42069\r\nUser-Agent: curl/7.81.0\r\nAccept: */*\r\n\r\n")) + require.Error(t, err) + + // Test: Good GET Request line + _, err = RequestFromReader(strings.NewReader("GET / HTTP/2.1\r\nHost: localhost:42069\r\nUser-Agent: curl/7.81.0\r\nAccept: */*\r\n\r\n")) + require.Error(t, err) + + // Test: POST Request + r, err = RequestFromReader(strings.NewReader("POST / HTTP/1.1\r\nHost: localhost:42069\r\nUser-Agent: curl/7.81.0\r\nAccept: */*\r\nContent-Type: application/x-www-form-urlencoded\r\nContent-Length: 13\r\n\r\nparam1=value1")) + require.NoError(t, err) + require.NotNil(t, r) + assert.Equal(t, "POST", r.RequestLine.Method) + assert.Equal(t, "/", r.RequestLine.RequestTarget) + assert.Equal(t, "1.1", r.RequestLine.HttpVersion) +} + +func TestHeadersLineParser(t *testing.T) { + // Test: Standard Headers + reader := &chunkReader{ + data: "GET / HTTP/1.1\r\nHost: localhost:42069\r\nUser-Agent: curl/7.81.0\r\nAccept: */*\r\n\r\n", + numBytesPerRead: 3, + } + r, err := RequestFromReader(reader) + require.NoError(t, err) + require.NotNil(t, r) + assert.Equal(t, "localhost:42069", r.Headers["host"]) + assert.Equal(t, "curl/7.81.0", r.Headers["user-agent"]) + assert.Equal(t, "*/*", r.Headers["accept"]) + + // Test: Malformed Header + reader = &chunkReader{ + data: "GET / HTTP/1.1\r\nHost localhost:42069\r\n\r\n", + numBytesPerRead: 3, + } + _, err = RequestFromReader(reader) + require.Error(t, err) +} diff --git a/messages.txt b/messages.txt deleted file mode 100644 index e13faaf..0000000 --- a/messages.txt +++ /dev/null @@ -1,5 +0,0 @@ -Do you have what it takes to be an engineer at TheStartup™? -Are you willing to work 80 hours a week in hopes that your 0.001% equity is worth something? -Can you say "synergy" and "democratize" with a straight face? -Are you prepared to eat top ramen at your desk 3 meals a day? -end \ No newline at end of file