Posted on 9 mins read

Introduction

I was struggling to find a good (or just simple) reverse proxy solution written in Go, so I decided to take what I had learnt from a work colleague of mine and put together a simple example for others to build upon if they needed a quick reference point.

In this example I have an origin server written in Python (for no other reason than to have a clearer distinction between the proxy and the origin) and which supports the endpoints /, /foo and /bar/* (where the wildcard glob means we support multiple variants of that, such as /bar/baz).

Each origin handler will print the http request headers, followed by sending a response body that correlates to the handler name (so for example, the FooHandler class will respond with FOO!, while the BarHandler class will respond with BAR!).


But before we get into it... time for some self-promotion 🙊

Example Python Origin Code

Here is our Python code using the Tornado web framework.

import tornado.ioloop
import tornado.web


class MainHandler(tornado.web.RequestHandler):
    """Handler for the origin's root endpoint."""

    def get(self):
        """Print the request headers to stdout and reply with ``MAIN!``."""
        incoming_headers = self.request.headers
        print("MAIN HEADERS:\n\n", incoming_headers)
        self.write("MAIN!")


class FooHandler(tornado.web.RequestHandler):
    """Handler for the origin's ``/foo`` endpoint."""

    def get(self):
        """Print the request headers to stdout and reply with ``FOO!``."""
        incoming_headers = self.request.headers
        print("FOO HEADERS:\n\n", incoming_headers)
        self.write("FOO!")


class BarHandler(tornado.web.RequestHandler):
    """Handler for every origin path matched by the ``/bar.*`` route."""

    def get(self):
        """Print the request headers to stdout and reply with ``BAR!``."""
        incoming_headers = self.request.headers
        print("BAR HEADERS:\n\n", incoming_headers)
        self.write("BAR!")


def make_app():
    """Build the Tornado application that plays the role of the origin.

    Returns a ``tornado.web.Application`` with one route per handler class.
    """
    routes = [
        (r"/", MainHandler),
        (r"/foo", FooHandler),
        (r"/bar.*", BarHandler),
    ]
    return tornado.web.Application(routes)


if __name__ == "__main__":
    # Bind the origin application to port 9000, then hand control to the
    # Tornado IOLoop so it can start serving requests.
    make_app().listen(9000)
    tornado.ioloop.IOLoop.current().start()

Example Golang Proxy Code

There are two versions of the code, a simple version and a more advanced version that aims to handle more specific use cases.

The simple version uses just the Go standard library, whereas the advanced version uses the standard library as well as a few external packages such as httprouter and logrus for routing and logging respectively.

One difference between them that’s worth mentioning is that in the simple version we use the httputil.ReverseProxy http handler directly, whereas in the advanced version we use httputil.NewSingleHostReverseProxy to construct this for us. The advanced version also tries to normalise the paths by stripping trailing slashes and joining them up with the base path (if there was one, although ironically I don’t define one in the advanced example).

Simple

package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

// main starts a reverse proxy on :9001 that forwards every incoming request
// to the origin server at http://localhost:9000/.
func main() {
	origin, err := url.Parse("http://localhost:9000/")
	if err != nil {
		// Fail loudly rather than dereferencing a nil *url.URL on the first
		// request if the origin address is ever edited into something invalid.
		log.Fatal(err)
	}

	// The director mutates the inbound request in place before it is sent to
	// the origin: it records the host the client asked for and the origin we
	// are forwarding to, then retargets the URL at the origin. req.Host is
	// deliberately left alone, so the origin still sees the client's Host.
	director := func(req *http.Request) {
		req.Header.Add("X-Forwarded-Host", req.Host)
		req.Header.Add("X-Origin-Host", origin.Host)
		// Take the scheme from the parsed origin so that changing the origin
		// URL above is the only edit needed to point at a different backend.
		req.URL.Scheme = origin.Scheme
		req.URL.Host = origin.Host
	}

	proxy := &httputil.ReverseProxy{Director: director}

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		proxy.ServeHTTP(w, r)
	})

	log.Fatal(http.ListenAndServe(":9001", nil))
}

Advanced

package main

import (
	"net/http"
	"net/http/httputil"
	"net/url"
	"os"
	"strings"

	"github.com/Sirupsen/logrus"
	"github.com/julienschmidt/httprouter"
)

// singleJoiningSlash concatenates a and b so that exactly one "/" separates
// them at the seam, whatever combination of trailing/leading slashes the two
// inputs arrive with.
func singleJoiningSlash(a, b string) string {
	trailing := strings.HasSuffix(a, "/")
	leading := strings.HasPrefix(b, "/")

	if trailing && leading {
		// Both sides contribute a slash: drop the one from b.
		return a + strings.TrimPrefix(b, "/")
	}
	if trailing || leading {
		// Exactly one side contributes the slash already.
		return a + b
	}
	// Neither side has one, so insert it.
	return a + "/" + b
}

// main starts a reverse proxy on :9001 that forwards GET requests for any
// path to the origin at http://localhost:9000/, normalising the path on the
// way through.
func main() {
	// Text-formatted, info-level logging to stdout, with every entry tagged
	// with the service name.
	logrus.SetFormatter(&logrus.TextFormatter{})
	logrus.SetOutput(os.Stdout)
	logrus.SetLevel(logrus.InfoLevel)
	logger := logrus.WithFields(logrus.Fields{
		"service": "go-reverse-proxy",
	})

	router := httprouter.New()
	// The parse error is discarded here; the URL is a fixed literal.
	origin, _ := url.Parse("http://localhost:9000/")
	// httprouter catch-all pattern: matches every request path.
	path := "/*catchall"

	reverseProxy := httputil.NewSingleHostReverseProxy(origin)

	// Replace the director that NewSingleHostReverseProxy installed with one
	// that also adds forwarding headers and normalises the path.
	reverseProxy.Director = func(req *http.Request) {
		req.Header.Add("X-Forwarded-Host", req.Host)
		req.Header.Add("X-Origin-Host", origin.Host)
		req.URL.Scheme = origin.Scheme
		req.URL.Host = origin.Host

		// wildcardIndex is the offset of '*' in the route pattern (1 for
		// "/*catchall"). Slicing the request path at that offset drops the
		// fixed prefix of the pattern, i.e. the leading "/".
		wildcardIndex := strings.IndexAny(path, "*")
		// Re-attach the remainder to the origin's base path with exactly one
		// slash at the join.
		proxyPath := singleJoiningSlash(origin.Path, req.URL.Path[wildcardIndex:])
		// Strip a single trailing slash, except when the path is just "/".
		if strings.HasSuffix(proxyPath, "/") && len(proxyPath) > 1 {
			proxyPath = proxyPath[:len(proxyPath)-1]
		}
		req.URL.Path = proxyPath
	}

	// Only the GET method is registered on the router.
	router.Handle("GET", path, func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
		reverseProxy.ServeHTTP(w, r)
	})

	logger.Fatal(http.ListenAndServe(":9001", router))
}

Demonstration

In order to run this example you should follow these instructions:

  1. run the tornado application (e.g. python tornado-origin.py)
  2. run the go application (e.g. go run main.go)
  3. make http requests (shown below)
curl -v http://localhost:9001/
curl -v http://localhost:9001/foo
curl -v http://localhost:9001/foo/
curl -v http://localhost:9001/bar/baz

You should see output from the Python server that looks something like this:

FOO HEADERS:

Host: localhost:9001
User-Agent: curl/7.54.0
Accept: */*
X-Forwarded-Host: localhost:9001
X-Origin-Host: localhost:9000

Explanation

OK, so let’s step through the main function of the advanced example code to see what’s going on.

The core reverse proxy code and its concepts are effectively the same between the advanced and simple versions.

First we set up our basic logging configuration:

logrus.SetFormatter(&logrus.TextFormatter{})
logrus.SetOutput(os.Stdout)
logrus.SetLevel(logrus.InfoLevel)
logger := logrus.WithFields(logrus.Fields{
  "service": "go-reverse-proxy",
})

Next we create a new httprouter instance, we define the origin host (http://localhost:9000/) and the ‘pattern’ we want httprouter to look out for (/*catchall, which is a special syntax that represents a catchall wildcard/glob):

router := httprouter.New()
origin, _ := url.Parse("http://localhost:9000/")
path := "/*catchall"

Next we create a new reverse proxy instance, passing it the origin host (http://localhost:9000/):

reverseProxy := httputil.NewSingleHostReverseProxy(origin)

Followed by configuring the ‘director’ for the reverse proxy. The director is simply a function that modifies the received incoming request, while the response from the origin is copied back to the original client.

In this example, we attach a few common proxy related headers to the incoming request and then modify its Scheme/Host to reflect the origin we wish to proxy it onto.

Next, we change the request path to the origin. What we do is ensure the path we request from the origin is whatever the base origin path is + the requested path (i.e. not just directing the request to the root/entrypoint of the origin).

In our example, our origin’s path is just / whereas the client will be requesting things like /foo and /bar/baz, so these would be appended to the origin’s defined /. But we also make sure that when joining the origin’s path with the incoming request path, that we avoid double slashes in the middle.

Lastly, we ensure that any trailing slash is removed as well:

reverseProxy.Director = func(req *http.Request) {
  req.Header.Add("X-Forwarded-Host", req.Host)
  req.Header.Add("X-Origin-Host", origin.Host)
  req.URL.Scheme = origin.Scheme
  req.URL.Host = origin.Host

  wildcardIndex := strings.IndexAny(path, "*")
  proxyPath := singleJoiningSlash(origin.Path, req.URL.Path[wildcardIndex:])
  if strings.HasSuffix(proxyPath, "/") && len(proxyPath) > 1 {
    proxyPath = proxyPath[:len(proxyPath)-1]
  }
  req.URL.Path = proxyPath
}

Finally, we set up the handler for the /*catchall httprouter path. In this case we don’t do anything other than call the reverse proxy’s ServeHTTP method and pass it the original ResponseWriter and http Request. We then kick start the httprouter using ListenAndServe:

router.Handle("GET", path, func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
  reverseProxy.ServeHTTP(w, r)
})

logger.Fatal(http.ListenAndServe(":9001", router))

Handling Errors

In order to handle errors the reverse proxy needs to construct a new response object, which means if you wanted the error response you generate to have all the same response headers as were provided by the upstream service, then you’d have to programmatically add those to the new response object.

Let’s see how we can handle errors in the basic sense, just to get an idea for how the code looks. Now we don’t need to use another programming language to do this, we can do it all in Go (we could have done this earlier instead of using Python, but I wanted to highlight how you could use another language if you wanted).

package main

import (
	"errors"
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"net/http"
	"net/http/httptest"
	"net/http/httputil"
	"net/url"
	"strings"
	"time"
)

// singleJoiningSlash joins a and b with exactly one "/" between them.
//
// Behaviour mirrors the helper of the same name in the standard library:
// https://golang.org/src/net/http/httputil/reverseproxy.go?s=3330:3391#L98
func singleJoiningSlash(a, b string) string {
	// Count how many slashes meet at the seam (0, 1 or 2).
	slashes := 0
	if strings.HasSuffix(a, "/") {
		slashes++
	}
	if strings.HasPrefix(b, "/") {
		slashes++
	}

	switch slashes {
	case 2:
		// One too many: discard b's leading slash.
		return a + b[1:]
	case 0:
		// None at all: supply the separator.
		return a + "/" + b
	default:
		return a + b
	}
}

// main wires up an in-process backend and a reverse proxy in front of it,
// forces the proxy's ModifyResponse hook to fail, and prints the body that the
// proxy's ErrorHandler produced in its place.
func main() {
	// Stand-in origin: replies to every request with a fixed line of text.
	backendServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintln(w, "backend server handled the request!")
	}))
	defer backendServer.Close()

	backendServerURL, err := url.Parse(backendServer.URL)
	if err != nil {
		log.Fatal(err)
	}

	proxy := &httputil.ReverseProxy{
		// Retarget the request at the backend, joining the backend's base
		// path and the requested path with a single slash.
		Director: func(r *http.Request) {
			r.URL.Scheme = backendServerURL.Scheme
			r.URL.Host = backendServerURL.Host
			r.URL.Path = singleJoiningSlash(backendServerURL.Path, r.URL.Path)
		},
		Transport: &http.Transport{
			// DialContext supersedes the deprecated Transport.Dial field; the
			// 10s bound on establishing the connection is unchanged.
			DialContext: (&net.Dialer{
				Timeout: 10 * time.Second,
			}).DialContext,
		},
		ModifyResponse: func(r *http.Response) error {
			// return nil
			//
			// purposefully return an error so ErrorHandler gets called
			return errors.New("uh-oh")
		},
		// Builds the replacement response: logs the error, then answers with
		// a 500 whose body is the error text.
		ErrorHandler: func(rw http.ResponseWriter, r *http.Request, err error) {
			fmt.Printf("error was: %+v", err)
			rw.WriteHeader(http.StatusInternalServerError)
			rw.Write([]byte(err.Error()))
		},
	}

	frontendServer := httptest.NewServer(proxy)
	defer frontendServer.Close()

	resp, err := http.Get(frontendServer.URL)
	if err != nil {
		log.Fatal(err)
	}
	// Always release the response body so the underlying connection is not
	// leaked.
	defer resp.Body.Close()

	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("\n\nbody: \n%s\n\n", b)
}

Here we can see that our httputil.ReverseProxy#ModifyResponse function is hardcoded to return an error type, which then causes the httputil.ReverseProxy#ErrorHandler function to be called.

From there we use the http.ResponseWriter to create a new response. In this case we do nothing other than print the original error, but you could do pretty much anything you like at this point.

If you needed the original response object that came from the upstream then you’d need to make sure the error you returned from ModifyResponse was a custom error type so that you attach a field such as OriginalResponse to it and thus assign it the original http.Response that was available to you within ModifyResponse.

NGINX-Lite (not-really)

Below is an example that demonstrates using httpbin as our origin.

Specifically we use its /anything endpoint, which allows you to provide any value as the final path segment. So for example, /anything/foo or /anything/beep, both work with the httpbin.org service.

package main

import (
	"fmt"
	"log"
	"net/http"
	"net/http/httputil"
)

// main runs a reverse proxy on :9001 that forwards every request to
// https://httpbin.org, prefixing the requested path with /anything.
func main() {
	const (
		originHost       = "httpbin.org"
		originPathPrefix = "/anything"
	)

	// rewrite retargets the inbound request at the origin and dumps the
	// resulting request to stdout.
	rewrite := func(req *http.Request) {
		// Capture the client-facing host before req.Host is overwritten.
		req.Header.Add("X-Forwarded-Host", req.Host)
		req.Header.Add("X-Origin-Host", originHost)
		req.Host = originHost
		req.URL.Scheme = "https"
		req.URL.Host = originHost
		req.URL.Path = originPathPrefix + req.URL.Path

		fmt.Printf("final request\n\n %+v \n\n", req)
	}

	// *httputil.ReverseProxy is itself an http.Handler, so it can be
	// registered directly.
	http.Handle("/", &httputil.ReverseProxy{Director: rewrite})

	log.Fatal(http.ListenAndServe(":9001", nil))
}

Now let’s elaborate on this example a little bit and ensure that our reverse proxy has a client timeout specified (see this article for details as to why you would want to do this).

We also use gorilla/mux as it supports utilising regular expression path matching (we could do this ourselves, but using a library in this case helps to keep the code we have to write down).

One last thing you’ll notice is that we’re using a configuration object that allows us to configure override behaviour. For example, if our request includes an HTTP header of X-BF-Testing and its value is integralist, then we’ll proxy the request to a different endpoint.

You can do more complex things if necessary, but this gives you a good idea of how to replicate something like NGINX with very little code (obviously to replicate something like NGINX is waaay beyond the scope of this post) 😉

package main

import (
	"log"
	"net"
	"net/http"
	"net/http/httputil"
	"time"

	"github.com/gorilla/mux"
)

type (
	// override describes an alternative destination that applies only when
	// an incoming request carries a specific header value.
	override struct {
		// Header is the name of the request header to inspect.
		Header string
		// Match is the value Header must equal for the override to apply.
		Match string
		// Host is presumably an alternative origin host for matching
		// requests.
		Host string
		// Path replaces the request path for matching requests.
		Path string
	}

	// config describes one proxied route.
	config struct {
		// Path is the route pattern registered with the router.
		Path string
		// Host is the origin host the request is forwarded to.
		Host string
		// Override optionally redirects requests that carry a given header.
		Override override
	}
)

// generateProxy builds the reverse proxy handler for a single route.
//
// Requests are forwarded over HTTPS to conf.Host. If conf.Override names both
// a Header and a Match value, and the incoming request carries that header
// with exactly that value, the override is applied: a non-empty Override.Host
// replaces the origin host and a non-empty Override.Path replaces the request
// path. Override fields left empty keep the request's normal destination.
//
// The returned handler gives up on connecting to the origin after 5 seconds.
func generateProxy(conf config) http.Handler {
	director := func(req *http.Request) {
		originHost := conf.Host

		ov := conf.Override
		if ov.Header != "" && ov.Match != "" && req.Header.Get(ov.Header) == ov.Match {
			// Honour each override field only when it is set, so a host-only
			// override keeps the requested path and a path-only override
			// keeps the configured host.
			if ov.Host != "" {
				originHost = ov.Host
			}
			if ov.Path != "" {
				req.URL.Path = ov.Path
			}
		}

		// Record the client-facing host before req.Host is overwritten.
		req.Header.Add("X-Forwarded-Host", req.Host)
		req.Header.Add("X-Origin-Host", originHost)
		req.Host = originHost
		req.URL.Host = originHost
		req.URL.Scheme = "https"
	}

	transport := &http.Transport{
		// DialContext supersedes the deprecated Transport.Dial field.
		DialContext: (&net.Dialer{
			Timeout: 5 * time.Second,
		}).DialContext,
	}

	return &httputil.ReverseProxy{Director: director, Transport: transport}
}

func main() {
	r := mux.NewRouter()

	configuration := []config{
		config{
			Path: "/{path:anything/(?:foo|bar)}",
			Host: "httpbin.org",
		},
		config{
			Path: "/anything/foobar",
			Host: "httpbin.org",
			Override: override{
				Header: "X-BF-Testing",
				Match:  "integralist",
				Path:   "/anything/newthing",
			},
		},
	}

	for _, conf := range configuration {
		proxy := generateProxy(conf)

		r.HandleFunc(conf.Path, func(w http.ResponseWriter, r *http.Request) {
			proxy.ServeHTTP(w, r)
		})
	}

	log.Fatal(http.ListenAndServe(":9001", r))
}

Conclusion

That’s all there is to it.

You could also wrap the function passed to router.Handle in a middleware function so that you’re able to do extra processing. A common example of this is to authenticate the incoming request before it is proxied to the origin (meaning you can reject the request if necessary).


But before we wrap up... time (once again) for some self-promotion 🙊