Static Search With Lunr.js ⋆ Mark McDonnell

Introduction

You have a statically generated website (like mine!) and you want to implement some kind of search facility that is:

Free
Doesn’t Suck (e.g. no ads or iframe)
Quick (i.e. no server-side communication)

The solution is to use Lunr.js.

You might be wondering about Lunr’s origins. It’s loosely based on the ideas made popular by Solr, which is an open-source search platform built on a Java library called Lucene.

Since then we’ve also seen the release and rise of ElasticSearch, which is an open-source, distributed, and RESTful search engine built on top of the Apache Lucene library.

Background

I’ll explain this from the perspective of Hugo, which is the static site generator I use to produce this website. Hugo stores the metadata for each post (e.g. title, date, categories, tags etc) in something it calls Front Matter.

I use YAML, but you can use JSON or TOML, and that’s important to note because the implementation I use is based on my metadata being in YAML format. So if yours is JSON or TOML, for example, then you’ll need to modify the code shown in this post to reflect your use case.

Below is the ‘front matter’ for this post you’re reading.

---
title: "Static Search With Lunr.js"
date: 2018-05-10T16:54:08+01:00
categories:
  - "code"
  - "development"
  - "guide"
  - "search"
tags:
  - "elasticsearch"
  - "javascript"
  - "js"
  - "lunr"
  - "solr"
  - "static"
draft: false
---

What typically follows the front matter is the content of your post.

Tasks

OK, so here are the tasks we need to complete to get a working solution:

Generate a search index.json file.
Create a search HTML page.
Write some JavaScript to load the index JSON and populate Lunr.
Write some more JavaScript to accept the user’s input.
Write even more JavaScript to query your index JSON.

Note: I’m not a JavaScript fan, but needs must.

Setup

Step 1, generate an index.json file. To do that I’m going to use Grunt because it ties easily into NPM’s package.json format and luckily for me someone had already done a lot of the (no pun intended) ‘grunt’ work and I just needed to modify the code to suit my needs.

Here’s the relevant portion of the package.json which we run with npm run dev:

"scripts": {
  "index": "hugo && grunt lunr-index",
  "dev": "npm run index && hugo server"
}

Here is the lunr-index Gruntfile task that’s executed:

var yaml = require("js-yaml");
var S = require("string");

var CONTENT_PATH_PREFIX = "content";

module.exports = function(grunt) {

    grunt.registerTask("lunr-index", function() {

        var indexPages = function() {
            var pagesIndex = [];
            grunt.file.recurse(CONTENT_PATH_PREFIX, function(abspath, rootdir, subdir, filename) {
                pagesIndex.push(processMDFile(abspath, filename));
            });

            return pagesIndex;
        };

        var processMDFile = function(abspath, filename) {
            var content = grunt.file.read(abspath);
            var pageIndex;

            // separate the Front Matter from the content and parse it
            content = content.split("---");

            var frontMatter;
            try {
                frontMatter = yaml.load(content[1]);
            } catch (e) {
                grunt.log.writeln(e.message);
            }

            var href = S(abspath).chompLeft(CONTENT_PATH_PREFIX).chompRight(".md").s;

            if (filename === ".DS_Store") {
              return
            }

            if (filename === "_index.md") {
                href = "/"
            }
            var m = abspath.match(/^content\/page\/(.+)\.md/);
            if (m != null) {
              href = "/" + m[1]
            }

            // build Lunr index for this page
            pageIndex = {
                title: frontMatter.title,
                tags: frontMatter.tags,
                href: href.toLowerCase(),
                content: S(content[2]).stripTags().stripPunctuation().s
            };

            return pageIndex;
        };

        grunt.file.write("static/js/lunr/index.json", JSON.stringify(indexPages()));
    });
};

The key part of that Gruntfile, other than parsing out the metadata (front matter), is where we store the result: static/js/lunr/index.json. That location is included in Hugo’s build step, so the file will always be available when my static code is deployed. It’s also why, when I’m working locally on a new post, I use npm run dev instead of hugo server: I’m guaranteed to generate a new search index based on the latest blog content I’ve just added.

Step 2, create a search page:

---
title: Search
description: Lookup articles of interest.
comments: false
menu: main
weight: -170
---

<p><input id="search" type="text" placeholder="type something here"></p>

<ul id="results"></ul>

<script src="https://code.jquery.com/jquery-2.1.3.min.js"></script>
<script src="https://unpkg.com/lunr/lunr.js"></script>
<script>
  ...script here...
</script>

Steps 3, 4 and 5, lots of JS…

// Lunr index; assigned by initLunr once index.json has been fetched
var lunrIndex;
// jQuery handle for the #results list; assigned by initUI
var $results;
// page records parsed from index.json; assigned by initLunr
var documents;

// Fetches the pre-built index.json and uses it to populate Lunr.
// On success the parsed JSON is stored in `documents` and the built index in
// `lunrIndex`; a failed request is reported on the console.
function initLunr() {
  // retrieve the index file
  $.getJSON("../../js/lunr/index.json")
    .done(function(index) {
        documents = index;

        lunrIndex = lunr(function(){
          // `href` identifies a document in search results
          this.ref('href');
          this.field('content');

          this.field("title", {
              boost: 10
          });

          this.field("tags", {
              boost: 5
          });

          documents.forEach(function(doc) {
            // the index file may contain null entries; there is nothing to add
            if (doc == null) {
              return;
            }

            try {
              this.add(doc);
            } catch (e) {
              // keep indexing the remaining pages, but make the failure visible
              console.error("Error adding document to Lunr index:", doc.href, e);
            }
          }, this);
        });
    })
    .fail(function(jqxhr, textStatus, error) {
        var err = textStatus + ", " + error;
        console.error("Error getting Lunr index file:", err);
    });
}

// Runs `query` against the Lunr index and maps every hit back to its page
// record in `documents`, keeping the order Lunr returned.
// Returns an empty array when the index has not been loaded yet or when Lunr
// rejects the query syntax.
function search(query) {
  // the index is fetched asynchronously, so the user may type before it exists
  if (!lunrIndex) {
    return [];
  }

  var hits;
  try {
    hits = lunrIndex.search(query);
  } catch (e) {
    // Lunr reports malformed query syntax with an error named
    // "QueryParseError"; a half-typed query is not worth breaking the page for
    if (e && e.name === "QueryParseError") {
      return [];
    }
    throw e;
  }

  return hits
    .map(function(result) {
      return documents.filter(function(page) {
        // the index file may contain null entries; they never match
        return page != null && page.href === result.ref;
      })[0];
    })
    .filter(function(page) {
      // drop hits that have no page record to render
      return page !== undefined;
    });
}

// Appends one <li> containing a link for each of the first ten entries of
// `results` to the $results list. Does nothing when `results` is empty.
function renderResults(results) {
  var total = results.length;
  if (!total) {
    return;
  }

  // creates the list item for a single page and attaches it to the list
  function appendEntry(page) {
    var $item = $("<li>");
    var $link = $("<a>", {
      href: page.href,
      text: "» " + page.title
    });

    $item.append($link);
    $results.append($item);
  }

  // show first ten results
  results.slice(0, 10).forEach(appendEntry);
}

// Looks up the results list and makes the #search input run a search on
// every keyup.
function initUI() {
  $results = $("#results");

  // keyup handler: `this` is the input element the event fired on
  function onKeyUp() {
    // empty previous results
    $results.empty();

    // trigger search when at least two chars provided.
    var typed = $(this).val();
    if (typed.length >= 2) {
      renderResults(search(typed));
    }
  }

  $("#search").keyup(onKeyUp);
}

// start fetching the index straight away; initLunr does not touch the DOM
initLunr();

// wire up the search box once the document is ready
$(document).ready(initUI);

The key part of the above JS is the bit after we’ve retrieved the search index.json file, as this is what takes the search index file and uses it to populate Lunr:

lunrIndex = lunr(function(){
  // `href` identifies a document in search results
  this.ref('href');
  this.field('content');

  this.field("title", {
      boost: 10
  });

  this.field("tags", {
      boost: 5
  });

  documents.forEach(function(doc) {
    // the index file may contain null entries; there is nothing to add
    if (doc == null) {
      return;
    }

    try {
      this.add(doc);
    } catch (e) {
      // keep indexing the remaining pages, but make the failure visible
      console.error("Error adding document to Lunr index:", doc.href, e);
    }
  }, this);
});

…and that’s it really.