Filters

Filters are JavaScript functions that take the document DOM as a parameter and are:

  • in-place: they modify the document structure and content directly;
  • idempotent: they produce the same document structure and content even if run repeatedly on their own result;
  • ordered: they are run sequentially in the order specified in the declaration.

Learn more about the concept and constraints in the filters explanation.

Signature

The generic function signature for a filter is:

  • For filters that take no parameter:
export [async] function filterName(document, [documentDeclaration])
  • For filters that take parameters:
export [async] function filterName(document, parameters, [documentDeclaration])

Each filter is exposed as a named function export that takes a document parameter, which behaves like the document object in a browser DOM. The document parameter is actually a JSDOM document instance.

These functions can be async, but they will still run sequentially.

Usage

Filters that take no parameter

// <service name>.filters.js
/**
 * Skeleton of a filter that takes no parameter.
 *
 * @param {Document} document - JSDOM document instance, to be modified in place by the filter.
 */
export function customFilter(document) {
  // filter logic here
}

Can be used as follows in the declaration:

// <service name>.json
{
  "name": "<service name>",
  "terms": {
    "<terms type>": {
      "fetch": "<URL>",
      "select": "<CSS or Range selectors>",
      "filter": [
        "customFilter"
      ]
    }
  }
}

Example

/**
 * Replaces each `<time>` element found under `.metadata` with a text node
 * containing the value of its `datetime` attribute, so that relative labels
 * such as "2 months ago" are replaced by a stable absolute date.
 *
 * `<time>` elements that have no `datetime` attribute are left untouched.
 *
 * @param {Document} document - JSDOM document instance, modified in place.
 */
export function convertTimeAgoToDate(document) {
  const timeElements = document.querySelectorAll('.metadata time');

  timeElements.forEach(timeElement => {
    const dateTimeValue = timeElement.getAttribute('datetime');

    // `getAttribute` returns null when the attribute is absent; building a text
    // node from it would write the literal string "null" into the document.
    if (dateTimeValue === null) {
      return;
    }

    const textNode = document.createTextNode(dateTimeValue);

    timeElement.parentNode.replaceChild(textNode, timeElement);
  });
}
{
  "name": "MyService",
  "terms": {
    "Privacy Policy": {
      "fetch": "https://my.service.example/privacy",
      "select": ".content",
      "filter": [
        "convertTimeAgoToDate"
      ]
    }
  }
}

Result:

- <p class="metadata">Last update: <time datetime="2025-06-23T11:16:36Z" title="06/23/2025, 13:16" data-datetime="relative">2 months ago</time></p>
+ <p class="metadata">Last update: 2025-06-23T11:16:36Z</p>

Filters that take parameters

// <service name>.filters.js
/**
 * Skeleton of a filter that takes parameters.
 *
 * @param {Document} document - JSDOM document instance, to be modified in place by the filter.
 * @param {*} params - Value associated with this filter's name in the declaration's `filter` entry.
 */
export function customParameterizedFilter(document, params) {
  // filter logic here
}

Can be used as follows in the declaration:

// <service name>.json
{
  "name": "<service name>",
  "terms": {
    "<terms type>": {
      "fetch": "<URL>",
      "select": "<CSS or Range selectors>",
      "filter": [
        {
          "customParameterizedFilter": ["param1", "param2"]
        }
      ]
    }
  }
}

Example 1

/**
 * Removes every `<a>` element whose trimmed text content exactly matches one
 * of the given labels.
 *
 * @param {Document} document - JSDOM document instance, modified in place.
 * @param {string|string[]} textArray - A single label or a list of labels to match.
 */
export function removeLinksWithText(document, textArray) {
  // Accept a single label as a convenience by wrapping it in a list.
  let unwantedLabels = textArray;

  if (!Array.isArray(unwantedLabels)) {
    unwantedLabels = [unwantedLabels];
  }

  for (const anchor of document.querySelectorAll('a')) {
    const label = anchor.textContent.trim();

    if (unwantedLabels.includes(label)) {
      anchor.remove();
    }
  }
}
{
  "name": "MyService",
  "terms": {
    "Privacy Policy": {
      "fetch": "https://my.service.example/privacy",
      "select": ".content",
      "filter": [
        { "removeLinksWithText": ["Return to previous section", "Go to next section"] }
      ]
    }
  }
}

Result:

  <div id="section1">
-   <a href="#section2">Go to next section</a>
    <p>...</p>
  </div>
  <div id="section2">
-   <a href="#section1">Return to previous section</a>
-   <a href="#section3">Go to next section</a>
    <p>...</p>
  </div>
  <div id="section3">
-   <a href="#section2">Return to previous section</a>
    <p>...</p>
  </div>

Example 2

import fetch from 'isomorphic-fetch';

/**
 * Inlines the images matching `selector` by replacing their `src` with a
 * base64 data-URI built from the fetched image content.
 *
 * Images whose `src` is already a data-URI are left untouched. All images are
 * fetched concurrently.
 *
 * @param {Document} document - JSDOM document instance, modified in place.
 * @param {string} selector - CSS selector of the images to convert.
 * @param {object} documentDeclaration - Declaration of the document; its `fetch` URL is used as the base to resolve relative image URLs.
 * @returns {Promise<undefined[]>} Resolves once every matching image has been processed.
 */
export async function convertImagesToBase64(document, selector, documentDeclaration) {
  const images = Array.from(document.querySelectorAll(selector));

  return Promise.all(images.map(async image => {
    const { src } = image;

    if (src.startsWith('data:')) {
      return; // Already a data-URI, skip
    }

    const imageUrl = new URL(src, documentDeclaration.fetch).href; // Ensure url is absolute
    const response = await fetch(imageUrl);
    const mimeType = response.headers.get('content-type');
    const content = await response.arrayBuffer();

    // Encode through Buffer rather than spreading every byte as a function
    // argument: the spread exceeds the engine's argument limit and throws a
    // RangeError for images larger than roughly a hundred kilobytes.
    const base64Content = Buffer.from(content).toString('base64');

    image.src = `data:${mimeType};base64,${base64Content}`;
  }));
}
{
  "name": "MyService",
  "terms": {
    "Privacy Policy": {
      "fetch": "https://my.service.example/privacy",
      "select": ".content",
      "filter": [
        { "convertImagesToBase64": ".meaningful-illustration" }
      ]
    }
  }
}

Result:

- <img src="https://my.service.example/image.png" class="meaningful-illustration">
+ <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..." class="meaningful-illustration">

Third-party libraries

As the last example shows, filters can import third-party libraries. To be available, these libraries must be declared as dependencies in the package.json of the collection.