Htcrawl is a Node.js module for recursively crawling single-page applications (SPAs) using JavaScript.
It uses headless Chrome to load and analyze web applications, and it is built on top of Puppeteer, from which it inherits all functionality.

With htcrawl you can roll your own DOM-XSS scanner with fewer than 60 lines of JavaScript!! (see below)

Some examples of what (else) you can do with htcrawl:

  1. Advanced scraping of single page applications (SPA)
  2. Intercept and log all requests made by a webpage
  3. Build tools to detect security vulnerabilities
  4. Automate testing of UI, JavaScript, etc.

You may also try htcap, a vulnerability scanner built on top of htcrawl.

Basic usage

A very basic example that prints all AJAX requests.

const htcrawl = require('htcrawl');

// `await` is only valid inside an async function in a CommonJS (`require`)
// script, so the whole example runs inside an async IIFE.
(async () => {
  // Get instance of Crawler class
  // NOTE: the empty string is a placeholder; pass the URL of the application to crawl.
  const crawler = await htcrawl.launch("");

  // Print out the url of ajax calls
  crawler.on("xhr", e => {
    console.log(`XHR to ${e.params.request.url}`);
  });

  // Start crawling!
  await crawler.start();
})().catch(err => {
  // Report launch/crawl failures instead of leaving an unhandled rejection.
  console.log(`Error: ${err}`);
});

Demo Video

Short video of the crawl engine in action.

API Reference

The API manual can be found here


$ npm install htcrawl
$ # or
$ git clone https://github.com/fcavallarin/htcrawl.git && cd htcrawl && npm i && cd ..

or visit the github page here

Crawl flow

The diagram below shows the recursive crawling process.


1. DOM XSS Scanner

Simple DOM XSS scanner

// NOTE: the empty string is a placeholder; set it to the URL of the application to scan.
const targetUrl="";
const options = {headlessChrome:1};

// Maps each generated key to the payload template and the element it was injected into.
const pmap = {};

// Payload templates; "{0}" is replaced with a unique key by getNewPayload().
const payloads = [
    "<img src='a' onerror=window.___xssSink({0})>"
];

/**
 * Builds a concrete payload from a template and records where it is going to
 * be injected, so that a later sink call carrying the key can be traced back.
 *
 * @param {string} payload - Payload template containing the "{0}" placeholder.
 * @param {string} element - Identifier of the injection point (stored as-is in pmap).
 * @returns {string} The template with "{0}" replaced by a freshly generated key.
 */
function getNewPayload(payload, element){
    // The key is a random integer rendered as a string; it doubles as the pmap index.
    const k = "" + Math.floor(Math.random()*4000000000);
    const p = payload.replace("{0}", k);
    pmap[k] = {payload:payload, element:element};
    return p;
}

/**
 * Crawls `targetUrl` once, writing `payload` into every input the crawler
 * fills and into the URL hash, and logs each payload whose key reaches the
 * `___xssSink` function exposed to the page.
 *
 * @param {string} payload - Payload template containing the "{0}" placeholder.
 * @returns {Promise<void>} Settles once the crawl has ended; crawl errors are logged, not thrown.
 */
async function crawlAndFuzz(payload){
    let hashSet = false;

    // instantiate htcrawl
    const crawler = await htcrawl.launch(targetUrl, options);

    try{
        // set a sink on page scope: an executed payload calls window.___xssSink(<key>)
        // NOTE(review): crawler.page() is assumed to return the Puppeteer Page — confirm against the htcrawl API manual.
        await crawler.page().exposeFunction("___xssSink", key => {
            const hit = pmap[key];
            // Ignore calls carrying a key that getNewPayload() never generated.
            if(!hit) return;
            const msg = `DOM XSS found:\n  payload: ${hit.payload}\n  element: ${hit.element}`;
            console.log(msg);
        });

        // fill all inputs with a payload
        crawler.on("fillinput", async function(e, crawler){
            const p = getNewPayload(payload, e.params.element);
            try{
                await crawler.page().$eval(e.params.element, (i, p) => i.value = p, p);
            } catch(err){
                // Filling an input is best-effort: report the failure and keep crawling.
                console.log(`Error ${err}`);
            }
            // return false to prevent element to be automatically filled with a random value
            return false;
        });

        // change page hash before the triggering of the first event
        crawler.on("triggerevent", async function(e, crawler){
            if(hashSet) return;
            // Set the flag before awaiting so a second event cannot set the hash again.
            hashSet = true;
            const p = getNewPayload(payload, "hash");
            await crawler.page().evaluate(p => document.location.hash = p, p);
        });

        await crawler.start();
    } catch(e){
        console.log(`Error ${e}`);
    } finally {
        // Each call launches its own browser; close it so repeated scans do not leak processes.
        // NOTE(review): crawler.browser() is assumed to return the Puppeteer Browser — confirm against the htcrawl API manual.
        await crawler.browser().close();
    }
}


// Entry point: run one crawl per payload template.
(async () => {
    for(const payload of payloads){
        /* Remove 'await' for parallel scan of all payloads */
        await crawlAndFuzz(payload);
    }
})().catch(err => {
    // crawlAndFuzz() rejects if the crawler cannot be launched; report it instead of
    // leaving an unhandled rejection.
    console.log(`Error ${err}`);
});

2. Advanced content scraper

Crawls a single page application looking for emails.

// NOTE: the empty string is a placeholder; set it to the URL of the application to scrape.
const targetUrl="";
const options = {headlessChrome:1};

/**
 * Prints every email-like substring found in `string`, one per line.
 *
 * @param {string} string - Text to scan. Non-string values are ignored.
 * @returns {void}
 */
function printEmails(string){
    // Callers pass a node's innerText, which is not guaranteed to be a string.
    if(typeof string !== "string") return;
    const emails = string.match(/([a-z0-9._-]+@[a-z0-9._-]+\.[a-z]+)/gi);
    // match() returns null when nothing is found.
    if(!emails) return;
    for(const e of emails){
        console.log(e);
    }
}

// Launch the crawler, print the emails found in the page text, then shut down.
htcrawl.launch(targetUrl, options).then(async crawler => {

    // On "domcontentloaded", scan the text of the whole <body>.
    // NOTE(review): crawler.page() is assumed to return the Puppeteer Page — confirm against the htcrawl API manual.
    crawler.on("domcontentloaded", async function(e, crawler){
        const selector = "body";
        const html = await crawler.page().$eval(selector, body => body.innerText);
        printEmails(html);
    });

    // On "newdom", scan only the text under the selector carried by the event.
    crawler.on("newdom", async function(e, crawler){
        const selector = e.params.rootNode;
        const html = await crawler.page().$eval(selector, node => node.innerText);
        printEmails(html);
    });

    try{
        await crawler.start();
    } catch(err){
        console.log(`Error: ${err}`);
    } finally {
        // Close the browser whether or not the crawl succeeded.
        // NOTE(review): crawler.browser() is assumed to return the Puppeteer Browser — confirm against the htcrawl API manual.
        await crawler.browser().close();
    }
}).catch(err => {
    // Covers a failed launch as well as a failure while closing the browser.
    console.log(`Error: ${err}`);
});


Htcrawl has been written by Filippo Cavallarin.
Please report bugs, comments, etc. to filippo.cavallarin[]
This project is a spin-off of Htcap.


This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.