/**
 * A script for the Platypush browser extension that saves the current page URL to Wallabag.
 * Use together with the Reader Mode script
 * https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results.
 */

// Entry point for the script, which is executed when the user runs the
// associated action. All the logic should be encapsulated in this function.
//
// Parameters:
//   app  - object provided by the extension; this script uses `getURL`,
//          `getDOM`, `run` and `notify` on it.
//   args - action arguments; only `args.host` is read, and it is forwarded
//          as the second argument of every `app.run` call.
async (app, args) => {
  // This is the base path where the scraped pages will be saved.
  // For sake of simplicity, we will save the scraped pages to a local directory
  // on the same server where the Platypush service is running.
  // If you want to push it to another server, you can replace the call to
  // `file.write` at the bottom of the script with `ssh.put`
  // (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put)
  // (ensure that the `ssh` plugin is enabled on your Platypush instance).
  const savePath = `/CHANGEME`;

  // This is the URL where the scraped pages will be served from.
  // The simplest way to configure it is to set up a web server that serves
  // the files in the `savePath` directory (python -m http.server should
  // suffice), and then configure a reverse proxy to point to your server -
  // or even configure nginx itself to both serve the files and handle SSL.
  // It is strongly recommended to use HTTPS for this URL, as Wallabag
  // will probably refuse to scrape HTTP URLs.
  const scrapeUrl = 'https://scraped.example.com';

  // Get the page URL and DOM
  const url = await app.getURL();
  const dom = await app.getDOM();

  // A utility function that computes a 32-bit integer hash code for a string.
  // This is used to derive the filename from the URL. The result is
  // deterministic for a given string and may be negative.
  const hashCode = (str) => {
    let hash = 0;
    for (let i = 0, len = str.length; i < len; i++) {
      const chr = str.charCodeAt(i);
      hash = (hash << 5) - hash + chr;
      hash |= 0; // Convert to 32bit integer
    }

    return hash;
  };

  // Matches values that already carry a URL scheme (http:, https:, mailto:,
  // data:, javascript:, tel:, ...). The pattern is anchored as a whole, so a
  // relative path that merely *contains* something like "javascript:" is
  // still treated as relative.
  const hasScheme = /^[a-z][a-z0-9+.-]*:/i;

  // This function replaces relative URLs under `root` with absolute URLs
  // resolved against the original page URL. This is necessary to ensure that
  // links and images point to the correct location when the page is saved or
  // shared. Resolution is delegated to the URL constructor, so root-relative
  // (`/x`), document-relative (`x`, `../x`), protocol-relative (`//host/x`),
  // query-only (`?q`) and fragment-only (`#f`) references are all handled.
  const replaceRelativeUrls = (root) => {
    const targets = [
      ...[...root.querySelectorAll('a')].map((el) => [el, 'href']),
      ...[...root.querySelectorAll('img')].map((el) => [el, 'src']),
    ];

    for (const [el, attrName] of targets) {
      const attrValue = el.getAttribute(attrName);
      if (!attrValue?.length || hasScheme.test(attrValue)) {
        continue;
      }

      try {
        el.setAttribute(attrName, new URL(attrValue, url).href);
      } catch (error) {
        // Best effort: a value that cannot be resolved against the page URL
        // is left untouched rather than aborting the whole save.
      }
    }
  };

  // This function checks if the current DOM has already been
  // simplified/distilled by the Reader Mode script. If that's the case, then
  // we can directly save the simplified content on the server, and let
  // Wallabag scrape that URL. This ensures that any client-side restrictions
  // that may prevent Wallabag from scraping the original page are bypassed.
  //
  // Returns the URL that should be passed to Wallabag: the original page URL
  // if no simplified content is found, otherwise the URL of the uploaded copy.
  const getSaveUrl = async () => {
    // Check if the current DOM has already been "distilled" by the Reader Mode script
    const simplifiedContainer = dom.querySelector('.platypush__simplified-body');

    // If that's not the case, save the original URL as it is
    if (!simplifiedContainer) {
      return url;
    }

    // Otherwise, upload the simplified content to a proxy.
    // Work on a deep copy, so that the DOM returned by `app.getDOM()` is
    // neither rewritten nor stripped of its simplified container.
    const content = simplifiedContainer.cloneNode(true);

    const html = document.createElement('html');
    const head = document.createElement('head');
    const title = document.createElement('title');
    const meta = document.createElement('meta');
    const body = document.createElement('body');
    const originalLinkWrapper = document.createElement('b');
    const originalLink = document.createElement('a');

    // Replace the relative URLs in the simplified content
    replaceRelativeUrls(content);

    // Set up the HTML structure. Fall back to an empty title, otherwise a
    // page without a <title> would be saved with the literal text "undefined".
    title.textContent = dom.querySelector('head title')?.textContent ?? '';
    meta.setAttribute('charset', 'utf-8');

    // Put a link to the original page in the body
    originalLink.setAttribute('href', url);
    originalLink.setAttribute('target', '_blank');
    originalLink.textContent = 'Original link';
    originalLinkWrapper.append(originalLink, '\n');

    // Build the HTML document
    head.appendChild(title);
    head.appendChild(meta);
    body.appendChild(originalLinkWrapper);
    body.appendChild(content);
    html.appendChild(head);
    html.appendChild(body);

    // Generate the filename from the hash of the URL, so that saving the
    // same URL again writes to the same file
    const filename = `${hashCode(url)}.html`;
    const outfile = `${savePath}/${filename}`;

    // Upload it as HTML to the server
    await app.run({
      action: 'file.write',
      args: {
        file: outfile,
        content: html.outerHTML,
      },
    }, args.host);

    return `${scrapeUrl}/${filename}`;
  };

  // Get the URL to save - either the original one, or the simplified one if
  // the Reader Mode script has already been applied.
  const urlToSave = await getSaveUrl();
  const response = await app.run({
    action: 'wallabag.save',
    args: {
      url: urlToSave,
    },
  }, args.host);

  // Send a notification to the user with the result of the save operation.
  // If the response carries no title, show the saved URL instead.
  app.notify('Wallabag Save', response?.title ?? urlToSave);

  // Optional, if ntfy is enabled, you can send a notification to the user
  // that will be received by any client running ntfy
  // app.run({
  //   action: 'ntfy.send_message',
  //   args: {
  //     topic: 'wallabag-save-some-random-string',
  //     title: 'Saved on Wallabag',
  //     message: response.title,
  //     url: response.url,
  //   }
  // }, args.host);
}