Last active 1749137107

Revision 055aa6dc58031a305f8819e742a25bd618ec46fd

platypush-ext-save-link.js Raw
/**
* A script for the Platypush browser extension that saves the current page URL to Wallabag.
* Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results.
*/
// Entry point for the script, which is executed when the user runs the
// associated action. All the logic should be encapsulated in this function.
async (app, args) => {
// Directory where the scraped (simplified) pages are written.
// To keep things simple, the pages are stored in a local directory on the
// machine that runs the Platypush service.
// To push them to a different server instead, swap the `file.write` call
// further down in this script for `ssh.put`
// (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put)
// and make sure that the `ssh` plugin is enabled on your Platypush instance.
const savePath = '/CHANGEME';
// Public URL under which the files stored in `savePath` are served.
// The easiest setup is a web server that exposes the `savePath` directory
// (python -m http.server is enough) behind a reverse proxy - or nginx
// configured to both serve the files and terminate SSL.
// HTTPS is strongly recommended here, as Wallabag will probably refuse to
// scrape plain HTTP URLs.
const scrapeUrl = 'https://scraped.example.com';
// Address and DOM of the page the action was triggered on
const url = await app.getURL();
const dom = await app.getDOM();
// Computes a deterministic signed 32-bit hash of a string (base-31 rolling
// hash over its UTF-16 code units). The result can be negative and, like any
// 32-bit hash, is not guaranteed to be collision-free.
// It is used to derive a stable file name from the page URL.
const hashCode = (str) => {
  let acc = 0;
  for (let idx = 0; idx < str.length; idx += 1) {
    // Math.imul multiplies with 32-bit wrap-around; `| 0` wraps the sum too
    acc = (Math.imul(acc, 31) + str.charCodeAt(idx)) | 0;
  }
  return acc;
};
// Rewrites the `href` of every link and the `src` of every image in the DOM
// to an absolute URL, resolved against the URL of the original page. This is
// necessary to ensure that links and images still point to the right location
// once the page is saved under a different address.
//
// Resolution is delegated to the standard URL API, which correctly handles
// root-relative paths (`/a`), document-relative paths (`a/b`, `../a`),
// protocol-relative URLs (`//host/a`), fragments (`#a`) and queries (`?a=b`).
// Values that already carry a scheme (`https:`, `mailto:`, `tel:`, `data:`,
// `javascript:`, ...) are left untouched, and so are values that cannot be
// resolved at all.
const replaceRelativeUrls = () => {
  // A leading URI scheme, e.g. `https:` or `mailto:`
  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;
  const targets = [
    ...[...dom.querySelectorAll('a')].map((el) => [el, 'href']),
    ...[...dom.querySelectorAll('img')].map((el) => [el, 'src']),
  ];

  for (const [el, attrName] of targets) {
    const attrValue = el.getAttribute(attrName);
    // Nothing to do for missing/empty attributes or already absolute URLs
    if (!attrValue || schemePattern.test(attrValue)) {
      continue;
    }

    let absoluteUrl;
    try {
      absoluteUrl = new URL(attrValue, url).href;
    } catch (error) {
      // Either the value or the page URL cannot be used for resolution:
      // keep the original attribute rather than writing a broken one.
      continue;
    }

    el.setAttribute(attrName, absoluteUrl);
  }
};
// Decides which URL should be handed over to Wallabag.
//
// If the DOM contains no `.platypush__simplified-body` element (i.e. the page
// has not been simplified by the Reader Mode script), the original page URL
// is returned as it is.
// Otherwise the simplified content is wrapped into a minimal standalone HTML
// document, written to `savePath` through the `file.write` action, and the
// URL of that copy under `scrapeUrl` is returned. Letting Wallabag scrape the
// copy bypasses client-side restrictions that may prevent it from scraping
// the original page.
//
// Returns a promise that resolves to the URL to save. It rejects if the
// `file.write` action fails.
const getSaveUrl = async () => {
  const simplifiedContainer = dom.querySelector('.platypush__simplified-body');
  if (!simplifiedContainer) {
    return url;
  }

  // The copy is served from a different address: make links/images absolute
  replaceRelativeUrls();

  // <head>: the charset declaration goes first, so that it is found early
  // regardless of how long the title is.
  const meta = document.createElement('meta');
  meta.setAttribute('charset', 'utf-8');

  const title = document.createElement('title');
  // Fall back to an empty title when the page has none, instead of ending up
  // with the literal string "undefined".
  title.textContent = dom.querySelector('head title')?.textContent ?? '';

  const head = document.createElement('head');
  head.append(meta, title);

  // <body>: a link back to the original page, followed by the content
  const originalLink = document.createElement('a');
  originalLink.setAttribute('href', url);
  originalLink.setAttribute('target', '_blank');
  originalLink.textContent = 'Original link';

  const originalLinkWrapper = document.createElement('b');
  originalLinkWrapper.append(originalLink, document.createElement('br'));

  const body = document.createElement('body');
  // NOTE: appending moves the container into the new document tree
  body.append(originalLinkWrapper, simplifiedContainer);

  const html = document.createElement('html');
  html.append(head, body);

  // The file name is derived from the hash of the URL, so saving the same
  // page again overwrites the previous copy.
  const filename = `${hashCode(url)}.html`;
  const outfile = `${savePath}/${filename}`;

  await app.run({
    action: 'file.write',
    args: {
      file: outfile,
      // `outerHTML` does not include a doctype: add it explicitly
      content: `<!DOCTYPE html>${html.outerHTML}`,
    },
  }, args.host);

  return `${scrapeUrl}/${filename}`;
};
// Pick the URL that Wallabag should fetch: the original page, or the stored
// simplified copy if the Reader Mode script has already been applied.
const urlToSave = await getSaveUrl();
const saveRequest = {
  action: 'wallabag.save',
  args: { url: urlToSave },
};
const response = await app.run(saveRequest, args.host);
// Report the outcome of the save operation to the user
app.notify('Wallabag Save', response.title);
// Optional: if ntfy is enabled, a notification can also be delivered to any
// client running ntfy
// app.run({
//   action: 'ntfy.send_message',
//   args: {
//     topic: 'wallabag-save-some-random-string',
//     title: 'Saved on Wallabag',
//     message: response.title,
//     url: response.url,
//   }
// }, args.host);
}