Revision of platypush-ext-save-link.js

fabio revised this gist 1749137107. Go to revision

1 file changed, 25 insertions, 158 deletions

platypush-ext-save-link.js

			@@ -3,177 +3,44 @@
3	3		* Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results.
4	4		*/
5	5
	6	+
6	7		// Entry point for the script, which is executed when the user runs the
7	8		// associated action. All the logic should be encapsulated in this function.
8	9		async (app, args) => {
9		-	// This is the base path where the scraped pages will be saved.
10		-	// For sake of simplicity, we will save the scraped pages to a local directory
11		-	// on the same server where the Platypush service is running.
12		-	// If you want to push it to another server, you can replace the call to
13		-	// `file.write` at the bottom of the script with `ssh.put`
14		-	// (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put)
15		-	// (ensure that the `ssh` plugin is enabled on your Platypush instance).
16		-	const savePath = `/CHANGEME`;
17		-
18		-	// This is the URL where the scraped pages will be served from.
19		-	// The simplest way to configure it is to set up a web server that serves
20		-	// the files in the `savePath` directory (python -m http.server should
21		-	// suffice), and then configure a reverse proxy to point to your server -
22		-	// or even configure nginx itself to both serve the files and handle SSL.
23		-	// It is strongly recommended to use HTTPS for this URL, as Wallabag
24		-	// will probably refuse to scrape HTTP URLs.
25		-	const scrapeUrl = 'https://scraped.example.com';
	10	+	// (Optional) topic for the ntfy notification
	11	+	const ntfyTopic = 'notebook-saved-links-random-suffix';
26	12
27	13		// Get the page URL and DOM
28	14		const url = await app.getURL();
29	15		const dom = await app.getDOM();
30		-
31		-	// A utility function that generates a unique hash code for a given string.
32		-	// This is used to create a unique filename based on the URL.
33		-	const hashCode = (str) => {
34		-	let hash = 0;
35		-	for (let i = 0, len = str.length; i < len; i++) {
36		-	let chr = str.charCodeAt(i);
37		-	hash = (hash << 5) - hash + chr;
38		-	hash |= 0; // Convert to 32bit integer
39		-	}
40		-	return hash;
41		-	};
42		-
43		-	// Utility functions to get the base URL and base relative URL from a given URL string.
44		-	const getBaseUrl = (urlString) => {
45		-	const url = new URL(urlString);
46		-	const protocol = url.protocol;
47		-	const hostname = url.hostname;
48		-	const port = url.port;
49		-	return `${protocol}//${hostname}${port ? ':' + port : ''}`;
50		-	};
51		-
52		-	// This function extracts the base relative URL (without the filename) from a given URL string.
53		-	const getBaseRelativeUrl = (urlString) => {
54		-	try {
55		-	let url = new URL(urlString);
56		-	let pathWithoutFilename = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1);
57		-	return url.origin + pathWithoutFilename;
58		-	} catch (error) {
59		-	return urlString;
60		-	}
61		-	};
62		-
63		-	const baseUrl = getBaseUrl(url);
64		-	const baseRelativeUrl = getBaseRelativeUrl(url);
65		-
66		-	// This function replaces relative URLs in the DOM with absolute URLs based
67		-	// on the original base URL. This is necessary to ensure that links and images
68		-	// point to the correct location when the page is saved or shared.
69		-	const replaceRelativeUrls = () => {
70		-	const relativeLinks = [...dom.querySelectorAll('a')]
71		-	.filter((a) =>
72		-	a.getAttribute('href')?.length &&
73		-	!a.getAttribute('href')?.match(/^(https?:\/\/)|(javascript:)/)
74		-	);
75		-
76		-	const relativeImgs = [...dom.querySelectorAll('img')]
77		-	.filter((a) =>
78		-	a.getAttribute('src')?.length &&
79		-	!a.getAttribute('src')?.match(/^(https?:\/\/)|(data:image\/)/)
80		-	);
81		-
82		-	[...relativeLinks, ...relativeImgs].forEach((el) => {
83		-	const tag = el.tagName.toLowerCase();
84		-	const attrName = tag === 'img' ? 'src' : 'href';
85		-	const attrValue = el.getAttribute(attrName);
86		-	if (attrValue?.startsWith('/')) {
87		-	el.setAttribute(attrName, `${baseUrl}${attrValue}`);
88		-	} else {
89		-	el.setAttribute(attrName, `${baseRelativeUrl}${attrValue}`);
90		-	}
91		-	});
92		-	};
93		-
94		-	// This function checks if the current DOM has already been
95		-	// simplified/distilled by the Reader Mode script. If that's the case, then
96		-	// we can directly save the simplified content on the server, and let
97		-	// Wallabag scrape that URL. This ensures that any client-side restrictions
98		-	// that may prevent Wallabag from scraping the original page are bypassed.
99		-	const getSaveUrl = async () => {
100		-	// Check if the current DOM has already been "distilled" by the Mercury script
	16	+	const getContent = () => {
	17	+	// Check if the current DOM has already been "distilled" by the Mercury script.
	18	+	// If that's the case, use the already distilled content as the body of the saved article.
101	19		const simplifiedContainer = dom.querySelector('.platypush__simplified-body');
	20	+	return (simplifiedContainer || dom.querySelector('body')).innerHTML;
	21	+	};
102	22
103		-	// If that's not the case, save the original URL as it is
104		-	if (!simplifiedContainer) {
105		-	return url;
106		-	}
107		-
108		-	// Otherwise, upload the simplified content to a proxy
109		-	const html = document.createElement('html');
110		-	const head = document.createElement('head');
111		-	const title = document.createElement('title');
112		-	const meta = document.createElement('meta');
113		-	const body = document.createElement('body');
114		-	const originalLinkDiv = document.createElement('b');
115		-	const originalLink = document.createElement('a');
116		-
117		-	// Replace the relative URLs in the simplified content
118		-	replaceRelativeUrls();
119		-
120		-	// Set up the HTML structure
121		-	title.innerText = dom.querySelector('head title')?.innerText;
122		-	meta.setAttribute('charset', 'utf-8');
123		-
124		-	// Put a link to the original page in the body
125		-	originalLink.setAttribute('href', url);
126		-	originalLink.setAttribute('target', '_blank');
127		-	originalLink.innerText = 'Original link';
128		-	originalLinkDiv.innerHTML = `${originalLink.outerHTML}<br>`;
129		-
130		-	// Build the HTML document
131		-	head.appendChild(title);
132		-	head.appendChild(meta);
133		-	body.appendChild(originalLinkDiv);
134		-	body.appendChild(simplifiedContainer);
135		-	html.appendChild(head);
136		-	html.appendChild(body);
137		-
138		-	// Generate a unique filename based on the URL hash
139		-	const filename = `${hashCode(url)}.html`;
140		-	const outfile = `${savePath}/${filename}`;
141		-
142		-	// Upload it as HTML to the server
143		-	await app.run({
144		-	action: 'file.write',
145		-	args: {
146		-	file: outfile,
147		-	content: html.outerHTML,
148		-	},
149		-	}, args.host);
150		-
151		-	return `${scrapeUrl}/${filename}`;
152		-	}
153		-
154		-	// Get the URL to save - either the original one, or the simplified one if
155		-	// the Reader Mode script has already been applied.
156		-	const urlToSave = await getSaveUrl();
157		-
	23	+	// Save the URL to Wallabag leveraging the Platypush API
	24	+	const title = dom.querySelector('head title')?.innerText;
158	25		const response = await app.run({
159	26		action: 'wallabag.save',
160	27		args: {
161		-	url: urlToSave,
	28	+	url: url,
	29	+	title: title,
	30	+	content: getContent(),
162	31		}
163	32		}, args.host);
164	33
165		-	// Send a notification to the user with the result of the save operation
166		-	app.notify('Wallabag Save', response.title);
167		-
168		-	// Optional, if ntfy is enabled, you can send a notification to the user
169		-	// that will be received by any client running ntfy
170		-	// app.run({
171		-	// action: 'ntfy.send_message',
172		-	// args: {
173		-	// topic: 'wallabag-save-some-random-string',
174		-	// title: 'Saved on Wallabag',
175		-	// message: response.title,
176		-	// url: response.url,
177		-	// }
178		-	// }, args.host);
	34	+	/*
	35	+	// Optional: Send a notification via ntfy
	36	+	await app.run({
	37	+	action: 'ntfy.send_message',
	38	+	args: {
	39	+	topic: ntfyTopic,
	40	+	message: response.title || title,
	41	+	title: 'URL saved to Wallabag',
	42	+	url: url,
	43	+	}
	44	+	}, args.host);
	45	+	*/
179	46		}

fabio revised this gist 1749088517. Go to revision

1 file changed, 179 insertions

platypush-ext-save-link.js(file created)

		@@ -0,0 +1,179 @@
1	+	/**
2	+	* A script for the Platypush browser extension that saves the current page URL to Wallabag.
3	+	* Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results.
4	+	*/
5	+
6	+	// Entry point for the script, which is executed when the user runs the
7	+	// associated action. All the logic should be encapsulated in this function.
8	+	async (app, args) => {
9	+	// This is the base path where the scraped pages will be saved.
10	+	// For sake of simplicity, we will save the scraped pages to a local directory
11	+	// on the same server where the Platypush service is running.
12	+	// If you want to push it to another server, you can replace the call to
13	+	// `file.write` at the bottom of the script with `ssh.put`
14	+	// (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put)
15	+	// (ensure that the `ssh` plugin is enabled on your Platypush instance).
16	+	const savePath = `/CHANGEME`;
17	+
18	+	// This is the URL where the scraped pages will be served from.
19	+	// The simplest way to configure it is to set up a web server that serves
20	+	// the files in the `savePath` directory (python -m http.server should
21	+	// suffice), and then configure a reverse proxy to point to your server -
22	+	// or even configure nginx itself to both serve the files and handle SSL.
23	+	// It is strongly recommended to use HTTPS for this URL, as Wallabag
24	+	// will probably refuse to scrape HTTP URLs.
25	+	const scrapeUrl = 'https://scraped.example.com';
26	+
27	+	// Get the page URL and DOM
28	+	const url = await app.getURL();
29	+	const dom = await app.getDOM();
30	+
31	+	// A utility function that generates a unique hash code for a given string.
32	+	// This is used to create a unique filename based on the URL.
33	+	const hashCode = (str) => {
34	+	let hash = 0;
35	+	for (let i = 0, len = str.length; i < len; i++) {
36	+	let chr = str.charCodeAt(i);
37	+	hash = (hash << 5) - hash + chr;
38	+	hash |= 0; // Convert to 32bit integer
39	+	}
40	+	return hash;
41	+	};
42	+
43	+	// Utility functions to get the base URL and base relative URL from a given URL string.
44	+	const getBaseUrl = (urlString) => {
45	+	const url = new URL(urlString);
46	+	const protocol = url.protocol;
47	+	const hostname = url.hostname;
48	+	const port = url.port;
49	+	return `${protocol}//${hostname}${port ? ':' + port : ''}`;
50	+	};
51	+
52	+	// This function extracts the base relative URL (without the filename) from a given URL string.
53	+	const getBaseRelativeUrl = (urlString) => {
54	+	try {
55	+	let url = new URL(urlString);
56	+	let pathWithoutFilename = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1);
57	+	return url.origin + pathWithoutFilename;
58	+	} catch (error) {
59	+	return urlString;
60	+	}
61	+	};
62	+
63	+	const baseUrl = getBaseUrl(url);
64	+	const baseRelativeUrl = getBaseRelativeUrl(url);
65	+
66	+	// This function replaces relative URLs in the DOM with absolute URLs based
67	+	// on the original base URL. This is necessary to ensure that links and images
68	+	// point to the correct location when the page is saved or shared.
69	+	const replaceRelativeUrls = () => {
70	+	const relativeLinks = [...dom.querySelectorAll('a')]
71	+	.filter((a) =>
72	+	a.getAttribute('href')?.length &&
73	+	!a.getAttribute('href')?.match(/^(https?:\/\/)|(javascript:)/)
74	+	);
75	+
76	+	const relativeImgs = [...dom.querySelectorAll('img')]
77	+	.filter((a) =>
78	+	a.getAttribute('src')?.length &&
79	+	!a.getAttribute('src')?.match(/^(https?:\/\/)|(data:image\/)/)
80	+	);
81	+
82	+	[...relativeLinks, ...relativeImgs].forEach((el) => {
83	+	const tag = el.tagName.toLowerCase();
84	+	const attrName = tag === 'img' ? 'src' : 'href';
85	+	const attrValue = el.getAttribute(attrName);
86	+	if (attrValue?.startsWith('/')) {
87	+	el.setAttribute(attrName, `${baseUrl}${attrValue}`);
88	+	} else {
89	+	el.setAttribute(attrName, `${baseRelativeUrl}${attrValue}`);
90	+	}
91	+	});
92	+	};
93	+
94	+	// This function checks if the current DOM has already been
95	+	// simplified/distilled by the Reader Mode script. If that's the case, then
96	+	// we can directly save the simplified content on the server, and let
97	+	// Wallabag scrape that URL. This ensures that any client-side restrictions
98	+	// that may prevent Wallabag from scraping the original page are bypassed.
99	+	const getSaveUrl = async () => {
100	+	// Check if the current DOM has already been "distilled" by the Mercury script
101	+	const simplifiedContainer = dom.querySelector('.platypush__simplified-body');
102	+
103	+	// If that's not the case, save the original URL as it is
104	+	if (!simplifiedContainer) {
105	+	return url;
106	+	}
107	+
108	+	// Otherwise, upload the simplified content to a proxy
109	+	const html = document.createElement('html');
110	+	const head = document.createElement('head');
111	+	const title = document.createElement('title');
112	+	const meta = document.createElement('meta');
113	+	const body = document.createElement('body');
114	+	const originalLinkDiv = document.createElement('b');
115	+	const originalLink = document.createElement('a');
116	+
117	+	// Replace the relative URLs in the simplified content
118	+	replaceRelativeUrls();
119	+
120	+	// Set up the HTML structure
121	+	title.innerText = dom.querySelector('head title')?.innerText;
122	+	meta.setAttribute('charset', 'utf-8');
123	+
124	+	// Put a link to the original page in the body
125	+	originalLink.setAttribute('href', url);
126	+	originalLink.setAttribute('target', '_blank');
127	+	originalLink.innerText = 'Original link';
128	+	originalLinkDiv.innerHTML = `${originalLink.outerHTML}<br>`;
129	+
130	+	// Build the HTML document
131	+	head.appendChild(title);
132	+	head.appendChild(meta);
133	+	body.appendChild(originalLinkDiv);
134	+	body.appendChild(simplifiedContainer);
135	+	html.appendChild(head);
136	+	html.appendChild(body);
137	+
138	+	// Generate a unique filename based on the URL hash
139	+	const filename = `${hashCode(url)}.html`;
140	+	const outfile = `${savePath}/${filename}`;
141	+
142	+	// Upload it as HTML to the server
143	+	await app.run({
144	+	action: 'file.write',
145	+	args: {
146	+	file: outfile,
147	+	content: html.outerHTML,
148	+	},
149	+	}, args.host);
150	+
151	+	return `${scrapeUrl}/${filename}`;
152	+	}
153	+
154	+	// Get the URL to save - either the original one, or the simplified one if
155	+	// the Reader Mode script has already been applied.
156	+	const urlToSave = await getSaveUrl();
157	+
158	+	const response = await app.run({
159	+	action: 'wallabag.save',
160	+	args: {
161	+	url: urlToSave,
162	+	}
163	+	}, args.host);
164	+
165	+	// Send a notification to the user with the result of the save operation
166	+	app.notify('Wallabag Save', response.title);
167	+
168	+	// Optional, if ntfy is enabled, you can send a notification to the user
169	+	// that will be received by any client running ntfy
170	+	// app.run({
171	+	// action: 'ntfy.send_message',
172	+	// args: {
173	+	// topic: 'wallabag-save-some-random-string',
174	+	// title: 'Saved on Wallabag',
175	+	// message: response.title,
176	+	// url: response.url,
177	+	// }
178	+	// }, args.host);
179	+	}

Newer Older