Last active 1749137107

fabio's Avatar fabio revised this gist 1749137107. Go to revision

1 file changed, 25 insertions, 158 deletions

platypush-ext-save-link.js

@@ -3,177 +3,44 @@
3 3 * Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results.
4 4 */
5 5
6 +
6 7 // Entry point for the script, which is executed when the user runs the
7 8 // associated action. All the logic should be encapsulated in this function.
8 9 async (app, args) => {
9 - // This is the base path where the scraped pages will be saved.
10 - // For sake of simplicity, we will save the scraped pages to a local directory
11 - // on the same server where the Platypush service is running.
12 - // If you want to push it to another server, you can replace the call to
13 - // `file.write` at the bottom of the script with `ssh.put`
14 - // (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put)
15 - // (ensure that the `ssh` plugin is enabled on your Platypush instance).
16 - const savePath = `/CHANGEME`;
17 -
18 - // This is the URL where the scraped pages will be served from.
19 - // The simplest way to configure it is to set up a web server that serves
20 - // the files in the `savePath` directory (python -m http.server should
21 - // suffice), and then configure a reverse proxy to point to your server -
22 - // or even configure nginx itself to both serve the files and handle SSL.
23 - // It is strongly recommended to use HTTPS for this URL, as Wallabag
24 - // will probably refuse to scrape HTTP URLs.
25 - const scrapeUrl = 'https://scraped.example.com';
10 + // (Optional) topic for the ntfy notification; only used if the ntfy block at the bottom of the script is uncommented
11 + const ntfyTopic = 'notebook-saved-links-random-suffix';
26 12
27 13 // Get the page URL and DOM
28 14 const url = await app.getURL();
29 15 const dom = await app.getDOM();
30 -
31 - // A utility function that generates a unique hash code for a given string.
32 - // This is used to create a unique filename based on the URL.
33 - const hashCode = (str) => {
34 - let hash = 0;
35 - for (let i = 0, len = str.length; i < len; i++) {
36 - let chr = str.charCodeAt(i);
37 - hash = (hash << 5) - hash + chr;
38 - hash |= 0; // Convert to 32bit integer
39 - }
40 - return hash;
41 - };
42 -
43 - // Utility functions to get the base URL and base relative URL from a given URL string.
44 - const getBaseUrl = (urlString) => {
45 - const url = new URL(urlString);
46 - const protocol = url.protocol;
47 - const hostname = url.hostname;
48 - const port = url.port;
49 - return `${protocol}//${hostname}${port ? ':' + port : ''}`;
50 - };
51 -
52 - // This function extracts the base relative URL (without the filename) from a given URL string.
53 - const getBaseRelativeUrl = (urlString) => {
54 - try {
55 - let url = new URL(urlString);
56 - let pathWithoutFilename = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1);
57 - return url.origin + pathWithoutFilename;
58 - } catch (error) {
59 - return urlString;
60 - }
61 - };
62 -
63 - const baseUrl = getBaseUrl(url);
64 - const baseRelativeUrl = getBaseRelativeUrl(url);
65 -
66 - // This function replaces relative URLs in the DOM with absolute URLs based
67 - // on the original base URL. This is necessary to ensure that links and images
68 - // point to the correct location when the page is saved or shared.
69 - const replaceRelativeUrls = () => {
70 - const relativeLinks = [...dom.querySelectorAll('a')]
71 - .filter((a) =>
72 - a.getAttribute('href')?.length &&
73 - !a.getAttribute('href')?.match(/^(https?:\/\/)|(javascript:)/)
74 - );
75 -
76 - const relativeImgs = [...dom.querySelectorAll('img')]
77 - .filter((a) =>
78 - a.getAttribute('src')?.length &&
79 - !a.getAttribute('src')?.match(/^(https?:\/\/)|(data:image\/)/)
80 - );
81 -
82 - [...relativeLinks, ...relativeImgs].forEach((el) => {
83 - const tag = el.tagName.toLowerCase();
84 - const attrName = tag === 'img' ? 'src' : 'href';
85 - const attrValue = el.getAttribute(attrName);
86 - if (attrValue?.startsWith('/')) {
87 - el.setAttribute(attrName, `${baseUrl}${attrValue}`);
88 - } else {
89 - el.setAttribute(attrName, `${baseRelativeUrl}${attrValue}`);
90 - }
91 - });
92 - };
93 -
94 - // This function checks if the current DOM has already been
95 - // simplified/distilled by the Reader Mode script. If that's the case, then
96 - // we can directly save the simplified content on the server, and let
97 - // Wallabag scrape that URL. This ensures that any client-side restrictions
98 - // that may prevent Wallabag from scraping the original page are bypassed.
99 - const getSaveUrl = async () => {
100 - // Check if the current DOM has already been "distilled" by the Mercury script
16 + const getContent = () => {
17 + // Check if the current DOM has already been "distilled" by the Mercury script.
18 + // If that's the case, use the already distilled content as the body of the saved article; otherwise fall back to the content of the page's <body>.
101 19 const simplifiedContainer = dom.querySelector('.platypush__simplified-body');
20 + return (simplifiedContainer || dom.querySelector('body')).innerHTML;
21 + };
102 22
103 - // If that's not the case, save the original URL as it is
104 - if (!simplifiedContainer) {
105 - return url;
106 - }
107 -
108 - // Otherwise, upload the simplified content to a proxy
109 - const html = document.createElement('html');
110 - const head = document.createElement('head');
111 - const title = document.createElement('title');
112 - const meta = document.createElement('meta');
113 - const body = document.createElement('body');
114 - const originalLinkDiv = document.createElement('b');
115 - const originalLink = document.createElement('a');
116 -
117 - // Replace the relative URLs in the simplified content
118 - replaceRelativeUrls();
119 -
120 - // Set up the HTML structure
121 - title.innerText = dom.querySelector('head title')?.innerText;
122 - meta.setAttribute('charset', 'utf-8');
123 -
124 - // Put a link to the original page in the body
125 - originalLink.setAttribute('href', url);
126 - originalLink.setAttribute('target', '_blank');
127 - originalLink.innerText = 'Original link';
128 - originalLinkDiv.innerHTML = `${originalLink.outerHTML}<br>`;
129 -
130 - // Build the HTML document
131 - head.appendChild(title);
132 - head.appendChild(meta);
133 - body.appendChild(originalLinkDiv);
134 - body.appendChild(simplifiedContainer);
135 - html.appendChild(head);
136 - html.appendChild(body);
137 -
138 - // Generate a unique filename based on the URL hash
139 - const filename = `${hashCode(url)}.html`;
140 - const outfile = `${savePath}/${filename}`;
141 -
142 - // Upload it as HTML to the server
143 - await app.run({
144 - action: 'file.write',
145 - args: {
146 - file: outfile,
147 - content: html.outerHTML,
148 - },
149 - }, args.host);
150 -
151 - return `${scrapeUrl}/${filename}`;
152 - }
153 -
154 - // Get the URL to save - either the original one, or the simplified one if
155 - // the Reader Mode script has already been applied.
156 - const urlToSave = await getSaveUrl();
157 -
23 + // Save the URL, together with the page title and content, to Wallabag through the Platypush API
24 + const title = dom.querySelector('head title')?.innerText;
158 25 const response = await app.run({
159 26 action: 'wallabag.save',
160 27 args: {
161 - url: urlToSave,
28 + url: url,
29 + title: title,
30 + content: getContent(),
162 31 }
163 32 }, args.host);
164 33
165 - // Send a notification to the user with the result of the save operation
166 - app.notify('Wallabag Save', response.title);
167 -
168 - // Optional, if ntfy is enabled, you can send a notification to the user
169 - // that will be received by any client running ntfy
170 - // app.run({
171 - // action: 'ntfy.send_message',
172 - // args: {
173 - // topic: 'wallabag-save-some-random-string',
174 - // title: 'Saved on Wallabag',
175 - // message: response.title,
176 - // url: response.url,
177 - // }
178 - // }, args.host);
34 + /*
35 + // Optional: Send a notification via ntfy
36 + await app.run({
37 + action: 'ntfy.send_message',
38 + args: {
39 + topic: ntfyTopic,
40 + message: response.title || title,
41 + title: 'URL saved to Wallabag',
42 + url: url,
43 + }
44 + }, args.host);
45 + */
179 46 }

fabio's Avatar fabio revised this gist 1749088517. Go to revision

1 file changed, 179 insertions

platypush-ext-save-link.js(file created)

@@ -0,0 +1,179 @@
1 + /**
2 + * A script for the Platypush browser extension that saves the current page URL to Wallabag.
3 + * Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results.
4 + */
5 +
6 + // Entry point for the script, which is executed when the user runs the
7 + // associated action. All the logic should be encapsulated in this function.
8 + async (app, args) => {
9 + // This is the base path where the scraped pages will be saved.
10 + // For the sake of simplicity, we will save the scraped pages to a local directory
11 + // on the same server where the Platypush service is running.
12 + // If you want to push them to another server, you can replace the call to
13 + // `file.write` in `getSaveUrl` below with `ssh.put`
14 + // (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put)
15 + // (ensure that the `ssh` plugin is enabled on your Platypush instance).
16 + const savePath = `/CHANGEME`;
17 +
18 + // This is the URL where the scraped pages will be served from.
19 + // The simplest way to configure it is to set up a web server that serves
20 + // the files in the `savePath` directory (python -m http.server should
21 + // suffice), and then configure a reverse proxy to point to your server -
22 + // or even configure nginx itself to both serve the files and handle SSL.
23 + // It is strongly recommended to use HTTPS for this URL, as Wallabag
24 + // will probably refuse to scrape HTTP URLs.
25 + const scrapeUrl = 'https://scraped.example.com';
26 +
27 + // Get the page URL and DOM
28 + const url = await app.getURL();
29 + const dom = await app.getDOM();
30 +
31 + // A utility function that computes a 32-bit signed integer hash code for a given string.
32 + // This is used to derive a filename from the URL (different URLs may collide, and the value may be negative).
33 + const hashCode = (str) => {
34 + let hash = 0;
35 + for (let i = 0, len = str.length; i < len; i++) {
36 + let chr = str.charCodeAt(i);
37 + hash = (hash << 5) - hash + chr;
38 + hash |= 0; // Convert to 32bit integer
39 + }
40 + return hash;
41 + };
42 +
43 + // Utility function that returns the base URL (protocol, hostname and optional port) of a given URL string.
44 + const getBaseUrl = (urlString) => {
45 + const url = new URL(urlString);
46 + const protocol = url.protocol;
47 + const hostname = url.hostname;
48 + const port = url.port;
49 + return `${protocol}//${hostname}${port ? ':' + port : ''}`;
50 + };
51 +
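52 + // This function extracts the base relative URL (the origin plus the path up to its last '/', i.e. without the filename) from a given URL string. If the string cannot be parsed as a URL, it is returned unchanged.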
53 + const getBaseRelativeUrl = (urlString) => {
54 + try {
55 + let url = new URL(urlString);
56 + let pathWithoutFilename = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1);
57 + return url.origin + pathWithoutFilename;
58 + } catch (error) {
59 + return urlString;
60 + }
61 + };
62 +
63 + const baseUrl = getBaseUrl(url);
64 + const baseRelativeUrl = getBaseRelativeUrl(url);
65 +
66 + // This function rewrites the relative `href` values of links and `src` values of images
67 + // in the DOM as absolute URLs based on the original page URL. This is necessary to ensure that links and images
68 + // point to the correct location when the page is saved or shared.
69 + const replaceRelativeUrls = () => {
70 + const relativeLinks = [...dom.querySelectorAll('a')]
71 + .filter((a) =>
72 + a.getAttribute('href')?.length &&
73 + !a.getAttribute('href')?.match(/^(https?:\/\/)|(javascript:)/)
74 + );
75 +
76 + const relativeImgs = [...dom.querySelectorAll('img')]
77 + .filter((a) =>
78 + a.getAttribute('src')?.length &&
79 + !a.getAttribute('src')?.match(/^(https?:\/\/)|(data:image\/)/)
80 + );
81 +
82 + [...relativeLinks, ...relativeImgs].forEach((el) => {
83 + const tag = el.tagName.toLowerCase();
84 + const attrName = tag === 'img' ? 'src' : 'href';
85 + const attrValue = el.getAttribute(attrName);
86 + if (attrValue?.startsWith('/')) {
87 + el.setAttribute(attrName, `${baseUrl}${attrValue}`);
88 + } else {
89 + el.setAttribute(attrName, `${baseRelativeUrl}${attrValue}`);
90 + }
91 + });
92 + };
93 +
94 + // This function checks if the current DOM has already been
95 + // simplified/distilled by the Reader Mode script. If that's the case, then
96 + // we can directly save the simplified content on the server, and let
97 + // Wallabag scrape that URL. This ensures that any client-side restrictions
98 + // that may prevent Wallabag from scraping the original page are bypassed.
99 + const getSaveUrl = async () => {
100 + // Check if the current DOM has already been "distilled" by the Reader Mode (Mercury) script
101 + const simplifiedContainer = dom.querySelector('.platypush__simplified-body');
102 +
103 + // If that's not the case, save the original URL as it is
104 + if (!simplifiedContainer) {
105 + return url;
106 + }
107 +
108 + // Otherwise, wrap the simplified content in a standalone HTML document and write it under `savePath`, so that it can be served from `scrapeUrl`
109 + const html = document.createElement('html');
110 + const head = document.createElement('head');
111 + const title = document.createElement('title');
112 + const meta = document.createElement('meta');
113 + const body = document.createElement('body');
114 + const originalLinkDiv = document.createElement('b');
115 + const originalLink = document.createElement('a');
116 +
117 + // Replace the relative URLs in the simplified content
118 + replaceRelativeUrls();
119 +
120 + // Set up the HTML structure
121 + title.innerText = dom.querySelector('head title')?.innerText;
122 + meta.setAttribute('charset', 'utf-8');
123 +
124 + // Put a link to the original page in the body
125 + originalLink.setAttribute('href', url);
126 + originalLink.setAttribute('target', '_blank');
127 + originalLink.innerText = 'Original link';
128 + originalLinkDiv.innerHTML = `${originalLink.outerHTML}<br>`;
129 +
130 + // Build the HTML document
131 + head.appendChild(title);
132 + head.appendChild(meta);
133 + body.appendChild(originalLinkDiv);
134 + body.appendChild(simplifiedContainer);
135 + html.appendChild(head);
136 + html.appendChild(body);
137 +
138 + // Generate a unique filename based on the URL hash
139 + const filename = `${hashCode(url)}.html`;
140 + const outfile = `${savePath}/${filename}`;
141 +
142 + // Upload it as HTML to the server
143 + await app.run({
144 + action: 'file.write',
145 + args: {
146 + file: outfile,
147 + content: html.outerHTML,
148 + },
149 + }, args.host);
150 +
151 + return `${scrapeUrl}/${filename}`;
152 + }
153 +
154 + // Get the URL to save - either the original one, or the simplified one if
155 + // the Reader Mode script has already been applied.
156 + const urlToSave = await getSaveUrl();
157 +
158 + const response = await app.run({
159 + action: 'wallabag.save',
160 + args: {
161 + url: urlToSave,
162 + }
163 + }, args.host);
164 +
165 + // Send a notification to the user with the result of the save operation
166 + app.notify('Wallabag Save', response.title);
167 +
168 + // Optional, if ntfy is enabled, you can send a notification to the user
169 + // that will be received by any client running ntfy
170 + // app.run({
171 + // action: 'ntfy.send_message',
172 + // args: {
173 + // topic: 'wallabag-save-some-random-string',
174 + // title: 'Saved on Wallabag',
175 + // message: response.title,
176 + // url: response.url,
177 + // }
178 + // }, args.host);
179 + }
Newer Older