fabio revised this gist. Go to revision
1 file changed, 25 insertions, 158 deletions
platypush-ext-save-link.js
| @@ -3,177 +3,44 @@ | |||
| 3 | 3 | * Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results. | |
| 4 | 4 | */ | |
| 5 | 5 | ||
| 6 | + | ||
| 6 | 7 | // Entry point for the script, which is executed when the user runs the | |
| 7 | 8 | // associated action. All the logic should be encapsulated in this function. | |
| 8 | 9 | async (app, args) => { | |
| 9 | - | // This is the base path where the scraped pages will be saved. | |
| 10 | - | // For sake of simplicity, we will save the scraped pages to a local directory | |
| 11 | - | // on the same server where the Platypush service is running. | |
| 12 | - | // If you want to push it to another server, you can replace the call to | |
| 13 | - | // `file.write` at the bottom of the script with `ssh.put` | |
| 14 | - | // (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put) | |
| 15 | - | // (ensure that the `ssh` plugin is enabled on your Platypush instance). | |
| 16 | - | const savePath = `/CHANGEME`; | |
| 17 | - | ||
| 18 | - | // This is the URL where the scraped pages will be served from. | |
| 19 | - | // The simplest way to configure it is to set up a web server that serves | |
| 20 | - | // the files in the `savePath` directory (python -m http.server should | |
| 21 | - | // suffice), and then configure a reverse proxy to point to your server - | |
| 22 | - | // or even configure nginx itself to both serve the files and handle SSL. | |
| 23 | - | // It is strongly recommended to use HTTPS for this URL, as Wallabag | |
| 24 | - | // will probably refuse to scrape HTTP URLs. | |
| 25 | - | const scrapeUrl = 'https://scraped.example.com'; | |
| 10 | + | // (Optional) topic for the ntfy notification | |
| 11 | + | const ntfyTopic = 'notebook-saved-links-random-suffix'; | |
| 26 | 12 | ||
| 27 | 13 | // Get the page URL and DOM | |
| 28 | 14 | const url = await app.getURL(); | |
| 29 | 15 | const dom = await app.getDOM(); | |
| 30 | - | ||
| 31 | - | // A utility function that generates a unique hash code for a given string. | |
| 32 | - | // This is used to create a unique filename based on the URL. | |
| 33 | - | const hashCode = (str) => { | |
| 34 | - | let hash = 0; | |
| 35 | - | for (let i = 0, len = str.length; i < len; i++) { | |
| 36 | - | let chr = str.charCodeAt(i); | |
| 37 | - | hash = (hash << 5) - hash + chr; | |
| 38 | - | hash |= 0; // Convert to 32bit integer | |
| 39 | - | } | |
| 40 | - | return hash; | |
| 41 | - | }; | |
| 42 | - | ||
| 43 | - | // Utility functions to get the base URL and base relative URL from a given URL string. | |
| 44 | - | const getBaseUrl = (urlString) => { | |
| 45 | - | const url = new URL(urlString); | |
| 46 | - | const protocol = url.protocol; | |
| 47 | - | const hostname = url.hostname; | |
| 48 | - | const port = url.port; | |
| 49 | - | return `${protocol}//${hostname}${port ? ':' + port : ''}`; | |
| 50 | - | }; | |
| 51 | - | ||
| 52 | - | // This function extracts the base relative URL (without the filename) from a given URL string. | |
| 53 | - | const getBaseRelativeUrl = (urlString) => { | |
| 54 | - | try { | |
| 55 | - | let url = new URL(urlString); | |
| 56 | - | let pathWithoutFilename = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1); | |
| 57 | - | return url.origin + pathWithoutFilename; | |
| 58 | - | } catch (error) { | |
| 59 | - | return urlString; | |
| 60 | - | } | |
| 61 | - | }; | |
| 62 | - | ||
| 63 | - | const baseUrl = getBaseUrl(url); | |
| 64 | - | const baseRelativeUrl = getBaseRelativeUrl(url); | |
| 65 | - | ||
| 66 | - | // This function replaces relative URLs in the DOM with absolute URLs based | |
| 67 | - | // on the original base URL. This is necessary to ensure that links and images | |
| 68 | - | // point to the correct location when the page is saved or shared. | |
| 69 | - | const replaceRelativeUrls = () => { | |
| 70 | - | const relativeLinks = [...dom.querySelectorAll('a')] | |
| 71 | - | .filter((a) => | |
| 72 | - | a.getAttribute('href')?.length && | |
| 73 | - | !a.getAttribute('href')?.match(/^(https?:\/\/)|(javascript:)/) | |
| 74 | - | ); | |
| 75 | - | ||
| 76 | - | const relativeImgs = [...dom.querySelectorAll('img')] | |
| 77 | - | .filter((a) => | |
| 78 | - | a.getAttribute('src')?.length && | |
| 79 | - | !a.getAttribute('src')?.match(/^(https?:\/\/)|(data:image\/)/) | |
| 80 | - | ); | |
| 81 | - | ||
| 82 | - | [...relativeLinks, ...relativeImgs].forEach((el) => { | |
| 83 | - | const tag = el.tagName.toLowerCase(); | |
| 84 | - | const attrName = tag === 'img' ? 'src' : 'href'; | |
| 85 | - | const attrValue = el.getAttribute(attrName); | |
| 86 | - | if (attrValue?.startsWith('/')) { | |
| 87 | - | el.setAttribute(attrName, `${baseUrl}${attrValue}`); | |
| 88 | - | } else { | |
| 89 | - | el.setAttribute(attrName, `${baseRelativeUrl}${attrValue}`); | |
| 90 | - | } | |
| 91 | - | }); | |
| 92 | - | }; | |
| 93 | - | ||
| 94 | - | // This function checks if the current DOM has already been | |
| 95 | - | // simplified/distilled by the Reader Mode script. If that's the case, then | |
| 96 | - | // we can directly save the simplified content on the server, and let | |
| 97 | - | // Wallabag scrape that URL. This ensures that any client-side restrictions | |
| 98 | - | // that may prevent Wallabag from scraping the original page are bypassed. | |
| 99 | - | const getSaveUrl = async () => { | |
| 100 | - | // Check if the current DOM has already been "distilled" by the Mercury script | |
| 16 | + | const getContent = () => { | |
| 17 | + | // Check if the current DOM has already been "distilled" by the Mercury script. | |
| 18 | + | // If that's the case, use the already distilled content as the body of the saved article. | |
| 101 | 19 | const simplifiedContainer = dom.querySelector('.platypush__simplified-body'); | |
| 20 | + | return (simplifiedContainer || dom.querySelector('body')).innerHTML; | |
| 21 | + | }; | |
| 102 | 22 | ||
| 103 | - | // If that's not the case, save the original URL as it is | |
| 104 | - | if (!simplifiedContainer) { | |
| 105 | - | return url; | |
| 106 | - | } | |
| 107 | - | ||
| 108 | - | // Otherwise, upload the simplified content to a proxy | |
| 109 | - | const html = document.createElement('html'); | |
| 110 | - | const head = document.createElement('head'); | |
| 111 | - | const title = document.createElement('title'); | |
| 112 | - | const meta = document.createElement('meta'); | |
| 113 | - | const body = document.createElement('body'); | |
| 114 | - | const originalLinkDiv = document.createElement('b'); | |
| 115 | - | const originalLink = document.createElement('a'); | |
| 116 | - | ||
| 117 | - | // Replace the relative URLs in the simplified content | |
| 118 | - | replaceRelativeUrls(); | |
| 119 | - | ||
| 120 | - | // Set up the HTML structure | |
| 121 | - | title.innerText = dom.querySelector('head title')?.innerText; | |
| 122 | - | meta.setAttribute('charset', 'utf-8'); | |
| 123 | - | ||
| 124 | - | // Put a link to the original page in the body | |
| 125 | - | originalLink.setAttribute('href', url); | |
| 126 | - | originalLink.setAttribute('target', '_blank'); | |
| 127 | - | originalLink.innerText = 'Original link'; | |
| 128 | - | originalLinkDiv.innerHTML = `${originalLink.outerHTML}<br>`; | |
| 129 | - | ||
| 130 | - | // Build the HTML document | |
| 131 | - | head.appendChild(title); | |
| 132 | - | head.appendChild(meta); | |
| 133 | - | body.appendChild(originalLinkDiv); | |
| 134 | - | body.appendChild(simplifiedContainer); | |
| 135 | - | html.appendChild(head); | |
| 136 | - | html.appendChild(body); | |
| 137 | - | ||
| 138 | - | // Generate a unique filename based on the URL hash | |
| 139 | - | const filename = `${hashCode(url)}.html`; | |
| 140 | - | const outfile = `${savePath}/${filename}`; | |
| 141 | - | ||
| 142 | - | // Upload it as HTML to the server | |
| 143 | - | await app.run({ | |
| 144 | - | action: 'file.write', | |
| 145 | - | args: { | |
| 146 | - | file: outfile, | |
| 147 | - | content: html.outerHTML, | |
| 148 | - | }, | |
| 149 | - | }, args.host); | |
| 150 | - | ||
| 151 | - | return `${scrapeUrl}/${filename}`; | |
| 152 | - | } | |
| 153 | - | ||
| 154 | - | // Get the URL to save - either the original one, or the simplified one if | |
| 155 | - | // the Reader Mode script has already been applied. | |
| 156 | - | const urlToSave = await getSaveUrl(); | |
| 157 | - | ||
| 23 | + | // Save the URL to Wallabag leveraging the Platypush API | |
| 24 | + | const title = dom.querySelector('head title')?.innerText; | |
| 158 | 25 | const response = await app.run({ | |
| 159 | 26 | action: 'wallabag.save', | |
| 160 | 27 | args: { | |
| 161 | - | url: urlToSave, | |
| 28 | + | url: url, | |
| 29 | + | title: title, | |
| 30 | + | content: getContent(), | |
| 162 | 31 | } | |
| 163 | 32 | }, args.host); | |
| 164 | 33 | ||
| 165 | - | // Send a notification to the user with the result of the save operation | |
| 166 | - | app.notify('Wallabag Save', response.title); | |
| 167 | - | ||
| 168 | - | // Optional, if ntfy is enabled, you can send a notification to the user | |
| 169 | - | // that will be received by any client running ntfy | |
| 170 | - | // app.run({ | |
| 171 | - | // action: 'ntfy.send_message', | |
| 172 | - | // args: { | |
| 173 | - | // topic: 'wallabag-save-some-random-string', | |
| 174 | - | // title: 'Saved on Wallabag', | |
| 175 | - | // message: response.title, | |
| 176 | - | // url: response.url, | |
| 177 | - | // } | |
| 178 | - | // }, args.host); | |
| 34 | + | /* | |
| 35 | + | // Optional: Send a notification via ntfy | |
| 36 | + | await app.run({ | |
| 37 | + | action: 'ntfy.send_message', | |
| 38 | + | args: { | |
| 39 | + | topic: ntfyTopic, | |
| 40 | + | message: response.title || title, | |
| 41 | + | title: 'URL saved to Wallabag', | |
| 42 | + | url: url, | |
| 43 | + | } | |
| 44 | + | }, args.host); | |
| 45 | + | */ | |
| 179 | 46 | } | |
fabio revised this gist. Go to revision
1 file changed, 179 insertions
platypush-ext-save-link.js (file created)
| @@ -0,0 +1,179 @@ | |||
| 1 | + | /** | |
| 2 | + | * A script for the Platypush browser extension that saves the current page URL to Wallabag. | |
| 3 | + | * Use together with the Reader Mode script https://gist.manganiello.tech/fabio/c731b57ff6b24d21a8f43fbedde3dc30 for best results. | |
| 4 | + | */ | |
| 5 | + | ||
| 6 | + | // Entry point for the script, which is executed when the user runs the | |
| 7 | + | // associated action. All the logic should be encapsulated in this function. | |
| 8 | + | async (app, args) => { | |
| 9 | + | // This is the base path where the scraped pages will be saved. | |
| 10 | + | // For sake of simplicity, we will save the scraped pages to a local directory | |
| 11 | + | // on the same server where the Platypush service is running. | |
| 12 | + | // If you want to push it to another server, you can replace the call to | |
| 13 | + | // `file.write` at the bottom of the script with `ssh.put` | |
| 14 | + | // (https://docs.platypush.tech/platypush/plugins/ssh.html#platypush.plugins.ssh.SshPlugin.put) | |
| 15 | + | // (ensure that the `ssh` plugin is enabled on your Platypush instance). | |
| 16 | + | const savePath = `/CHANGEME`; | |
| 17 | + | ||
| 18 | + | // This is the URL where the scraped pages will be served from. | |
| 19 | + | // The simplest way to configure it is to set up a web server that serves | |
| 20 | + | // the files in the `savePath` directory (python -m http.server should | |
| 21 | + | // suffice), and then configure a reverse proxy to point to your server - | |
| 22 | + | // or even configure nginx itself to both serve the files and handle SSL. | |
| 23 | + | // It is strongly recommended to use HTTPS for this URL, as Wallabag | |
| 24 | + | // will probably refuse to scrape HTTP URLs. | |
| 25 | + | const scrapeUrl = 'https://scraped.example.com'; | |
| 26 | + | ||
| 27 | + | // Get the page URL and DOM | |
| 28 | + | const url = await app.getURL(); | |
| 29 | + | const dom = await app.getDOM(); | |
| 30 | + | ||
| 31 | + | // A utility function that generates a unique hash code for a given string. | |
| 32 | + | // This is used to create a unique filename based on the URL. | |
| 33 | + | const hashCode = (str) => { | |
| 34 | + | let hash = 0; | |
| 35 | + | for (let i = 0, len = str.length; i < len; i++) { | |
| 36 | + | let chr = str.charCodeAt(i); | |
| 37 | + | hash = (hash << 5) - hash + chr; | |
| 38 | + | hash |= 0; // Convert to 32bit integer | |
| 39 | + | } | |
| 40 | + | return hash; | |
| 41 | + | }; | |
| 42 | + | ||
| 43 | + | // Utility functions to get the base URL and base relative URL from a given URL string. | |
| 44 | + | const getBaseUrl = (urlString) => { | |
| 45 | + | const url = new URL(urlString); | |
| 46 | + | const protocol = url.protocol; | |
| 47 | + | const hostname = url.hostname; | |
| 48 | + | const port = url.port; | |
| 49 | + | return `${protocol}//${hostname}${port ? ':' + port : ''}`; | |
| 50 | + | }; | |
| 51 | + | ||
| 52 | + | // This function extracts the base relative URL (without the filename) from a given URL string. | |
| 53 | + | const getBaseRelativeUrl = (urlString) => { | |
| 54 | + | try { | |
| 55 | + | let url = new URL(urlString); | |
| 56 | + | let pathWithoutFilename = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1); | |
| 57 | + | return url.origin + pathWithoutFilename; | |
| 58 | + | } catch (error) { | |
| 59 | + | return urlString; | |
| 60 | + | } | |
| 61 | + | }; | |
| 62 | + | ||
| 63 | + | const baseUrl = getBaseUrl(url); | |
| 64 | + | const baseRelativeUrl = getBaseRelativeUrl(url); | |
| 65 | + | ||
| 66 | + | // This function replaces relative URLs in the DOM with absolute URLs based | |
| 67 | + | // on the original base URL. This is necessary to ensure that links and images | |
| 68 | + | // point to the correct location when the page is saved or shared. | |
| 69 | + | const replaceRelativeUrls = () => { | |
| 70 | + | const relativeLinks = [...dom.querySelectorAll('a')] | |
| 71 | + | .filter((a) => | |
| 72 | + | a.getAttribute('href')?.length && | |
| 73 | + | !a.getAttribute('href')?.match(/^(https?:\/\/)|(javascript:)/) | |
| 74 | + | ); | |
| 75 | + | ||
| 76 | + | const relativeImgs = [...dom.querySelectorAll('img')] | |
| 77 | + | .filter((a) => | |
| 78 | + | a.getAttribute('src')?.length && | |
| 79 | + | !a.getAttribute('src')?.match(/^(https?:\/\/)|(data:image\/)/) | |
| 80 | + | ); | |
| 81 | + | ||
| 82 | + | [...relativeLinks, ...relativeImgs].forEach((el) => { | |
| 83 | + | const tag = el.tagName.toLowerCase(); | |
| 84 | + | const attrName = tag === 'img' ? 'src' : 'href'; | |
| 85 | + | const attrValue = el.getAttribute(attrName); | |
| 86 | + | if (attrValue?.startsWith('/')) { | |
| 87 | + | el.setAttribute(attrName, `${baseUrl}${attrValue}`); | |
| 88 | + | } else { | |
| 89 | + | el.setAttribute(attrName, `${baseRelativeUrl}${attrValue}`); | |
| 90 | + | } | |
| 91 | + | }); | |
| 92 | + | }; | |
| 93 | + | ||
| 94 | + | // This function checks if the current DOM has already been | |
| 95 | + | // simplified/distilled by the Reader Mode script. If that's the case, then | |
| 96 | + | // we can directly save the simplified content on the server, and let | |
| 97 | + | // Wallabag scrape that URL. This ensures that any client-side restrictions | |
| 98 | + | // that may prevent Wallabag from scraping the original page are bypassed. | |
| 99 | + | const getSaveUrl = async () => { | |
| 100 | + | // Check if the current DOM has already been "distilled" by the Mercury script | |
| 101 | + | const simplifiedContainer = dom.querySelector('.platypush__simplified-body'); | |
| 102 | + | ||
| 103 | + | // If that's not the case, save the original URL as it is | |
| 104 | + | if (!simplifiedContainer) { | |
| 105 | + | return url; | |
| 106 | + | } | |
| 107 | + | ||
| 108 | + | // Otherwise, upload the simplified content to a proxy | |
| 109 | + | const html = document.createElement('html'); | |
| 110 | + | const head = document.createElement('head'); | |
| 111 | + | const title = document.createElement('title'); | |
| 112 | + | const meta = document.createElement('meta'); | |
| 113 | + | const body = document.createElement('body'); | |
| 114 | + | const originalLinkDiv = document.createElement('b'); | |
| 115 | + | const originalLink = document.createElement('a'); | |
| 116 | + | ||
| 117 | + | // Replace the relative URLs in the simplified content | |
| 118 | + | replaceRelativeUrls(); | |
| 119 | + | ||
| 120 | + | // Set up the HTML structure | |
| 121 | + | title.innerText = dom.querySelector('head title')?.innerText; | |
| 122 | + | meta.setAttribute('charset', 'utf-8'); | |
| 123 | + | ||
| 124 | + | // Put a link to the original page in the body | |
| 125 | + | originalLink.setAttribute('href', url); | |
| 126 | + | originalLink.setAttribute('target', '_blank'); | |
| 127 | + | originalLink.innerText = 'Original link'; | |
| 128 | + | originalLinkDiv.innerHTML = `${originalLink.outerHTML}<br>`; | |
| 129 | + | ||
| 130 | + | // Build the HTML document | |
| 131 | + | head.appendChild(title); | |
| 132 | + | head.appendChild(meta); | |
| 133 | + | body.appendChild(originalLinkDiv); | |
| 134 | + | body.appendChild(simplifiedContainer); | |
| 135 | + | html.appendChild(head); | |
| 136 | + | html.appendChild(body); | |
| 137 | + | ||
| 138 | + | // Generate a unique filename based on the URL hash | |
| 139 | + | const filename = `${hashCode(url)}.html`; | |
| 140 | + | const outfile = `${savePath}/${filename}`; | |
| 141 | + | ||
| 142 | + | // Upload it as HTML to the server | |
| 143 | + | await app.run({ | |
| 144 | + | action: 'file.write', | |
| 145 | + | args: { | |
| 146 | + | file: outfile, | |
| 147 | + | content: html.outerHTML, | |
| 148 | + | }, | |
| 149 | + | }, args.host); | |
| 150 | + | ||
| 151 | + | return `${scrapeUrl}/${filename}`; | |
| 152 | + | } | |
| 153 | + | ||
| 154 | + | // Get the URL to save - either the original one, or the simplified one if | |
| 155 | + | // the Reader Mode script has already been applied. | |
| 156 | + | const urlToSave = await getSaveUrl(); | |
| 157 | + | ||
| 158 | + | const response = await app.run({ | |
| 159 | + | action: 'wallabag.save', | |
| 160 | + | args: { | |
| 161 | + | url: urlToSave, | |
| 162 | + | } | |
| 163 | + | }, args.host); | |
| 164 | + | ||
| 165 | + | // Send a notification to the user with the result of the save operation | |
| 166 | + | app.notify('Wallabag Save', response.title); | |
| 167 | + | ||
| 168 | + | // Optional, if ntfy is enabled, you can send a notification to the user | |
| 169 | + | // that will be received by any client running ntfy | |
| 170 | + | // app.run({ | |
| 171 | + | // action: 'ntfy.send_message', | |
| 172 | + | // args: { | |
| 173 | + | // topic: 'wallabag-save-some-random-string', | |
| 174 | + | // title: 'Saved on Wallabag', | |
| 175 | + | // message: response.title, | |
| 176 | + | // url: response.url, | |
| 177 | + | // } | |
| 178 | + | // }, args.host); | |
| 179 | + | } | |