You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

127 lines
3.8 KiB

  1. #!/usr/bin/node
  2. const fs = require('fs');
  3. const jsdom = require("jsdom");
  4. const { JSDOM } = jsdom;
  5. const fetch = require('node-fetch');
  6. const SVGO = require('svgo');
  7. const svgo = new SVGO(); //{js2svg: { pretty: true }});
  8. // Sort out input params
  9. const baseURL = process.argv[2];
  10. if(!baseURL) {
  11. console.log(`### ERROR! No base URL provided!`);
  12. console.log(`Usage: node scrape.js {url}`);
  13. process.exit(1);
  14. }
  15. // Construct a output dir from the URL
  16. let outputDir = baseURL.replace(/http:\/\/|https:\/\//g, "");
  17. outputDir = outputDir.replace(/\//g, '_');
  18. outputDir = outputDir.trim();
  19. if(outputDir.endsWith('_')) outputDir = outputDir.substring(0, outputDir.length - 1)
  20. // Create output dir
  21. try {
  22. fs.mkdirSync(outputDir, {recursive: true})
  23. } catch(e) {}
  24. console.log(`### Scraping icons from ${baseURL} ...`);
  25. runScrape(baseURL);
  26. //
  27. // Main scraping function
  28. //
  29. async function runScrape(url) {
  30. // Load the page / base URL
  31. let resp = await fetch(url);
  32. if(!resp.ok) {
  33. console.log(`### Failed to fetch ${url}`);
  34. process.exit(1)
  35. }
  36. // Get page body (HTML)
  37. let body = await resp.text()
  38. // Load and parse page into a virtual DOM document
  39. var doc = new JSDOM(body).window.document;
  40. let count = 0;
  41. //
  42. // Process inline SVGs
  43. //
  44. var svgs = doc.getElementsByTagName('svg');
  45. for(let svg of svgs) {
  46. var viewBox = null;
  47. // Default name if we can't get anything better
  48. var name = `svg-${++count}`;
  49. // If there's a slug-id we can use that as a
  50. if(svg.getAttribute('data-slug-id')) {
  51. name = svg.getAttribute('data-slug-id')
  52. }
  53. if(svg.getAttribute('viewBox')) {
  54. viewBox = svg.getAttribute('viewBox');
  55. }
  56. // If the SVG has <use> we need to handle that
  57. uses = svg.getElementsByTagName('use')
  58. for(let use of uses) {
  59. let href = use.getAttribute('xlink:href') || use.getAttribute('href');
  60. // Use is an id of another SVG element in the page
  61. if(href.startsWith('#')) {
  62. name = href.substring(1);
  63. let symbol = doc.getElementById(href.substring(1));
  64. viewBox = symbol.getAttribute('viewBox');
  65. // Remove the <use> node and insert the contents of the referred symbol
  66. svg.removeChild(use);
  67. svg.innerHTML = svg.innerHTML + symbol.innerHTML;
  68. }
  69. // Use is pointing at external URL
  70. if(href.startsWith('http')) {
  71. // NOT HANDLED - NOT EVEN SURE ITS PART OF THE SVG SPEC
  72. }
  73. }
  74. // Skip SVGs which just contain the defs
  75. if(svg.firstElementChild && svg.firstElementChild.nodeName == "defs") {
  76. continue;
  77. }
  78. console.log(`### Inline - ${name}`)
  79. let viewBoxAttr = viewBox ? `viewBox="${viewBox}"` : '';
  80. let svgContent = `<svg xmlns="http://www.w3.org/2000/svg" ${viewBoxAttr}>${svg.innerHTML}</svg>`;
  81. svgContent = (await svgo.optimize(svgContent)).data
  82. fs.writeFileSync(`${outputDir}/${name}.svg`, svgContent)
  83. }
  84. //
  85. // Process img tags
  86. //
  87. var images = doc.getElementsByTagName('img');
  88. for(let img of images) {
  89. let src = img.getAttribute('src');
  90. if (src) {
  91. let fileName = src.split('/');
  92. fileName = fileName[fileName.length - 1];
  93. if(fileName.endsWith('.jpeg') || fileName.endsWith('.jpg')) continue;
  94. if(fileName.endsWith('.png') || fileName.endsWith('.png')) continue;
  95. // Make a fetchable URL (it might be a fragment or a path)
  96. const url = new URL(src, baseURL)
  97. console.log(`### Linked - ${url.href}`);
  98. // fetch optimize and save
  99. let imgResp = await fetch(url.href)
  100. let svgContent = await imgResp.text()
  101. svgContent = (await svgo.optimize(svgContent)).data
  102. fs.writeFileSync(`${outputDir}/${fileName}`, svgContent)
  103. }
  104. }
  105. }