You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

99 lines
2.9 KiB

#!/usr/bin/node
const fs = require('fs');
const path = require('path');
const request = require('request');
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
  // ---------------------------------------------------------------------------
  // Command line handling and output directory setup.
  //
  // Usage: node scrape.js <URL>
  //
  // Defines two names used by the rest of the script:
  //   BASEURL - the page to scrape (first CLI argument, mandatory)
  //   OUTPUT  - directory (relative to the cwd) that receives the scraped files
  // ---------------------------------------------------------------------------
  const BASEURL = process.argv[2];
  if (!BASEURL) {
    console.log(`### ERROR! No base URL provided!`);
    console.log(`Usage: node scrape.js {URL to scrape REQUIRED}`);
    process.exit(1);
  }

  // Derive the directory name from the URL: drop the http(s) scheme, turn every
  // '/' into '_', trim whitespace and strip a single trailing '_' (left behind
  // by a trailing slash in the URL).
  let outputDir = BASEURL.replace(/http:\/\/|https:\/\//g, '')
    .replace(/\//g, '_')
    .trim();
  if (outputDir.endsWith('_')) {
    outputDir = outputDir.slice(0, -1);
  }
  // Declared explicitly; previously this was an accidental implicit global.
  const OUTPUT = outputDir;

  try {
    fs.mkdirSync(OUTPUT, { recursive: true });
  } catch (e) {
    // EEXIST only means the directory is already there, which is fine. Any
    // other failure (permissions, empty/invalid name, ...) would make every
    // later write fail, so report it and stop instead of silently continuing.
    if (e.code !== 'EEXIST') {
      console.log(`### ERROR! Unable to create output directory "${OUTPUT}" - ${e}`);
      process.exit(1);
    }
  }

  console.log(`### Scraping icons from ${BASEURL} ...`);
  /**
   * Reduce an arbitrary, page-controlled string to a safe single path segment.
   *
   * Path separators, characters that are invalid in file names on common
   * platforms and control characters become '_'; leading dots are replaced so
   * the result can never be '.', '..' or a hidden file.
   *
   * @param {string} raw - Candidate file name taken from the scraped page.
   * @returns {string} Sanitised name; empty when `raw` is empty.
   */
  function toSafeFileName(raw) {
    return String(raw)
      .replace(/[\/\\:*?"<>|\x00-\x1f]/g, '_')
      .replace(/^\.+/, '_');
  }

  /**
   * Replace every same-document `<use href="#id">` inside `svg` with the
   * contents of the element it refers to.
   *
   * `<use>` elements without a reference, with an external reference, or whose
   * target does not exist in the page are left untouched.
   *
   * @param {Element} svg - The inline <svg> element to process (mutated).
   * @param {Document} doc - Document used to resolve the referenced ids.
   * @returns {{name: (string|null), viewBox: (string|null)}} Id and viewBox of
   *   the last resolved reference, or nulls when nothing was resolved.
   */
  function inlineUseReferences(svg, doc) {
    let name = null;
    let viewBox = null;

    // getElementsByTagName returns a live collection; take a snapshot because
    // the loop below removes and inserts nodes in the same subtree.
    for (const use of Array.from(svg.getElementsByTagName('use'))) {
      const href = use.getAttribute('xlink:href') || use.getAttribute('href');
      // Only references to another element of this page ("#id") are handled;
      // external URLs are intentionally not supported.
      if (!href || !href.startsWith('#')) {
        continue;
      }

      const id = href.substring(1);
      const symbol = doc.getElementById(id);
      if (!symbol) {
        console.log(`### WARNING! <use> refers to missing element #${id}`);
        continue;
      }

      name = id;
      viewBox = symbol.getAttribute('viewBox') || viewBox;

      // Remove the <use> node (wherever it is nested) and append the contents
      // of the referred symbol. insertAdjacentHTML leaves the existing children
      // in place, so the snapshot taken above stays valid.
      use.remove();
      svg.insertAdjacentHTML('beforeend', symbol.innerHTML);
    }

    return { name, viewBox };
  }

  /**
   * Write every inline <svg> of the page to `${OUTPUT}/<name>.svg`.
   *
   * The name is the id of the symbol pulled in through <use>, or `svg-N`
   * (N = 1-based position of the SVG in the page) otherwise. SVGs whose first
   * child element is <defs> are skipped.
   *
   * @param {Document} doc - Parsed page.
   */
  function saveInlineSvgs(doc) {
    let count = 0;

    for (const svg of Array.from(doc.getElementsByTagName('svg'))) {
      count += 1;
      const resolved = inlineUseReferences(svg, doc);

      // Skip SVGs which just contain the defs
      if (svg.firstElementChild && svg.firstElementChild.nodeName === 'defs') {
        continue;
      }

      const name = toSafeFileName(resolved.name || '') || `svg-${count}`;
      // Scoped per SVG so a viewBox never leaks from one icon into the next;
      // fall back to the element's own viewBox, which would otherwise be lost
      // because only its inner markup is written out.
      const viewBox = resolved.viewBox || svg.getAttribute('viewBox');

      console.log(`### - ${name} (Inline SVG)`);
      const viewBoxAttr = viewBox ? `viewBox="${viewBox}"` : '';
      const svgContent = `<svg xmlns="http://www.w3.org/2000/svg" ${viewBoxAttr}>${svg.innerHTML}</svg>`;
      try {
        fs.writeFileSync(path.join(OUTPUT, `${name}.svg`), svgContent);
      } catch (e) {
        // One unwritable file should not abort the remaining icons.
        console.log(`### ERROR! Unable to write ${name}.svg - ${e}`);
      }
    }
  }

  /**
   * Download every non-JPEG <img> of the page into OUTPUT.
   *
   * @param {Document} doc - Parsed page.
   */
  function downloadImages(doc) {
    const reportFetchError = (err) => {
      console.log(`### ERROR! Unable to fetch: ${err}`);
    };

    for (const img of doc.getElementsByTagName('img')) {
      const src = img.getAttribute('src');
      if (!src) {
        continue;
      }

      // Resolve the reference against the page URL the way a browser would.
      // Protocol-relative URLs keep being fetched over https.
      let url;
      try {
        url = new URL(src.startsWith('//') ? `https:${src}` : src, BASEURL);
      } catch (e) {
        console.log(`### WARNING! Skipping unparsable image URL: ${src}`);
        continue;
      }
      // data:, blob: and similar URIs cannot be downloaded.
      if (url.protocol !== 'http:' && url.protocol !== 'https:') {
        continue;
      }

      // Last path segment only: the query string and fragment are not part of
      // the file name.
      const fileName = toSafeFileName(url.pathname.split('/').pop());
      if (!fileName) {
        continue;
      }
      // JPEGs are photos rather than icons; skip them regardless of case.
      if (/\.jpe?g$/i.test(fileName)) {
        continue;
      }

      console.log(`### - ${fileName}`);

      // Fetch image file and write to disk. Request failures are emitted on
      // the request object, write failures on the stream returned by pipe(),
      // so both need a handler to avoid an unhandled 'error' event.
      request
        .get(url.href)
        .on('error', reportFetchError)
        .pipe(fs.createWriteStream(path.join(OUTPUT, fileName)))
        .on('error', reportFetchError);
    }
  }

  // Fetch page HTML from server
  request.get(BASEURL, (err, resp, body) => {
    if (err) {
      console.log(`### ERROR! Loading page - ${err}`);
      return;
    }

    // Load and parse into a virtual DOM document
    const doc = new JSDOM(body).window.document;

    // Process inline SVGs
    saveInlineSvgs(doc);

    // Process images
    downloadImages(doc);
  });