D Paste by downs
Description: Danbooru downloader
|
module fetch_page;

import tools.base, tools.downloader, std.file, std.path, std.string, tools.threadpool;

// Joins two path fragments. The separator is inserted only when `a` is
// non-empty and does not already end with one.
string sub(string a, string b) {
  if (!a.length || a.endsWith(sep)) return a ~ b;
  else return a ~ sep ~ b;
}

static import externs;

// Creates a symbolic link at `to` whose target is `from`.
// Throws an Exception when externs.symlink returns a non-zero result.
void symlink(string from, string to) {
  if (auto res = externs.symlink(from.toStringz(), to.toStringz()))
    throw new Exception(Format("Symlink ", from, " -> ", to, " failed: ", res));
}

// Sets the permission bits of `file` to `mode`.
// Throws an Exception when externs.chmod returns a non-zero result.
void chmod(string file, ushort mode) {
  if (auto res = externs.chmod(file.toStringz(), mode))
    throw new Exception(Format("ChMod ", file, " -> ", mode, " failed: ", res));
}

import std.regexp: sub;

// Replaces every character matched by the class below with "_" so the result
// can be used as a single file or directory name.
// NOTE(review): after D string unescaping the class reads [&%/\;]; whether it
// also matches a literal backslash depends on std.regexp's escape handling
// inside character classes - confirm.
string sanitize(string s) { return s.sub("[&%/\\;]", "_", "g"); }

// Returns the entries of `inp` that contain `match` as a substring.
string[] grep(string[] inp, string match) {
  return inp /select/ (string s) { return s.find(match) != -1; };
}

// Creates `path` and all of its missing parent directories.
//
// Absolute paths are supported: splitting "/a/b" on the separator yields a
// leading empty component, which is mapped to the filesystem root instead of
// being handed to mkdir as an empty name. Other empty components (doubled or
// trailing separators) are skipped.
//
// Safe to call from several workers at once: when mkdir fails but the
// directory exists afterwards, another worker created it first and the error
// is ignored.
void multi_mkdir(string path) {
  string prog_path;
  foreach (i, part; path.split(sep)) {
    if (!part.length) {
      // a leading empty component marks an absolute path: start at the root
      if (!i) prog_path = "" ~ sep;
      continue;
    }
    prog_path = prog_path.sub(part);
    if (!prog_path.exists()) {
      try {
        prog_path.mkdir();
      } catch (Exception ex) {
        // lost the check-then-create race against another worker?
        if (!prog_path.exists()) throw ex;
      }
    }
  }
}

import tools.log;

// Usage: <exec> <root folder> <from_id> <to_id>
//
// For every post id in [from_id, to_id] (inclusive) the post page is
// downloaded, the full-size image is stored under <root>/images/<c>/<name>
// (where <c> is the first character of the file name), and for every tag a
// relative symlink <root>/tags/<tag>/<name> pointing at the image is created.
// The work is spread over a pool of three threads.
void main(string[] args) {
  auto exec = args[0];
  args = args[1 .. $];
  // logging switches; presumably provided by the tools library - verify
  quiet = true;
  log_threads = false;
  if (args.length != 3) {
    logln(exec, " <root folder> <from_id> <to_id>");
    return;
  }
  auto folder = args[0], from = atoi(args[1]), to = atoi(args[2]);
  auto img_folder = "images";
  auto tag_folder = "tags";

  // returns the path (off folder) and filename it saved to
  Stuple!(string, string) downloadImage(string url) {
    // extract filename: everything after the last "/", minus any query string
    auto fn = url[url.rfind("/")+1 .. $];
    if (fn.find("?") != -1) fn = fn[0 .. fn.find("?")];
    fn = fn.sanitize();
    // fn[0] below needs at least one character
    if (!fn.length)
      throw new Exception(Format("No file name in image URL ", url));
    // images are bucketed by the first character of their file name
    auto my_folder = img_folder.sub(""~fn[0]);
    auto full_fn = folder.sub(my_folder).sub(fn);
    // skip the download when the file is already present
    if (!full_fn.exists()) {
      auto data = url.download();
      folder.sub(my_folder).multi_mkdir();
      full_fn.write(data);
      full_fn.chmod(0644);
    }
    return stuple(my_folder, fn);
  }

  auto tp = new Threadpool(3);

  // Processes a single post: downloads its page, fetches the image and links
  // the image into the folder of every tag found on the page.
  void fetch_page(int id) {
    auto page_url = Format("http://danbooru.donmai.us/post/show/", id);
    auto data = page_url.download();
    // Keep only the anchors that link to a tag search and take the text after
    // the first ">" of each (presumably the link text - verify the semantics
    // of tools.base.between with an empty end marker).
    auto tags = data.betweens("<a ", "</a>").grep("/post/index?tags")
      /map/ (string s) { return s.between(">", "").sanitize(); };
    // first quoted string inside the anchor that carries id="highres"
    auto img_link = data.between("<a", "id=\"highres\"", GLOMP_RIGHT).between("\"", "\"");
    if (!img_link.length) {
      logln("Couldn't find image link in ", page_url);
      return;
    }
    string img_path, img_name;
    ptuple(img_path, img_name) = img_link.downloadImage();
    // Link target relative to <root>/tags/<tag>/: the first ".." leaves the
    // tag folder, the second ".." leaves tags/ and reaches the root folder.
    auto local_path = "..".sub("..").sub(img_path).sub(img_name);
    foreach (tag; tags) {
      auto this_tag_folder = tag_folder.sub(tag);
      folder.sub(this_tag_folder).multi_mkdir();
      auto tag_file = folder.sub(this_tag_folder).sub(img_name);
      if (!tag_file.exists()) local_path.symlink(tag_file);
    }
    logln(id, ": ", img_name, " done, ", tags.length, " tags");
  }

  tp.mt_foreach(Range[from .. to].endIncl, (int i) { fetch_page(i); });
}