D Paste by downs
Description: Danbooru downloader
Hide line numbers

Create new paste
Post a reply
View replies

Paste:
1  
2  
3  
4  
5  
6  
7  
8  
9  
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  
20  
21  
22  
23  
24  
25  
26  
27  
28  
29  
30  
31  
32  
33  
34  
35  
36  
37  
38  
39  
40  
41  
42  
43  
44  
45  
46  
47  
48  
49  
50  
51  
52  
53  
54  
55  
56  
57  
58  
59  
60  
61  
62  
63  
64  
65  
66  
67  
68  
69  
70  
71  
72  
73  
74  
75  
76  
77  
78  
79  
80  
81  
82  
83  
84  
85  
86  
87  
88  
89  
90  
91  
module fetch_page;

import tools.base, tools.downloader,
    std.file, std.path, std.string, tools.threadpool;
  
/// Joins path fragment b onto a. The separator `sep` is inserted only when a
/// is non-empty and does not already end with it.
string sub(string a, string b) {
  bool needs_sep = a.length != 0 && !a.endsWith(sep);
  return needs_sep ? a ~ sep ~ b : a ~ b;
}

static import externs;
/// Calls externs.symlink with both arguments converted via toStringz.
/// In this file `from` is the link's target text and `to` is the link file
/// to create (see the call in fetch_page).
/// Throws: Exception when externs.symlink returns a non-zero value.
void symlink(string from, string to) {
  auto res = externs.symlink(from.toStringz(), to.toStringz());
  if (!res) return;
  throw new Exception(Format("Symlink ", from, " -> ", to, " failed: ", res));
}
/// Calls externs.chmod on `file` (converted via toStringz) with `mode`.
/// Throws: Exception when externs.chmod returns a non-zero value.
void chmod(string file, ushort mode) {
  auto res = externs.chmod(file.toStringz(), mode);
  if (!res) return;
  throw new Exception(Format("ChMod ", file, " -> ", mode, " failed: ", res));
}

import std.regexp: sub;
/// Replaces every '&', '%', '/', '\' and ';' in s with '_' so the result can
/// be used as a single file or folder name.
string sanitize(string s) {
  // The backslash must be escaped twice: once for the D string literal and
  // once for the regex. With a single level ("\\;") the regex engine only
  // sees an escaped ';' and backslashes pass through unreplaced.
  return s.sub("[&%/\\\\;]", "_", "g");
}

/// Returns the elements of inp that contain `match` as a substring,
/// in their original order.
string[] grep(string[] inp, string match) {
  string[] hits;
  foreach (line; inp) {
    if (line.find(match) != -1) hits ~= line;
  }
  return hits;
}

/// Creates `path` and every missing parent directory, one component at a time.
///
/// Absolute paths are supported: a leading empty component (from a path that
/// starts with the separator) keeps the root instead of turning the path
/// into a relative one. Other empty components (doubled or trailing
/// separators) are skipped.
///
/// Throws: FileException when a component cannot be created and still does
/// not exist afterwards.
void multi_mkdir(string path) {
  string prog_path;
  foreach (i, part; path.split(sep)) {
    if (!part.length) {
      if (i == 0) prog_path = "" ~ sep;
      continue;
    }
    prog_path = prog_path.sub(part);
    if (prog_path.exists()) continue;
    try {
      prog_path.mkdir();
    } catch (FileException e) {
      // This function is called from several worker threads; another one
      // may have created the directory between the check and the mkdir.
      // Only a directory that is still missing is a real failure.
      if (!prog_path.exists()) throw e;
    }
  }
}

import tools.log;
/**
 * Entry point: mirrors a range of Danbooru posts into a local folder tree.
 *
 * Usage: <exec> <root folder> <from_id> <to_id>
 *
 * For every post id from from_id to to_id (end inclusive) the post page is
 * downloaded and its "highres" image is stored under
 * <root>/images/<first character of the file name>/. For each tag found on
 * the page a symlink named like the image is created under
 * <root>/tags/<tag>/, pointing back at the image through a relative path.
 */
void main(string[] args) {
  auto exec = args[0]; args = args[1 .. $];
  // NOTE(review): quiet and log_threads are not declared in this file;
  // presumably logging switches from the tools library - confirm.
  quiet = true; log_threads = false;
  if (args.length != 3) {
    logln(exec, " <root folder> <from_id> <to_id>");
    return;
  }
  auto folder = args[0], from = atoi(args[1]), to = atoi(args[2]);
  
  auto img_folder = "images";
  auto tag_folder = "tags";
  // Downloads url unless the target file already exists.
  // returns the path (off folder) and filename it saved to
  Stuple!(string, string) downloadImage(string url) {
    // extract filename: everything after the last '/', minus any query string
    auto fn = url[url.rfind("/")+1 .. $];
    if (fn.find("?") != -1) fn = fn[0 .. fn.find("?")];
    fn = fn.sanitize();
    // fn[0] below selects the bucket folder; an empty name (URL ending in
    // '/' or with a query directly after it) cannot be bucketed or stored.
    if (!fn.length)
      throw new Exception(Format("Cannot derive a file name from ", url));
    auto my_folder = img_folder.sub(""~fn[0]);
    auto full_fn = folder.sub(my_folder).sub(fn);
    if (!full_fn.exists()) {
      auto data = url.download();
      folder.sub(my_folder).multi_mkdir();
      full_fn.write(data);
      full_fn.chmod(0644);
    }
    return stuple(my_folder, fn);
  }
  
  // NOTE(review): presumably a pool of 3 worker threads - confirm against tools.threadpool.
  auto tp = new Threadpool(3);
  
  // Processes one post: fetches its page, stores the image, links it under each tag.
  void fetch_page(int id) {
    auto page_url = Format("http://danbooru.donmai.us/post/show/", id);
    auto data = page_url.download();
    // Tag names: the text after the first '>' of every anchor that mentions
    // "/post/index?tags", sanitized for use as a folder name.
    auto tags = data.betweens("<a ", "</a>").grep("/post/index?tags")
      /map/ (string s) { return s.between(">", "").sanitize(); };
    // NOTE(review): presumably picks the quoted attribute value of the anchor
    // carrying id="highres"; the exact GLOMP_RIGHT semantics are defined in tools - confirm.
    auto img_link = data.between("<a", "id=\"highres\"", GLOMP_RIGHT).between("\"", "\"");
    if (!img_link.length) {
      logln("Couldn't find image link in ", page_url);
      return;
    }
    string img_path, img_name;
    ptuple(img_path, img_name) = img_link.downloadImage();
    // Link target relative to <root>/tags/<tag>/: two levels up is the root folder.
    //                              ,-- root folder
    auto local_path = "..".sub("..").sub(img_path).sub(img_name);
    foreach (tag; tags) {
      // An empty, "." or ".." tag does not add a directory level of its own
      // below tag_folder, so the "../.." prefix of local_path would point
      // outside the root and the link would dangle. Skip such tags.
      if (!tag.length || tag == "." || tag == "..") continue;
      auto this_tag_folder = tag_folder.sub(tag);
      folder.sub(this_tag_folder).multi_mkdir();
      auto tag_file = folder.sub(this_tag_folder).sub(img_name);
      if (!tag_file.exists()) local_path.symlink(tag_file);
    }
    logln(id, ": ", img_name, " done, ", tags.length, " tags");
  }
  
  // Run fetch_page for every id in the inclusive range on the pool.
  tp.mt_foreach(Range[from .. to].endIncl, (int i) { fetch_page(i); });
}

Replies:

    (some replies deleted)