/**
	Copyright: © 2013-2014 rejectedsoftware e.K.
	License: Subject to the terms of the GNU GPLv3 license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module dubregistry.cache;

import vibe.core.log;
import vibe.db.mongo.mongo;
import vibe.http.client;
import vibe.stream.memory;

import core.time;
import std.algorithm : startsWith;
import std.exception;
import std.typecons : tuple;


/// Selects when `URLCache.get` may answer from the cache instead of the network.
enum CacheMatchMode {
	always, // return cached data if available
	etag, // return cached data if the server responds with "not modified"
	never // always request fresh data
}


/**
	HTTP download cache backed by the MongoDB collection "urlcache.entries".

	Each entry is looked up by URL and stores either a response body together
	with its ETag, or the target URL of a redirect.
*/
class URLCache {
	private {
		MongoClient m_db;
		MongoCollection m_entries;
		// Entries whose object ID time stamp is older than this are discarded on lookup.
		Duration m_maxCacheTime = 365.days;
	}

	/// Connects to the MongoDB server at 127.0.0.1 and ensures an index on the "url" field.
	this()
	{
		m_db = connectMongoDB("127.0.0.1");
		m_entries = m_db.getCollection("urlcache.entries");
		m_entries.ensureIndex([tuple("url", 1)]);
	}

	/// Removes the cache entries stored for `url`.
	void clearEntry(URL url)
	{
		m_entries.remove(["url": url.toString()]);
	}

	/**
		Convenience overload of `get` taking a flag instead of a `CacheMatchMode`.

		Params:
			url = Address to fetch.
			callback = Receives a stream over the response body.
			cache_priority = `true` selects `CacheMatchMode.always`, `false` selects
				`CacheMatchMode.etag`.
	*/
	void get(URL url, scope void delegate(scope InputStream str) callback, bool cache_priority = false)
	{
		get(url, callback, cache_priority ? CacheMatchMode.always : CacheMatchMode.etag);
	}

	/**
		Fetches `url`, following up to ten redirects, and passes the body to `callback`.

		Params:
			url = Address to fetch. A user name and password embedded in the URL are
				removed from it and sent as HTTP basic authentication instead.
			callback = Invoked once with a stream over the (cached or fresh) body.
			mode = Controls how the cache is consulted, see `CacheMatchMode`.

		Throws:
			`FileNotFoundException` for a 404 reply. `Exception` for any other
			unhandled status code, for a redirect without a "Location" header, for a
			"not modified" reply when no usable cache entry exists, and when more
			than ten redirects are encountered.
	*/
	void get(URL url, scope void delegate(scope InputStream str) callback, CacheMatchMode mode = CacheMatchMode.etag)
	{
		import std.datetime : Clock, UTC;
		import vibe.http.auth.basic_auth;
		import dubregistry.internal.utils : black;

		// Keep the credentials out of the URL that is used as cache key and in messages.
		// NOTE(review): the credentials are attached to every request of the redirect
		// chain, including redirects to a different host - confirm this is intended.
		auto user = url.username;
		auto password = url.password;
		url.username = null;
		url.password = null;

		InputStream result;
		bool handled_uncached = false;

		auto now = Clock.currTime(UTC());

		foreach (i; 0 .. 10) { // follow max 10 redirects
			auto be = m_entries.findOne(["url": url.toString()]);
			CacheEntry entry;
			bool have_entry = !be.isNull();

			// Invalidate out of date cache entries. An expired document must not be
			// reused: the expiry check is based on the time stamp inside its "_id",
			// so storing refreshed data under the same "_id" would leave the entry
			// expired on every following lookup.
			if (have_entry && be["_id"].get!BsonObjectID.timeStamp < now - m_maxCacheTime) {
				m_entries.remove(["_id": be["_id"]]);
				have_entry = false;
			}

			if (have_entry) {
				deserializeBson(entry, be);
				if (mode == CacheMatchMode.always) {
					// directly return the cached result without contacting the server
					logDiagnostic("Cache HIT (early): %s", url.toString());
					if (entry.redirectURL.length) {
						url = URL(entry.redirectURL);
						continue;
					} else {
						auto data = be["data"].get!BsonBinData().rawData();
						scope tmpresult = createMemoryStream(cast(ubyte[])data, false);
						callback(tmpresult);
						return;
					}
				}
			} else {
				entry._id = BsonObjectID.generate();
				entry.url = url.toString();
			}

			requestHTTP(url,
				(scope req){
					if (entry.etag.length && mode != CacheMatchMode.never) req.headers["If-None-Match"] = entry.etag;
					if (user.length) addBasicAuth(req, user, password);
				},
				(scope res){
					switch (res.statusCode) {
						default:
							throw new Exception("Unexpected reply for '"~url.toString().black~"': "~httpStatusText(res.statusCode));
						case HTTPStatus.notModified:
							res.dropBody();
							// Without a usable cache document there is no body to hand out.
							enforce(have_entry, "Server responded with 'not modified' but no cached data is available for "~url.toString());
							logDiagnostic("Cache HIT: %s", url.toString());
							auto data = be["data"].get!BsonBinData().rawData();
							result = createMemoryStream(cast(ubyte[])data, false);
							break;
						case HTTPStatus.notFound:
							res.dropBody();
							throw new FileNotFoundException("File '"~url.toString().black~"' does not exist.");
						case HTTPStatus.movedPermanently, HTTPStatus.found, HTTPStatus.temporaryRedirect:
							auto pv = "Location" in res.headers;
							enforce(pv !is null, "Server responded with redirect but did not specify the redirect location for "~url.toString());
							logDebug("Redirect to '%s'", *pv);
							// Absolute locations replace the URL, anything else only its local part.
							if (startsWith((*pv), "http:") || startsWith((*pv), "https:")) {
								url = URL(*pv);
							} else url.localURI = *pv;
							res.dropBody();

							// Remember the redirect; `result` stays unset, so the loop continues
							// with the new URL.
							entry.redirectURL = url.toString();
							m_entries.update(["_id": entry._id], entry, UpdateFlags.Upsert);
							break;
						case HTTPStatus.ok:
							auto pet = "ETag" in res.headers;
							// Store the body if it can be validated later (ETag present) or if the
							// caller asked for cached data to be preferred.
							if (pet || mode == CacheMatchMode.always) {
								logDiagnostic("Cache MISS: %s", url.toString());
								auto dst = createMemoryOutputStream();
								res.bodyReader.pipe(dst);
								auto rawdata = dst.data;
								if (pet) entry.etag = *pet;
								entry.data = BsonBinData(BsonBinData.Type.Generic, cast(immutable)rawdata);
								m_entries.update(["_id": entry._id], entry, UpdateFlags.Upsert);
								result = createMemoryStream(rawdata, false);
								break;
							}

							logDebug("Response without etag.. not caching: "~url.toString());

							// Pass the body straight through to the caller without storing it.
							logDiagnostic("Cache MISS (no etag): %s", url.toString());
							handled_uncached = true;
							callback(res.bodyReader);
							break;
					}
				}
			);

			if (handled_uncached) return;

			if (result) {
				callback(result);
				return;
			}
		}

		throw new Exception("Too many redirects for "~url.toString().black);
	}
}

/// Thrown by `URLCache.get` when the server answers with HTTP status 404.
class FileNotFoundException : Exception {
	this(string msg, string file = __FILE__, size_t line = __LINE__)
	{
		super(msg, file, line);
	}
}

// Document layout of the "urlcache.entries" collection.
private struct CacheEntry {
	BsonObjectID _id;
	string url;
	string etag;
	BsonBinData data;
	@optional string redirectURL;
}

// Cache instance shared by the module level functions; created on first use.
private URLCache s_cache;

/**
	Downloads `url` through the module's shared `URLCache`, creating it on first use.

	Params:
		url = Address to fetch.
		callback = Receives a stream over the response body.
		cache_priority = If `true`, cached data is returned without contacting the server.
*/
void downloadCached(URL url, scope void delegate(scope InputStream str) callback, bool cache_priority = false)
{
	if (!s_cache) s_cache = new URLCache;
	s_cache.get(url, callback, cache_priority);
}

/// ditto
void downloadCached(string url, scope void delegate(scope InputStream str) callback, bool cache_priority = false)
{
	downloadCached(URL(url), callback, cache_priority);
}

/// Removes the entries stored for `url` from the module's shared `URLCache`, creating it on first use.
void clearCacheEntry(URL url)
{
	if (!s_cache) s_cache = new URLCache;
	s_cache.clearEntry(url);
}

/// ditto
void clearCacheEntry(string url)
{
	clearCacheEntry(URL(url));
}