1 /**
2 	Copyright: © 2013-2014 rejectedsoftware e.K.
3 	License: Subject to the terms of the GNU GPLv3 license, as written in the included LICENSE.txt file.
4 	Authors: Sönke Ludwig
5 */
6 module dubregistry.cache;
7 
8 import vibe.core.log;
9 import vibe.db.mongo.mongo;
10 import vibe.http.client;
11 import vibe.stream.memory;
12 
13 import core.time;
14 import std.algorithm : startsWith;
15 import std.exception;
16 
17 
/// Selects how URLCache.get weighs a stored cache entry against a fresh HTTP request.
enum CacheMatchMode {
	always, // return cached data if available, without contacting the server first
	etag,   // send the stored ETag and return cached data if the server responds with "not modified"
	never   // always request fresh data (no "If-None-Match" header is sent)
}
23 
24 
/**
	HTTP download cache backed by a MongoDB collection.

	Each cached URL is stored as one document (see CacheEntry) in the
	"urlcache.entries" collection of the MongoDB server at 127.0.0.1.
	Entries are keyed by the URL string with user name and password
	removed. An entry whose object ID time stamp is older than
	m_maxCacheTime is discarded and treated as a cache miss.
*/
class URLCache {
	private {
		MongoClient m_db;
		MongoCollection m_entries;
		Duration m_maxCacheTime = 365.days; // maximum age of an entry, measured from its _id time stamp
	}

	/// Connects to the local MongoDB server and ensures an index on the "url" field.
	this()
	{
		m_db = connectMongoDB("127.0.0.1");
		m_entries = m_db.getCollection("urlcache.entries");
		m_entries.ensureIndex(["url": true]);
	}

	/**
		Removes the cache entry stored for the given URL, if any.

		Params:
			url = URL whose entry is removed; credentials embedded in the
				URL are ignored
	*/
	void clearEntry(URL url)
	{
		// get() stores entries under the URL with the credentials stripped,
		// so the same normalization is required here for the lookup to match.
		// URL is passed by value - the caller's copy is not modified.
		url.username = null;
		url.password = null;
		m_entries.remove(["url": url.toString()]);
	}

	/**
		Convenience overload mapping a boolean onto CacheMatchMode.

		Params:
			url = URL to download
			callback = receives a stream with the response body
			cache_priority = true selects CacheMatchMode.always,
				false selects CacheMatchMode.etag

		NOTE(review): both overloads default their third parameter, so a
		two-argument call is likely ambiguous - pass the argument explicitly.
	*/
	void get(URL url, scope void delegate(scope InputStream str) callback, bool cache_priority = false)
	{
		get(url, callback, cache_priority ? CacheMatchMode.always : CacheMatchMode.etag);
	}

	/**
		Downloads the given URL, serving the body from the cache when allowed.

		Up to 10 redirects are followed. Responses that carry an "ETag"
		header (or any successful response in CacheMatchMode.always) are
		stored in the cache; other successful responses are passed to the
		callback directly without being stored.

		Params:
			url = URL to download; an embedded user name/password is sent
				using HTTP basic authentication and is not part of the cache key
			callback = invoked exactly once with a stream of the body
			mode = controls when cached data may be returned

		Throws:
			FileNotFoundException if the server responds with "404 Not Found";
			Exception for any other unexpected status code, for a redirect
			without "Location" header and when the redirect limit is exceeded.
	*/
	void get(URL url, scope void delegate(scope InputStream str) callback, CacheMatchMode mode = CacheMatchMode.etag)
	{
		import std.datetime : Clock, UTC;
		import vibe.http.auth.basic_auth;

		// credentials are kept separately so that they never end up in the cache key
		auto user = url.username;
		auto password = url.password;
		url.username = null;
		url.password = null;

		InputStream result;
		bool handled_uncached = false;

		auto now = Clock.currTime(UTC());

		foreach (i; 0 .. 10) { // follow max 10 redirects
			auto be = m_entries.findOne(["url": url.toString()]);
			CacheEntry entry;
			bool have_entry = !be.isNull();

			// invalidate out of date cache entries and treat them as a miss,
			// so that neither stale data nor the old object ID (whose time
			// stamp would keep the entry expired forever) get reused
			if (have_entry && be["_id"].get!BsonObjectID.timeStamp < now - m_maxCacheTime) {
				m_entries.remove(["_id": be["_id"]]);
				have_entry = false;
			}

			if (have_entry) {
				deserializeBson(entry, be);
				if (mode == CacheMatchMode.always) {
					// directly return cache result for cache_priority == true
					logDiagnostic("Cache HIT (early): %s", url.toString());
					if (entry.redirectURL.length) {
						// cached redirect: continue the lookup with the target URL
						url = URL(entry.redirectURL);
						continue;
					} else {
						auto data = be["data"].get!BsonBinData().rawData();
						scope tmpresult = new MemoryStream(cast(ubyte[])data, false);
						callback(tmpresult);
						return;
					}
				}
			} else {
				entry._id = BsonObjectID.generate();
				entry.url = url.toString();
			}

			requestHTTP(url,
				(scope req){
					// only ask for a conditional response if there is something to validate
					if (entry.etag.length && mode != CacheMatchMode.never) req.headers["If-None-Match"] = entry.etag;
					if (user.length) addBasicAuth(req, user, password);
				},
				(scope res){
					switch (res.statusCode) {
						default:
							throw new Exception("Unexpected reply for '"~url.toString()~"': "~httpStatusText(res.statusCode));
						case HTTPStatus.notModified:
							logDiagnostic("Cache HIT: %s", url.toString());
							res.dropBody();
							// "not modified" is only meaningful if there is cached data to fall back to
							enforce(have_entry, "Server responded with 'not modified' but there is no cached data for "~url.toString());
							auto data = be["data"].get!BsonBinData().rawData();
							result = new MemoryStream(cast(ubyte[])data, false);
							break;
						case HTTPStatus.notFound:
							res.dropBody();
							throw new FileNotFoundException("File '"~url.toString()~"' does not exist.");
						case HTTPStatus.movedPermanently, HTTPStatus.found, HTTPStatus.temporaryRedirect:
							auto pv = "Location" in res.headers;
							enforce(pv !is null, "Server responded with redirect but did not specify the redirect location for "~url.toString());
							logDebug("Redirect to '%s'", *pv);
							if (startsWith((*pv), "http:") || startsWith((*pv), "https:")) {
								url = URL(*pv);
							} else url.localURI = *pv; // relative redirect: keep schema and host
							res.dropBody();

							// remember the redirect target; the next loop iteration requests it
							entry.redirectURL = url.toString();
							m_entries.update(["_id": entry._id], entry, UpdateFlags.Upsert);
							break;
						case HTTPStatus.ok:
							auto pet = "ETag" in res.headers;
							if (pet || mode == CacheMatchMode.always) {
								logDiagnostic("Cache MISS: %s", url.toString());
								auto dst = new MemoryOutputStream;
								dst.write(res.bodyReader);
								auto rawdata = dst.data;
								if (pet) entry.etag = *pet;
								entry.data = BsonBinData(BsonBinData.Type.Generic, cast(immutable)rawdata);
								// the URL now serves content directly - drop any redirect
								// recorded earlier so that it is not followed anymore
								entry.redirectURL = null;
								m_entries.update(["_id": entry._id], entry, UpdateFlags.Upsert);
								result = new MemoryStream(rawdata, false);
								break;
							}

							logDebug("Response without etag.. not caching: "~url.toString());

							logDiagnostic("Cache MISS (no etag): %s", url.toString());
							// the body is streamed straight to the caller while the response is still open
							handled_uncached = true;
							callback(res.bodyReader);
							break;
					}
				}
			);

			if (handled_uncached) return;

			if (result) {
				callback(result);
				return;
			}
		}

		throw new Exception("Too many redirects for "~url.toString());
	}
}
156 
/**
	Exception raised when a requested remote file does not exist.

	The constructor forwards the message and the source location of the
	throw site to the Exception base class.
*/
class FileNotFoundException : Exception {
	/// Creates the exception with the given message and source location.
	this(string msg, string file = __FILE__, size_t line = __LINE__) {
		super(msg, file, line);
	}
}
163 
/// Layout of a single document in the cache collection, as (de)serialized by URLCache.get.
private struct CacheEntry {
	BsonObjectID _id;             // document ID; its time stamp is used to detect out of date entries
	string url;                   // URL string the entry is looked up by
	string etag;                  // value of the "ETag" response header, sent back as "If-None-Match"
	BsonBinData data;             // cached response body
	@optional string redirectURL; // target URL if the server answered with a redirect, empty otherwise
}
171 
/// Cache instance shared by the free functions below; created on first use.
private URLCache s_cache;
173 
/**
	Downloads the given URL through the module's URLCache instance.

	The cache instance is created the first time it is needed.

	Params:
		url = URL to download
		callback = receives a stream with the (possibly cached) body
		cache_priority = if true, cached data is returned without
			validating it against the server
*/
void downloadCached(URL url, scope void delegate(scope InputStream str) callback, bool cache_priority = false)
{
	if (s_cache is null)
		s_cache = new URLCache;

	s_cache.get(url, callback, cache_priority);
}
179 
/**
	String overload of downloadCached.

	Parses the string with URL.parse and forwards to the URL based overload.

	Params:
		url = URL to download, in string form
		callback = receives a stream with the (possibly cached) body
		cache_priority = forwarded unchanged
*/
void downloadCached(string url, scope void delegate(scope InputStream str) callback, bool cache_priority = false)
{
	auto parsed_url = URL.parse(url);
	downloadCached(parsed_url, callback, cache_priority);
}
184 
/**
	Removes the cache entry of the given URL from the module's URLCache instance.

	The cache instance is created the first time it is needed.

	Params:
		url = URL whose cache entry is removed
*/
void clearCacheEntry(URL url)
{
	if (s_cache is null)
		s_cache = new URLCache;

	s_cache.clearEntry(url);
}
190 
/**
	String overload of clearCacheEntry.

	Constructs a URL from the string and forwards to the URL based overload.

	Params:
		url = URL whose cache entry is removed, in string form
*/
void clearCacheEntry(string url)
{
	auto parsed_url = URL(url);
	clearCacheEntry(parsed_url);
}