1 /**
2 	Copyright: © 2013 rejectedsoftware e.K.
3 	License: Subject to the terms of the GNU GPLv3 license, as written in the included LICENSE.txt file.
4 	Authors: Sönke Ludwig
5 */
6 module dubregistry.dbcontroller;
7 
8 import dub.semver;
9 import std.array;
10 import std.algorithm;
11 import std.exception;
12 //import std.string;
13 import std.uni;
14 import vibe.vibe;
15 
16 
/**
	Gives access to the registry's MongoDB data.

	Wraps the "packages" and "downloads" collections of a single database and
	implements all queries and updates that are performed on them.
*/
class DbController {
	private {
		MongoCollection m_packages;
		MongoCollection m_downloads;
	}

	/**
		Connects to the MongoDB server at 127.0.0.1 and prepares the database.

		Besides opening the collections, this migrates package documents that
		still use the old "branches" field, adds a missing "updateCounter"
		field, re-sorts all version lists and makes sure that the required
		indices exist.

		Params:
			dbname = Name of the database to use
	*/
	this(string dbname)
	{
		auto db = connectMongoDB("127.0.0.1").getDatabase(dbname);
		m_packages = db["packages"];
		m_downloads = db["downloads"];

		// update package format
		foreach (p; m_packages.find()) {
			// only documents that were actually migrated need to be written back
			bool modified = false;

			// oldest format: branches stored as an object -> convert to an array
			if (p.branches.type == Bson.Type.object) {
				Bson[] branches;
				foreach (b; p.branches)
					branches ~= b;
				p.branches = branches;
				modified = true;
			}

			// intermediate format: branches stored separately -> merge into versions
			if (p.branches.type == Bson.Type.array) {
				auto versions = p.versions.get!(Bson[]);
				foreach (b; p.branches) versions ~= b;
				p.branches = Bson(null);
				p.versions = Bson(versions);
				modified = true;
			}

			if (modified) m_packages.update(["_id": p._id], p);
		}

		// add updateCounter field for packages that don't have it yet
		m_packages.update(["updateCounter": ["$exists": false]], ["$set" : ["updateCounter" : 0L]], UpdateFlags.MultiUpdate);

		repairVersionOrder();

		// create indices
		m_packages.ensureIndex(["name": 1], IndexFlags.Unique);
		m_packages.ensureIndex(["searchTerms": 1]);
		m_downloads.ensureIndex([tuple("package", 1), tuple("version", 1)]);
	}

	/**
		Registers a new package.

		A fresh ID is generated and stored in `pack._id` before the package
		is inserted. The search terms of the package are computed afterwards.

		Params:
			pack = The package to insert; its `_id` field gets overwritten

		Throws: An exception if a package with the same name already exists.
	*/
	void addPackage(ref DbPackage pack)
	{
		enforce(m_packages.findOne(["name": pack.name], ["_id": true]).isNull(), "A package with the same name is already registered.");
		pack._id = BsonObjectID.generate();
		m_packages.insert(pack);
		updateKeywords(pack.name);
	}

	/**
		Loads the full database record of a package.

		Params:
			packname = Name of the package

		Throws: An exception if no package with the given name exists.
	*/
	DbPackage getPackage(string packname)
	{
		auto bpack = m_packages.findOne(["name": packname]);
		enforce(!bpack.isNull(), "Unknown package name.");
		return deserializeBson!DbPackage(bpack);
	}

	/// Returns a range of the names of all registered packages.
	auto getAllPackages()
	{
		return m_packages.find(Bson.emptyObject, ["name": 1]).map!(p => p.name.get!string)();
	}

	/// Returns a range of the names of all packages whose "owner" is `user_id`.
	auto getUserPackages(BsonObjectID user_id)
	{
		return m_packages.find(["owner": user_id], ["name": 1]).map!(p => p.name.get!string)();
	}

	/// Tests whether a package named `package_name` with owner `user_id` exists.
	bool isUserPackage(BsonObjectID user_id, string package_name)
	{
		return !m_packages.findOne(["owner": Bson(user_id), "name": Bson(package_name)]).isNull();
	}

	/**
		Removes a package.

		The package is matched by both name and owner, so nothing is removed
		if `user` is not the owner of the package.
	*/
	void removePackage(string packname, BsonObjectID user)
	{
		m_packages.remove(["name": Bson(packname), "owner": Bson(user)]);
	}

	/// Replaces the list of error messages stored for a package.
	void setPackageErrors(string packname, string[] error...)
	{
		m_packages.update(["name": packname], ["$set": ["errors": error]]);
	}

	/// Replaces the list of categories stored for a package.
	void setPackageCategories(string packname, string[] categories...)
	{
		m_packages.update(["name": packname], ["$set": ["categories": categories]]);
	}

	/// Replaces the repository description stored for a package.
	void setPackageRepository(string packname, Json repo)
	{
		m_packages.update(["name": packname], ["$set": ["repository": repo]]);
	}

	/**
		Adds a version to a package, keeping the version list sorted.

		The version list is read, extended and written back using a
		compare-and-swap on the "updateCounter" field. If the counter was
		changed in the meantime, the whole cycle is retried. Versions that
		have a dependency name containing a '.' are dropped from the stored
		list.

		Params:
			packname = Name of the package
			ver = The version to add; must be a branch ("~...") or a valid
				version string

		Throws: An exception if the package does not exist or if the update
			could not be stored after repeated attempts.
	*/
	void addVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());

		size_t nretrys = 0;

		while (true) {
			auto pack = m_packages.findOne(["name": packname], ["versions": true, "updateCounter": true]);
			// fail with a clear message instead of an obscure BSON type error below
			enforce(!pack.isNull(), "Unknown package name.");
			auto counter = pack.updateCounter.get!long;
			auto versions = deserializeBson!(DbPackageVersion[])(pack.versions);
			auto new_versions = versions ~ ver;
			new_versions.sort!((a, b) => vcmp(a, b));

			// remove versions with invalid dependency names to avoid the findAndModify below to fail
			new_versions = new_versions.filter!(
					v => !v.info["dependencies"].opt!(Json[string]).byKey.canFind!(k => k.canFind("."))
				).array;

			//assert((cast(Json)bversions).toString() == (cast(Json)serializeToBson(versions)).toString());

			// only matches if nobody else has modified the package since the read above
			auto res = m_packages.findAndModify(
				["name": Bson(packname), "updateCounter": Bson(counter)],
				["$set": ["versions": serializeToBson(new_versions), "updateCounter": Bson(counter+1)]],
				["_id": true]);

			if (!res.isNull) {
				updateKeywords(packname);
				return;
			}

			enforce(nretrys++ < 20, format("Failed to store updated version list for %s", packname));
			logDebug("Failed to update version list atomically, retrying...");
		}
	}

	/**
		Removes a single version from a package.

		Params:
			packname = Name of the package
			ver = Branch ("~...") or valid version string to remove
	*/
	void removeVersion(string packname, string ver)
	{
		assert(ver.startsWith("~") || ver.isValidVersion());
		m_packages.update(["name": packname], ["$pull": ["versions": ["version": ver]]]);
	}

	/**
		Replaces the stored record of an existing version and refreshes the
		search terms of the package.

		Params:
			packname = Name of the package
			ver = New contents; the entry with the same "version" is replaced
	*/
	void updateVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());
		m_packages.update(["name": packname, "versions.version": ver.version_], ["$set": ["versions.$": ver]]);
		updateKeywords(packname);
	}

	/// Tests whether the package `packname` has an entry for version `ver`.
	bool hasVersion(string packname, string ver)
	{
		auto ret = m_packages.findOne(["name": packname, "versions.version" : ver], ["_id": true]);
		return !ret.isNull();
	}

	/**
		Determines the last entry of a package's version list.

		Returns: The version string of the last stored version, or `null` if
			the package is unknown or has no versions.
	*/
	string getLatestVersion(string packname)
	{
		// project only the last element of the "versions" array
		auto slice = serializeToBson(["$slice": -1]);
		auto pack = m_packages.findOne(["name": packname], ["_id": Bson(true), "versions": slice]);
		if (pack.isNull() || pack.versions.isNull() || pack.versions.length != 1) return null;
		return deserializeBson!(string)(pack.versions[0]["version"]);
	}

	/**
		Loads the record of a single version of a package.

		Throws: An exception if the package/version combination is unknown.
	*/
	DbPackageVersion getVersionInfo(string packname, string ver)
	{
		// "versions.$" limits the projection to the matched array element
		auto pack = m_packages.findOne(["name": packname, "versions.version": ver], ["versions.$": true]);
		enforce(!pack.isNull(), "unknown package/version");
		assert(pack.versions.length == 1);
		return deserializeBson!(DbPackageVersion)(pack.versions[0]);
	}

	/**
		Searches for packages that match a list of keywords.

		The keywords are split into lower case alphanumeric parts of at least
		two characters. Each of them is compared to the search terms of every
		package using the Levenshtein distance; close matches add to the score
		of the package.

		Params:
			keywords = The raw search keywords

		Returns: All packages with a score greater than zero, ordered by
			descending score.
	*/
	DbPackage[] searchPackages(string[] keywords)
	{
		Appender!(string[]) barekeywords;
		foreach (kw; keywords) {
			kw = kw.strip();
			//kw = kw.normalize(); // separate character from diacritics
			// the parts are already lower case, so no further conversion is needed
			string[] parts = splitAlphaNumParts(kw.toLower());
			barekeywords ~= parts.filter!(p => p.count >= 2).array;
		}
		logInfo("search for %s %s", keywords, barekeywords.data);

		static if (0) {
			// performs only exact matches - we should implement something more
			// flexible, for example based on elastic search
			return m_packages.find(["searchTerms": ["$all": barekeywords.data]]).map!(b => deserializeBson!DbPackage(b))();
		} else {
			// in the meantime, we'll perform a brute force search instead
			Appender!(DbPackage[]) pkgs;
			Appender!(size_t[]) scores;
			foreach (p; m_packages.find().map!(b => deserializeBson!DbPackage(b))) {
				size_t score = 0;
				foreach (t; p.searchTerms)
					foreach (kw; barekeywords.data) {
						auto dist = levenshteinDistance(t, kw);
						// accept up to three edits, but never more than the keyword is long
						if (dist <= 3 && dist+1 < kw.length) score += 3 - dist;
					}
				if (score > 0) {
					pkgs ~= p;
					scores ~= score;
				}
			}
			import std.range : zip;
			// sorts both arrays in lock-step, highest score first
			sort!((a, b) => a[1] > b[1])(zip(pkgs.data, scores.data));
			return pkgs.data;
		}
	}

	/**
		Records a download of a package version.

		Params:
			pack = ID of the downloaded package
			ver = The downloaded version
			user_agent = User agent string of the downloading client

		Returns: The ID of the newly inserted download record.
	*/
	BsonObjectID addDownload(BsonObjectID pack, string ver, string user_agent)
	{
		DbPackageDownload download;
		download._id = BsonObjectID.generate();
		download.package_ = pack;
		download.version_ = ver;
		download.time = Clock.currTime(UTC());
		download.userAgent = user_agent;
		m_downloads.insert(download);
		return download._id;
	}

	/**
		Computes the download counts of a package.

		Params:
			pack = ID of the package
			ver = Optional version to restrict the statistics to; all
				versions are counted if empty

		Returns: The total count and the counts for the last 30 days, 7 days
			and 1 day. All counts are zero if no downloads were recorded.
	*/
	auto getDownloadStats(BsonObjectID pack, string ver = null)
	{
		static Bson newerThan(SysTime time)
		{
			// doc.time >= time ? 1 : 0
			alias bs = serializeToBson;
			return bs([
				"$cond": [bs(["$gte": [bs("$time"), bs(time)]]), bs(1), bs(0)]
			]);
		}

		auto match = Bson.emptyObject();
		match["package"] = Bson(pack);
		if (ver.length) match["version"] = ver;

		immutable now = Clock.currTime;
		auto res = m_downloads.aggregate(
			["$match": match],
			// map each download to 0/1 flags for the individual time spans...
			["$project": [
					"_id": Bson(false),
					"total": serializeToBson(["$literal": 1]),
					"monthly": newerThan(now - 30.days),
					"weekly": newerThan(now - 7.days),
					"daily": newerThan(now - 1.days)]],
			// ...and sum them up
			["$group": [
					"_id": Bson(null), // single group
					"total": Bson(["$sum": Bson("$total")]),
					"monthly": Bson(["$sum": Bson("$monthly")]),
					"weekly": Bson(["$sum": Bson("$weekly")]),
					"daily": Bson(["$sum": Bson("$daily")])]]);
		assert(res.length <= 1);
		return res.length ? deserializeBson!DbDownloadStats(res[0]) : DbDownloadStats.init;
	}

	/**
		Recomputes the "searchTerms" field of a package.

		The terms are the distinct lower case alphanumeric parts (of at least
		two characters) of the package name and of the "description",
		"authors" and "homepage" fields of all versions.

		Throws: An exception if the package does not exist.
	*/
	private void updateKeywords(string package_name)
	{
		auto p = getPackage(package_name);
		bool[string] keywords;
		void processString(string str) {
			if (str.length == 0) return;
			foreach (w; splitAlphaNumParts(str))
				if (w.count >= 2)
					keywords[w.toLower()] = true;
		}
		void processVer(Json info) {
			if (auto pv = "description" in info) processString(pv.opt!string);
			if (auto pv = "authors" in info) {
				// "authors" is usually an array of strings, for which opt!string
				// would silently yield an empty string - handle both forms
				if (pv.type == Json.Type.array) {
					foreach (author; pv.get!(Json[])) processString(author.opt!string);
				} else processString(pv.opt!string);
			}
			if (auto pv = "homepage" in info) processString(pv.opt!string);
		}

		processString(p.name);
		foreach (ver; p.versions) processVer(ver.info);

		Appender!(string[]) kwarray;
		foreach (kw; keywords.byKey) kwarray ~= kw;
		m_packages.update(["name": package_name], ["$set": ["searchTerms": kwarray.data]]);
	}

	/**
		Drops all entries with an invalid version string from every package
		and stores the remaining versions in sorted order.
	*/
	private void repairVersionOrder()
	{
		foreach (bp; m_packages.find()) {
			auto p = deserializeBson!DbPackage(bp);
			p.versions = p.versions
				.filter!(v => v.version_.startsWith("~") || v.version_.isValidVersion)
				.array
				.sort!((a, b) => vcmp(a, b))
				.array;
			m_packages.update(["_id": p._id], ["$set": ["versions": p.versions]]);
		}
	}
}
297 
/**
	Database record of a registered package, as stored in the "packages"
	collection.
*/
struct DbPackage {
	/// Unique ID of the record
	BsonObjectID _id;
	/// ID of the user owning the package
	BsonObjectID owner;
	/// Name of the package
	string name;
	/// Description of the repository of the package
	Json repository;
	/// All known versions and branches
	DbPackageVersion[] versions;
	/// Error messages recorded for the package
	string[] errors;
	/// Categories assigned to the package
	string[] categories;
	/// Lower case terms used for searching
	string[] searchTerms;
	/// Change counter, used to implement lockless read-modify-write cycles
	long updateCounter = 0;
}
309 
/**
	Database record of a single version or branch of a package.
*/
struct DbPackageVersion {
	/// Time stamp of the version
	SysTime date;
	/// Version string; branches are prefixed with "~"
	string version_;
	/// Commit identifier (may be missing in stored records)
	@optional string commitID;
	/// Package description of this version
	Json info;
	/// Readme contents (may be missing in stored records)
	@optional string readme;
}
317 
/**
	Database record of a single download, as stored in the "downloads"
	collection.
*/
struct DbPackageDownload {
	/// Unique ID of the record
	BsonObjectID _id;
	/// ID of the downloaded package
	BsonObjectID package_;
	/// The downloaded version
	string version_;
	/// Point in time of the download
	SysTime time;
	/// User agent string of the downloading client
	string userAgent;
}
325 
/**
	Download counters of a package or package version.
*/
struct DbDownloadStats {
	/// Number of all downloads
	uint total;
	/// Number of downloads within the "monthly" time span
	uint monthly;
	/// Number of downloads within the "weekly" time span
	uint weekly;
	/// Number of downloads within the "daily" time span
	uint daily;
}
329 
/**
	Ordering predicate for package versions, based on their version strings.

	Returns: `true` if the version string of `a` orders before that of `b`.
*/
bool vcmp(DbPackageVersion a, DbPackageVersion b)
{
	// delegate to the string based overload
	const isLess = a.version_.vcmp(b.version_);
	return isLess;
}
334 
/**
	Ordering predicate for version strings.

	Both strings are converted to `Version` values of `dub.dependency` and
	compared using its ordering.

	Returns: `true` if `va` orders before `vb`.
*/
bool vcmp(string va, string vb)
{
	import dub.dependency;

	auto lhs = Version(va);
	auto rhs = Version(vb);
	return lhs < rhs;
}
340 
/**
	Splits a string into its maximal runs of alphabetic/numeric characters.

	Every character for which `isIdentChar` yields `false` acts as a separator
	and does not appear in the result.

	Returns: The runs in their order of appearance.
*/
private string[] splitAlphaNumParts(string str)
{
	string[] parts;
	for (;;) {
		// discard all separators in front of the next run
		while (!str.empty && !isIdentChar(str.front)) str.popFront();
		if (str.empty) return parts;

		// determine the code unit offset at which the current run ends
		size_t runEnd = str.length;
		foreach (offset, dchar c; str) {
			if (isIdentChar(c)) continue;
			runEnd = offset;
			break;
		}

		parts ~= str[0 .. runEnd];
		// a separator following the run is skipped by the next iteration
		str = str[runEnd .. $];
	}
}
361 
/// Tests whether `ch` is an alphabetic or a numeric Unicode character.
private bool isIdentChar(dchar ch)
{
	if (std.uni.isAlpha(ch)) return true;
	return std.uni.isNumber(ch);
}