1 /**
2 	Copyright: © 2013 rejectedsoftware e.K.
3 	License: Subject to the terms of the GNU GPLv3 license, as written in the included LICENSE.txt file.
4 	Authors: Sönke Ludwig
5 */
6 module dubregistry.dbcontroller;
7 
8 import dub.semver;
9 import std.array;
10 import std.algorithm;
11 import std.exception;
12 //import std.string;
13 import std.typecons : tuple;
14 import std.uni;
15 import vibe.vibe;
16 
17 
18 class DbController {
19 @safe:
20 
21 	private {
22 		MongoCollection m_packages;
23 		MongoCollection m_downloads;
24 		MongoCollection m_files;
25 	}
26 
27 	private alias bson = serializeToBson;
28 
	/**
		Connects to the MongoDB server at 127.0.0.1 and prepares the database.

		Binds the "packages", "downloads" and "files" collections of the
		database `dbname`, then runs the data migrations and the index setup
		below. All of these steps are executed on every construction.

		Params:
			dbname = Name of the MongoDB database to open.
	*/
	this(string dbname)
	{
		auto db = connectMongoDB("127.0.0.1").getDatabase(dbname);
		m_packages = db["packages"];
		m_downloads = db["downloads"];
		m_files = db["files"];

		//
		// migrations:
		//

		// only compiled in when the DubRegistry_EnableLegacyMigrations version is set
		version (DubRegistry_EnableLegacyMigrations) {
			// update package format
			foreach(p; m_packages.find()){
				bool any_change = false;
				// step 1: an object-typed "branches" field is flattened into an array of its values
				if (p["branches"].type == Bson.Type.object) {
					Bson[] branches;
					foreach (b; p["branches"].byValue)
						branches ~= b;
					p["branches"] = branches;
					any_change = true;
				}
				// step 2: an array-typed "branches" field (possibly produced by step 1)
				// is appended to "versions" and then replaced by null
				if (p["branches"].type == Bson.Type.array) {
					auto versions = p["versions"].get!(Bson[]);
					foreach (b; p["branches"].byValue) versions ~= b;
					p["branches"] = Bson(null);
					p["versions"] = Bson(versions);
					any_change = true;
				}
				// write the document back only if one of the steps modified it
				if (any_change) m_packages.update(["_id": p["_id"]], p);
			}

			// add updateCounter field for packages that don't have it yet
			m_packages.update(["updateCounter": ["$exists": false]], ["$set" : ["updateCounter" : 0L]], UpdateFlags.multiUpdate);
		}

		// add default non-@optional stats to packages
		DbPackageStats stats;
		m_packages.update(["stats": ["$exists": false]], ["$set": ["stats": stats]], UpdateFlags.multiUpdate);

		// rename stats.rating -> stats.score
		m_packages.update(Bson.emptyObject(), ["$rename": ["stats.rating": "stats.score"]], UpdateFlags.multiUpdate);

		// default initialize missing scores with zero
		float score = 0;
		m_packages.update(["stats.score": ["$exists": false]], ["$set": ["stats.score": score]], UpdateFlags.multiUpdate);

		// remove old logo fields (matches documents that still carry "logoHash")
		m_packages.update(["logoHash": ["$exists": true]], ["$unset": ["logo": 0, "logoHash": 0]], UpdateFlags.multiUpdate);

		// create indices
		m_packages.ensureIndex([tuple("name", 1)], IndexFlags.Unique);
		m_packages.ensureIndex([tuple("stats.score", 1)]);
		m_downloads.ensureIndex([tuple("package", 1), tuple("version", 1)]);

		// drop old text index versions
		db.runCommand(["dropIndexes": "packages", "index": "packages_full_text_search_index"]);

		// add current text index; the index description is built by hand and
		// inserted as a document into the "system.indexes" collection
		Bson[string] doc;
		doc["v"] = 1;
		doc["key"] = ["_fts": Bson("text"), "_ftsx": Bson(1)];
		doc["ns"] = db.name ~ "." ~ m_packages.name;
		doc["name"] = "packages_full_text_search_index_v2";
		// per-field weights of the text index
		doc["weights"] = [
			"name": Bson(4),
			"categories": Bson(3),
			"versions.info.description" : Bson(3),
			"versions.info.homepage" : Bson(1),
			"versions.info.author" : Bson(1),
			"versions.readme" : Bson(2),
		];
		doc["background"] = true;
		db["system.indexes"].insert(doc);

		// only compiled in when the DubRegistry_RepairVersionOrder version is set
		version (DubRegistry_RepairVersionOrder) {
			// sort package versions in ascending order according to vcmp
			// NOTE: for quite a while now, versions have been inserted
			//       atomically in the proper order, so this is no longer
			//       necessary as a general precaution
			repairVersionOrder();
		}
	}
112 
113 	void addPackage(ref DbPackage pack)
114 	{
115 		enforce(m_packages.findOne(["name": pack.name], ["_id": true]).isNull(), "A package with the same name is already registered.");
116 		if (pack._id == BsonObjectID.init)
117 			pack._id = BsonObjectID.generate();
118 		m_packages.insert(pack);
119 	}
120 
121 	void addOrSetPackage(ref DbPackage pack)
122 	{
123 		enforce(pack._id != BsonObjectID.init, "Cannot update a packag with no ID.");
124 		m_packages.update(["_id": pack._id], pack, UpdateFlags.upsert);
125 	}
126 
127 	DbPackage getPackage(string packname)
128 	{
129 		auto pack = m_packages.findOne!DbPackage(["name": packname]);
130 		enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
131 		return pack;
132 	}
133 
134 	auto getPackages(scope string[] packnames...)
135 	{
136 		return m_packages.find!DbPackage(["name": ["$in": serializeToBson(packnames)]]);
137 	}
138 
139 	BsonObjectID getPackageID(string packname)
140 	{
141 		static struct PID { BsonObjectID _id; }
142 		auto pid = m_packages.findOne!PID(["name": packname], ["_id": 1]);
143 		enforce(!pid.isNull(), "Unknown package name.");
144 		return pid._id;
145 	}
146 
147 	DbPackage getPackage(BsonObjectID id)
148 	{
149 		auto pack = m_packages.findOne!DbPackage(["_id": id]);
150 		enforce!RecordNotFound(!pack.isNull(), "Unknown package ID.");
151 		return pack;
152 	}
153 
154 	auto getAllPackages()
155 	{
156 		return m_packages.find(Bson.emptyObject, ["name": 1]).map!(p => p["name"].get!string)();
157 	}
158 
159 	auto getAllPackageIDs()
160 	{
161 		return m_packages.find(Bson.emptyObject, ["_id": 1]).map!(p => p["_id"].get!BsonObjectID)();
162 	}
163 
164 	auto getPackageDump()
165 	{
166 		return m_packages.find!DbPackage(Bson.emptyObject);
167 	}
168 
169 	auto getUserPackages(BsonObjectID user_id)
170 	{
171 		return m_packages.find(["owner": user_id], ["name": 1]).map!(p => p["name"].get!string)();
172 	}
173 
174 	bool isUserPackage(BsonObjectID user_id, string package_name)
175 	{
176 		static struct PO { BsonObjectID owner; }
177 		auto p = m_packages.findOne!PO(["name": package_name], ["owner": 1]);
178 		return !p.isNull && p.owner == user_id;
179 	}
180 
181 	void removePackage(string packname, BsonObjectID user)
182 	{
183 		m_packages.remove(["name": Bson(packname), "owner": Bson(user)]);
184 	}
185 
186 	void setPackageErrors(string packname, string[] error...)
187 	{
188 		m_packages.update(["name": packname], ["$set": ["errors": error]]);
189 	}
190 
191 	void setPackageCategories(string packname, string[] categories...)
192 	{
193 		m_packages.update(["name": packname], ["$set": ["categories": categories]]);
194 	}
195 
196 	void setPackageRepository(string packname, DbRepository repo)
197 	{
198 		m_packages.update(["name": packname], ["$set": ["repository": repo]]);
199 	}
200 
201 	void setPackageLogo(string packname, bdata_t png)
202 	{
203 		Bson update;
204 
205 		if (png.length) {
206 			auto id = BsonObjectID.generate();
207 			m_files.insert([
208 				"_id": Bson(id),
209 				"data": Bson(BsonBinData(BsonBinData.Type.generic, png))
210 			]);
211 
212 			update = serializeToBson(["$set": ["logo": id]]);
213 		} else {
214 			update = serializeToBson(["$unset": ["logo": 0]]);
215 		}
216 
217 		// remove existing logo file
218 		auto l = m_packages.findOne(["name": packname], ["logo": 1]);
219 		if (!l.isNull && !l.tryIndex("logo").isNull)
220 			m_files.remove(["_id": l["logo"]]);
221 
222 		// set the new logo
223 		m_packages.update(["name": packname], update);
224 	}
225 
226 	bdata_t getPackageLogo(string packname, out bdata_t rev)
227 	{
228 		auto bpack = m_packages.findOne(["name": packname], ["logo": 1]);
229 		if (bpack.isNull) return null;
230 
231 		auto id = bpack.tryIndex("logo");
232 		if (id.isNull) return null;
233 
234 		auto data = m_files.findOne!DbPackageFile(["_id": id.get]);
235 		if (data.isNull()) return null;
236 
237 		rev = (cast(ubyte[])id.get.get!BsonObjectID).idup;
238 		return data.get.data.rawData;
239 	}
240 
	/**
		Adds a version to a package's version list.

		The stored list is read, `ver` is appended, the list is sorted with
		`vcmp` and written back. The write only succeeds if the package's
		`updateCounter` still has the value that was read; otherwise the whole
		read-modify-write cycle is repeated.

		Params:
			packname = Name of the package to modify.
			ver = Version to add; its `version_` must start with "~" or be
				accepted by `isValidVersion` (checked by an assertion).

		Throws: Exception once the conditional update has failed 21 times.
	*/
	void addVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());

		size_t nretrys = 0;

		while (true) {
			// snapshot of the current version list together with its update counter
			auto pack = m_packages.findOne(["name": packname], ["versions": true, "updateCounter": true]);
			auto counter = pack["updateCounter"].get!long;
			auto versions = deserializeBson!(DbPackageVersion[])(pack["versions"]);
			auto new_versions = versions ~ ver;
			new_versions.sort!((a, b) => vcmp(a, b));

			// remove versions with invalid dependency names to avoid the findAndModify below to fail
			// (any version that has a dependency key containing "." is dropped,
			// which applies to the newly added version as well)
			() @trusted {
				new_versions = new_versions.filter!(
					v => !v.info["dependencies"].opt!(Json[string]).byKey.canFind!(k => k.canFind("."))
				).array;
			} ();

			//assert((cast(Json)bversions).toString() == (cast(Json)serializeToBson(versions)).toString());

			// matches only while updateCounter is unchanged, then stores the new
			// list and increments the counter in the same operation
			auto res = m_packages.findAndModify(
				["name": Bson(packname), "updateCounter": Bson(counter)],
				["$set": ["versions": serializeToBson(new_versions), "updateCounter": Bson(counter+1)]],
				["_id": true]);

			// a non-null result means the conditional update matched a document
			if (!res.isNull) return;

			// passes for the first 20 failures, throws on the 21st
			enforce(nretrys++ < 20, format("Failed to store updated version list for %s", packname));
			logDebug("Failed to update version list atomically, retrying...");
		}
	}
274 
275 	void removeVersion(string packname, string ver)
276 	{
277 		assert(ver.startsWith("~") || ver.isValidVersion());
278 		m_packages.update(["name": packname], ["$pull": ["versions": ["version": ver]]]);
279 	}
280 
281 	void updateVersion(string packname, DbPackageVersion ver)
282 	{
283 		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());
284 		m_packages.update(["name": packname, "versions.version": ver.version_], ["$set": ["versions.$": ver]]);
285 	}
286 
287 	bool hasVersion(string packname, string ver)
288 	{
289 		auto ret = m_packages.findOne(["name": packname, "versions.version" : ver], ["_id": true]);
290 		return !ret.isNull();
291 	}
292 
293 	string getLatestVersion(string packname)
294 	{
295 		auto slice = serializeToBson(["$slice": -1]);
296 		auto pack = m_packages.findOne(["name": packname], ["_id": Bson(true), "versions": slice]);
297 		if (pack.isNull() || pack["versions"].isNull() || pack["versions"].length != 1) return null;
298 		return deserializeBson!(string)(pack["versions"][0]["version"]);
299 	}
300 
301 	DbPackageVersion getVersionInfo(string packname, string ver)
302 	{
303 		auto pack = m_packages.findOne(["name": packname, "versions.version": ver], ["versions.$": true]);
304 		enforce(!pack.isNull(), "unknown package/version");
305 		assert(pack["versions"].length == 1);
306 		return deserializeBson!(DbPackageVersion)(pack["versions"][0]);
307 	}
308 
309 	DbPackage[] searchPackages(string query)
310 	{
311 		import std.math : round;
312 
313 		if (!query.strip.length) {
314 			return m_packages.find()
315 				.sort(["stats.score": 1])
316 				.map!(deserializeBson!DbPackage)
317 				.array;
318 		}
319 
320 		return m_packages
321 			.find(["$text": ["$search": query]], ["score": bson(["$meta": "textScore"])])
322 			.sort(["score": bson(["$meta": "textScore"])])
323 			.map!(deserializeBson!DbPackage)
324 			.array
325 			// sort by bucketized score preserving FTS score order
326 			.sort!((a, b) => a.stats.score.round > b.stats.score.round, SwapStrategy.stable)
327 			.release;
328 	}
329 
330 	BsonObjectID addDownload(BsonObjectID pack, string ver, string user_agent)
331 	{
332 		DbPackageDownload download;
333 		download._id = BsonObjectID.generate();
334 		download.package_ = pack;
335 		download.version_ = ver;
336 		download.time = Clock.currTime(UTC());
337 		download.userAgent = user_agent;
338 		m_downloads.insert(download);
339 		return download._id;
340 	}
341 
342 	DbPackageStats getPackageStats(string packname)
343 	{
344 		static struct PS { DbPackageStats stats; }
345 		auto pack = m_packages.findOne!PS(["name": Bson(packname)], ["stats": true]);
346 		enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
347 		logDebug("getPackageStats(%s) %s", packname, pack.stats);
348 		return pack.stats;
349 	}
350 
351 	void updatePackageStats(BsonObjectID packId, ref DbPackageStats stats)
352 	{
353 		stats.updatedAt = Clock.currTime(UTC());
354 		logDebug("updatePackageStats(%s, %s)", packId, stats);
355 		m_packages.update(["_id": packId], ["$set": ["stats": stats]]);
356 	}
357 
	/**
		Counts the recorded downloads of a package.

		Params:
			packId = ID of the package whose downloads are counted.
			ver = If non-empty, only downloads of this version are counted.

		Returns: Total download count plus the counts for the last 30 days,
			7 days and 1 day; `DbDownloadStats.init` if the aggregation
			yields no result document.
	*/
	DbDownloadStats aggregateDownloadStats(BsonObjectID packId, string ver = null)
	{
		// builds an expression that evaluates to 1 for download records with
		// `time` >= the given time and to 0 otherwise
		static Bson newerThan(SysTime time)
		{
			// doc.time >= time ? 1 : 0
			alias bs = serializeToBson;
			return bs([
				"$cond": [bs(["$gte": [bs("$time"), bs(time)]]), bs(1), bs(0)]
			]);
		}

		// restrict to the package and, if requested, to a single version
		auto match = Bson.emptyObject();
		match["package"] = Bson(packId);
		if (ver.length) match["version"] = ver;

		immutable now = Clock.currTime;
		auto res = m_downloads.aggregate(
			["$match": match],
			// turn each download record into 1/0 flags per time window ...
			["$project": [
					"_id": Bson(false),
					"total": serializeToBson(["$literal": 1]),
					"monthly": newerThan(now - 30.days),
					"weekly": newerThan(now - 7.days),
					"daily": newerThan(now - 1.days)]],
			// ... and sum the flags up over all matched records
			["$group": [
					"_id": Bson(null), // single group
					"total": Bson(["$sum": Bson("$total")]),
					"monthly": Bson(["$sum": Bson("$monthly")]),
					"weekly": Bson(["$sum": Bson("$weekly")]),
					"daily": Bson(["$sum": Bson("$daily")])]]);
		assert(res.length <= 1);
		return res.length ? deserializeBson!DbDownloadStats(res[0]) : DbDownloadStats.init;
	}
391 
	/**
		Computes sum, mean and standard deviation of the package statistics
		over the packages collection.

		Returns: The download statistics aggregated over all packages and the
			repository statistics aggregated separately per `repository.kind`.
	*/
	DbStatDistributions getStatDistributions()
	{
		// Aggregates every member of T, read from the document path
		// `prefix`.<member>. With `groupBy` set to null a single T is
		// returned; otherwise one T per distinct value of the `groupBy`
		// field, keyed by that value.
		auto aggregate(T, string prefix, string groupBy)()
		@safe {
			auto group = ["_id": Bson(groupBy ? "$"~groupBy : null)];
			Bson[string] project;
			foreach (mem; __traits(allMembers, T))
			{
				// every member of T must be an Agg with exactly the fields handled below
				static assert(is(typeof(__traits(getMember, T.init, mem)) == DbStatDistributions.Agg));
				static assert([__traits(allMembers, DbStatDistributions.Agg)] == ["sum", "mean", "std"]);
				// the $group stage yields flat "<member>_sum/_mean/_std" fields ...
				group[mem~"_sum"] = bson(["$sum": "$"~prefix~"."~mem]);
				group[mem~"_mean"] = bson(["$avg": "$"~prefix~"."~mem]);
				group[mem~"_std"] = bson(["$stdDevPop": "$"~prefix~"."~mem]);
				// ... which the $project stage nests into one sub-document per member
				project[mem] = bson([
					"mean": "$"~mem~"_mean",
					"sum": "$"~mem~"_sum",
					"std": "$"~mem~"_std"
				]);
			}
			auto res = m_packages.aggregate(["$group": group], ["$project": project]);

			static if (groupBy is null)
			{
				// ungrouped: at most one result document
				if (res.length == 0)
					return T.init;
				assert(res.length == 1);
				return res[0].deserializeBson!T;
			}
			else
			{
				// grouped: one result document per group, keyed by its "_id"
				T[string] ret;
				foreach (doc; res.byValue)
					ret[doc["_id"].get!string] = doc.deserializeBson!T;
				return ret;
			}
		}

		DbStatDistributions ret;
		ret.downloads = aggregate!(typeof(ret.downloads), "stats.downloads", null);
		ret.repos = aggregate!(typeof(ret.repos[""]), "stats.repo", "repository.kind");
		return ret;
	}
434 
435 	private void repairVersionOrder()
436 	{
437 		foreach( bp; m_packages.find() ){
438 			auto p = deserializeBson!DbPackage(bp);
439 			auto newversions = p.versions
440 				.filter!(v => v.version_.startsWith("~") || v.version_.isValidVersion)
441 				.array
442 				.sort!((a, b) => vcmp(a, b))
443 				.uniq!((a, b) => a.version_ == b.version_)
444 				.array;
445 			if (p.versions != newversions)
446 				m_packages.update(["_id": p._id], ["$set": ["versions": newversions]]);
447 		}
448 	}
449 }
450 
/// Exception type used by the lookups in this module to signal that a
/// requested record does not exist.
class RecordNotFound : Exception
{
    /// Constructs the exception from a message, source location and an
    /// optional chained throwable.
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    @nogc @safe pure nothrow {
        super(msg, file, line, next);
    }

    /// Constructs the exception from a message and a chained throwable,
    /// followed by the source location.
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
    @nogc @safe pure nothrow {
        super(msg, file, line, next);
    }
}
463 
/// Document layout of an entry in the "packages" collection.
struct DbPackage {
	/// Database ID of the package document.
	BsonObjectID _id;
	/// ID of the owning user; compared against user IDs by the controller.
	BsonObjectID owner;
	/// Package name; a unique index is created on this field.
	string name;
	/// Repository the package is hosted in.
	DbRepository repository;
	/// Version entries of the package, kept sorted according to vcmp.
	DbPackageVersion[] versions;
	/// Download/repository statistics and score.
	DbPackageStats stats;
	/// Error messages stored for the package (see setPackageErrors).
	string[] errors;
	/// Categories assigned to the package (see setPackageCategories).
	string[] categories;
	long updateCounter = 0; // used to implement lockless read-modify-write cycles
	@optional BsonObjectID logo; // reference to m_files
}
476 
/// Identifies the source repository of a package on a supported hosting
/// service.
struct DbRepository {
	/// Hosting service identifier: "github", "gitlab" or "bitbucket".
	string kind;
	/// First path segment of the repository URL.
	string owner;
	/// Second path segment of the repository URL.
	string project;

	/**
		Fills `kind`, `owner` and `project` from a repository URL.

		The URL must use the http or https schema, point to a GitHub, GitLab
		or BitBucket host (or one of their sub domains) and have a path of
		exactly two segments: owner and project.

		Note that `kind` (and possibly `owner`) may already have been
		overwritten when an exception is thrown for an invalid path.

		Params:
			url = URL of the repository's project page.

		Throws: Exception if the schema, the host or the path is not
			acceptable.
	*/
	void parseURL(URL url) {
		string host = url.host;
		if (!url.schema.among!("http", "https"))
			throw new Exception("Invalid Repository Schema (only supports http and https)");

		// map the host to the matching service identifier
		if (host.endsWith(".github.com") || host == "github.com" || host == "github") {
			kind = "github";
		} else if (host.endsWith(".gitlab.com") || host == "gitlab.com" || host == "gitlab") {
			kind = "gitlab";
		} else if (host.endsWith(".bitbucket.org") || host == "bitbucket.org" || host == "bitbucket") {
			kind = "bitbucket";
		} else {
			throw new Exception("Please input a valid project URL to a GitHub, GitLab or BitBucket project.");
		}

		// the path must consist of exactly "<owner>/<project>"
		auto path = url.path.relativeTo(InetPath("/")).bySegment;
		if (path.empty)
			throw new Exception("Invalid Repository URL (no path)");
		if (path.front.name.empty)
			throw new Exception("Invalid Repository URL (missing owner)");
		owner = path.front.name;
		path.popFront;
		if (path.empty || path.front.name.empty)
			throw new Exception("Invalid Repository URL (missing project)");
		project = path.front.name;
		path.popFront;
		if (!path.empty)
			throw new Exception("Invalid Repository URL (got more than owner and project)");
	}
}
510 
/// Document layout of an entry in the "files" collection (used for package
/// logos).
struct DbPackageFile {
	/// Database ID of the file; referenced by DbPackage.logo.
	BsonObjectID _id;
	/// Binary contents of the file.
	BsonBinData data;
}
515 
/// A single entry of a package's version list.
struct DbPackageVersion {
	/// Time stamp of the version. NOTE(review): exact meaning (tag/commit
	/// date?) is not visible in this module - confirm.
	SysTime date;
	/// Version string; the controller asserts that it starts with "~" or is
	/// accepted by isValidVersion.
	string version_;
	/// Presumably the repository commit the version was built from - confirm.
	@optional string commitID;
	/// Package description as JSON; its "dependencies" object is inspected
	/// by DbController.addVersion.
	Json info;
	/// Contents of the README; covered by the full text index.
	@optional string readme;
	/// Presumably whether `readme` is Markdown formatted - confirm.
	@optional bool readmeMarkdown;
	/// Presumably the folder containing the documentation - confirm.
	@optional string docFolder;
}
525 
/// Document layout of an entry in the "downloads" collection, one per
/// recorded download (see DbController.addDownload).
struct DbPackageDownload {
	/// Database ID of the download record.
	BsonObjectID _id;
	/// ID of the downloaded package.
	BsonObjectID package_;
	/// Version string of the downloaded version.
	string version_;
	/// Time of the download; set to the current UTC time by addDownload.
	SysTime time;
	/// User agent string passed to addDownload.
	string userAgent;
}
533 
/// Statistics stored with each package.
struct DbPackageStats {
	/// Time of the last update; set by DbController.updatePackageStats.
	SysTime updatedAt;
	/// Download counters.
	DbDownloadStats downloads;
	/// Repository counters.
	DbRepoStats repo;
	float score = 0; // 0 - invalid, 1-5 - higher means more relevant
	/// Lowest value accepted for `score`.
	enum minScore = 0;
	/// Highest value accepted for `score`.
	enum maxScore = 5;

	// `score` must always lie within [minScore, maxScore]; the offending
	// value is used as the assertion message
	invariant
	{
		assert(minScore <= score && score <= maxScore, score.to!string);
	}
}
547 
/// Download counters: overall and for the monthly, weekly and daily time
/// windows. `T` is the type of each counter.
struct DbDownloadStatsT(T=uint) {
	T total, monthly, weekly, daily;
}

/// Download counters with plain `uint` values.
alias DbDownloadStats = DbDownloadStatsT!uint;
553 
/// Repository counters: stars, watchers, forks and issues. `T` is the type
/// of each counter.
struct DbRepoStatsT(T=uint) {
	T stars, watchers, forks, issues;
}

/// Repository counters with plain `uint` values.
alias DbRepoStats = DbRepoStatsT!uint;
559 
/// Aggregated statistics over all packages, as computed by
/// DbController.getStatDistributions.
struct DbStatDistributions {
	/// Sum, mean and standard deviation of a single counter.
	static struct Agg { ulong sum; float mean = 0, std = 0; }
	/// Aggregates of the download counters.
	DbDownloadStatsT!Agg downloads;
	/// Aggregates of the repository counters, keyed by repository kind.
	DbRepoStatsT!Agg[string] repos;
}
565 
/// Ordering predicate for version entries: compares their version strings
/// using the string overload of `vcmp`.
bool vcmp(DbPackageVersion a, DbPackageVersion b)
@safe {
	string lhs = a.version_;
	string rhs = b.version_;
	return vcmp(lhs, rhs);
}
570 
/// Ordering predicate for version strings: returns `true` if `va` orders
/// before `vb` when both are interpreted as dub `Version` values.
bool vcmp(string va, string vb)
@safe {
	import dub.dependency;

	auto lhs = Version(va);
	auto rhs = Version(vb);
	return lhs < rhs;
}
576 
/// Splits `str` into its maximal runs of identifier characters (see
/// `isIdentChar`); all other characters act as separators and are dropped.
private string[] splitAlphaNumParts(string str)
@safe {
	string[] parts;
	while (true) {
		// discard the separators in front of the next run
		while (!str.empty && !str.front.isIdentChar()) str.popFront();
		if (str.empty) return parts;

		// byte offset of the first separator, or the full length if none follows
		size_t run_length = str.length;
		foreach (offset, dchar ch; str) {
			if (isIdentChar(ch)) continue;
			run_length = offset;
			break;
		}

		// str starts with an identifier character here, so the run is non-empty
		parts ~= str[0 .. run_length];
		str = str[run_length .. $];
	}
}
597 
/// Returns `true` if `ch` is a Unicode alphabetic or numeric character.
private bool isIdentChar(dchar ch)
@safe {
	if (std.uni.isAlpha(ch)) return true;
	return std.uni.isNumber(ch);
}