/**
	Database access layer of the DUB registry: stores packages, their versions,
	logo files and download records in MongoDB.

	Copyright: © 2013 rejectedsoftware e.K.
	License: Subject to the terms of the GNU GPLv3 license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module dubregistry.dbcontroller;

import dub.semver;
import std.array;
import std.algorithm;
import std.exception;
//import std.string;
import std.typecons : tuple;
import std.uni;
import vibe.vibe;


/**
	Wraps the "packages", "downloads" and "files" MongoDB collections and
	exposes typed query/update operations on them.
*/
class DbController {
@safe:

	private {
		MongoCollection m_packages;
		MongoCollection m_downloads;
		MongoCollection m_files;
	}

	private alias bson = serializeToBson;

	/**
		Opens the database `dbname`, binds the collections, runs the data
		migrations and (re-)creates the indices, including the full text
		search index.

		Throws: `Exception` if the search index cannot be created.
	*/
	this(string dbname)
	{
		import dubregistry.mongodb : getMongoClient;
		auto db = getMongoClient.getDatabase(dbname);
		m_packages = db["packages"];
		m_downloads = db["downloads"];
		m_files = db["files"];

		//
		// migrations:
		//

		version (DubRegistry_EnableLegacyMigrations) {
			// update package format
			foreach(p; m_packages.find()){
				bool any_change = false;
				if (p["branches"].type == Bson.Type.object) {
					Bson[] branches;
					foreach (b; p["branches"].byValue)
						branches ~= b;
					p["branches"] = branches;
					any_change = true;
				}
				if (p["branches"].type == Bson.Type.array) {
					auto versions = p["versions"].get!(Bson[]);
					foreach (b; p["branches"].byValue) versions ~= b;
					p["branches"] = Bson(null);
					p["versions"] = Bson(versions);
					any_change = true;
				}
				if (any_change) m_packages.update(["_id": p["_id"]], p);
			}

			// add updateCounter field for packages that don't have it yet
			m_packages.update(["updateCounter": ["$exists": false]], ["$set" : ["updateCounter" : 0L]], UpdateFlags.multiUpdate);
		}

		// add default non-@optional stats to packages
		DbPackageStats stats;
		m_packages.update(["stats": ["$exists": false]], ["$set": ["stats": stats]], UpdateFlags.multiUpdate);

		// rename stats.rating -> stats.score
		m_packages.update(Bson.emptyObject(), ["$rename": ["stats.rating": "stats.score"]], UpdateFlags.multiUpdate);

		// default initialize missing scores with zero
		float score = 0;
		m_packages.update(["stats.score": ["$exists": false]], ["$set": ["stats.score": score]], UpdateFlags.multiUpdate);

		// remove old logo fields
		m_packages.update(["logoHash": ["$exists": true]], ["$unset": ["logo": 0, "logoHash": 0]], UpdateFlags.multiUpdate);

		// create indices
		m_packages.ensureIndex([tuple("name", 1)], IndexFlags.Unique);
		m_packages.ensureIndex([tuple("stats.score", 1)]);
		m_downloads.ensureIndex([tuple("package", 1), tuple("version", 1)]);

		// drop old text index versions
		db.runCommand(["dropIndexes": "packages", "index": "packages_full_text_search_index"]);
		db.runCommand(["dropIndexes": "packages", "index": "packages_full_text_search_index_v2"]);

		// add current text index
		immutable keyWeights = [
			"name": 8,
			"categories": 4,
			"versions.info.description": 2,
			"versions.info.authors": 1
		];
		Bson[string] fts;
		fts["key"] = Bson.emptyObject;
		fts["weights"] = Bson.emptyObject;
		foreach (k, w; keyWeights)
		{
			fts["key"][k] = Bson("text");
			fts["weights"][k] = Bson(w);
		}
		fts["name"] = "packages_full_text_search_index_v3";
		fts["background"] = true;
		auto cmd = Bson.emptyObject;
		cmd["createIndexes"] = Bson("packages");
		cmd["indexes"] = [Bson(fts)];
		auto res = db.runCommand(cmd);
		enforce(res["ok"].opt!double == 1.0, "Failed to create search index.\n"~res.toString);

		version (DubRegistry_RepairVersionOrder) {
			// sort package versions newest to oldest
			// NOTE: since quite a while, versions are inserted atomically
			//       in the proper order, so that this is not necessary as a
			//       general precaution anymore
			repairVersionOrder();
		}
	}

	/**
		Inserts a new package, generating an ID for it if it has none yet.

		Throws: `Exception` if a package with the same name already exists.
	*/
	void addPackage(ref DbPackage pack)
	{
		enforce(m_packages.findOne(["name": pack.name], ["_id": true]).isNull(), "A package with the same name is already registered.");
		if (pack._id == BsonObjectID.init)
			pack._id = BsonObjectID.generate();
		m_packages.insert(pack);
	}

	/**
		Replaces the package document with the ID of `pack`, inserting it if
		it does not exist (upsert).

		Throws: `Exception` if `pack` has no ID assigned.
	*/
	void addOrSetPackage(ref DbPackage pack)
	{
		enforce(pack._id != BsonObjectID.init, "Cannot update a package with no ID.");
		m_packages.update(["_id": pack._id], pack, UpdateFlags.upsert);
	}

	/**
		Looks up a package by name.

		Throws: `RecordNotFound` if no package with that name exists.
	*/
	DbPackage getPackage(string packname)
	{
		auto pack = m_packages.findOne!DbPackage(["name": packname]);
		enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
		return pack.get;
	}

	/// Queries all packages whose name is contained in `packnames`.
	auto getPackages(scope string[] packnames...)
	{
		return m_packages.find!DbPackage(["name": ["$in": serializeToBson(packnames)]]);
	}

	/**
		Returns the database ID of the package named `packname`.

		Throws: `RecordNotFound` if no package with that name exists.
	*/
	BsonObjectID getPackageID(string packname)
	{
		static struct PID { BsonObjectID _id; }
		auto pid = m_packages.findOne!PID(["name": packname], ["_id": 1]);
		// same exception type as the other "unknown package" lookups
		enforce!RecordNotFound(!pid.isNull(), "Unknown package name.");
		return pid.get._id;
	}

	/**
		Looks up a package by its database ID.

		Throws: `RecordNotFound` if no package with that ID exists.
	*/
	DbPackage getPackage(BsonObjectID id)
	{
		auto pack = m_packages.findOne!DbPackage(["_id": id]);
		enforce!RecordNotFound(!pack.isNull(), "Unknown package ID.");
		return pack.get;
	}

	/// Returns a range of the names of all packages.
	auto getAllPackages()
	{
		return m_packages.find(Bson.emptyObject, ["name": 1]).map!(p => p["name"].get!string)();
	}

	/// Returns a range of the IDs of all packages.
	auto getAllPackageIDs()
	{
		return m_packages.find(Bson.emptyObject, ["_id": 1]).map!(p => p["_id"].get!BsonObjectID)();
	}

	/// Returns a cursor over all package documents, fully deserialized.
	auto getPackageDump()
	{
		return m_packages.find!DbPackage(Bson.emptyObject);
	}

	/// Returns a range of the names of all packages owned by `user_id`.
	auto getUserPackages(BsonObjectID user_id)
	{
		return m_packages.find(["owner": user_id], ["name": 1]).map!(p => p["name"].get!string)();
	}

	/// Returns `true` if the package exists and its owner is `user_id`.
	bool isUserPackage(BsonObjectID user_id, string package_name)
	{
		static struct PO { BsonObjectID owner; }
		auto p = m_packages.findOne!PO(["name": package_name], ["owner": 1]);
		return !p.isNull && p.get.owner == user_id;
	}

	/// Removes the package `packname`, but only if it is owned by `user`.
	void removePackage(string packname, BsonObjectID user)
	{
		m_packages.remove(["name": Bson(packname), "owner": Bson(user)]);
	}

	/// Replaces the list of errors stored for the package.
	void setPackageErrors(string packname, string[] error...)
	{
		m_packages.update(["name": packname], ["$set": ["errors": error]]);
	}

	/// Replaces the list of categories stored for the package.
	void setPackageCategories(string packname, string[] categories...)
	{
		m_packages.update(["name": packname], ["$set": ["categories": categories]]);
	}

	/// Replaces the repository information stored for the package.
	void setPackageRepository(string packname, DbRepository repo)
	{
		m_packages.update(["name": packname], ["$set": ["repository": repo]]);
	}

	/**
		Sets or clears the logo of a package.

		A non-empty `png` is stored as a new document in the files collection
		and referenced from the package; an empty `png` removes the reference.
		The previously referenced logo file, if any, is deleted.
	*/
	void setPackageLogo(string packname, bdata_t png)
	{
		Bson update;

		if (png.length) {
			auto id = BsonObjectID.generate();
			m_files.insert([
				"_id": Bson(id),
				"data": Bson(BsonBinData(BsonBinData.Type.generic, png))
			]);

			update = serializeToBson(["$set": ["logo": id]]);
		} else {
			update = serializeToBson(["$unset": ["logo": 0]]);
		}

		// remove existing logo file
		auto l = m_packages.findOne(["name": packname], ["logo": 1]);
		if (!l.isNull && !l.tryIndex("logo").isNull)
			m_files.remove(["_id": l["logo"]]);

		// set the new logo
		m_packages.update(["name": packname], update);
	}

	/// Sets the documentation URL stored for the package.
	void setDocumentationURL(string packname, string documentationURL)
	{
		m_packages.update(["name": packname], ["$set": ["documentationURL": documentationURL]]);
	}

	/**
		Returns the raw logo data of a package.

		Params:
			packname = name of the package
			rev = receives the bytes of the logo file's object ID, which
				change whenever a new logo is stored

		Returns: The stored logo data, or `null` if the package is unknown,
			has no logo, or the referenced file is missing.
	*/
	bdata_t getPackageLogo(string packname, out bdata_t rev)
	{
		auto bpack = m_packages.findOne(["name": packname], ["logo": 1]);
		if (bpack.isNull) return null;

		auto id = bpack.tryIndex("logo");
		if (id.isNull) return null;

		auto data = m_files.findOne!DbPackageFile(["_id": id.get]);
		if (data.isNull()) return null;

		rev = (cast(ubyte[])id.get.get!BsonObjectID).idup;
		return data.get.data.rawData;
	}

	/**
		Adds a version to a package, keeping the version list sorted.

		The update is a lockless read-modify-write cycle guarded by the
		package's `updateCounter`; it is retried if another writer modified
		the package in between.

		Throws:
			`RecordNotFound` if the package does not exist;
			`Exception` if the update still fails after the retries.
	*/
	void addVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());

		size_t nretrys = 0;

		while (true) {
			auto pack = m_packages.findOne(["name": packname], ["versions": true, "updateCounter": true]);
			// fail with a meaningful error instead of indexing into a null document
			enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
			auto counter = pack["updateCounter"].get!long;
			auto versions = deserializeBson!(DbPackageVersion[])(pack["versions"]);
			auto new_versions = versions ~ ver;
			new_versions.sort!((a, b) => vcmp(a, b));

			// remove versions with invalid dependency names (containing a
			// dot) to keep the findAndModify below from failing
			() @trusted {
				new_versions = new_versions.filter!(
					v => !v.info["dependencies"].opt!(Json[string]).byKey.canFind!(k => k.canFind("."))
				).array;
			} ();

			//assert((cast(Json)bversions).toString() == (cast(Json)serializeToBson(versions)).toString());

			// only matches if nobody else bumped the counter since the read above
			auto res = m_packages.findAndModify(
				["name": Bson(packname), "updateCounter": Bson(counter)],
				["$set": ["versions": serializeToBson(new_versions), "updateCounter": Bson(counter+1)]],
				["_id": true]);

			if (!res.isNull) return;

			enforce(nretrys++ < 20, format("Failed to store updated version list for %s", packname));
			logDebug("Failed to update version list atomically, retrying...");
		}
	}

	/// Removes the version `ver` from the version list of the package.
	void removeVersion(string packname, string ver)
	{
		assert(ver.startsWith("~") || ver.isValidVersion());
		m_packages.update(["name": packname], ["$pull": ["versions": ["version": ver]]]);
	}

	/// Replaces the stored entry that has the same version string as `ver`.
	void updateVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());
		m_packages.update(["name": packname, "versions.version": ver.version_], ["$set": ["versions.$": ver]]);
	}

	/// Returns `true` if the package exists and contains the version `ver`.
	bool hasVersion(string packname, string ver)
	{
		auto ret = m_packages.findOne(["name": packname, "versions.version" : ver], ["_id": true]);
		return !ret.isNull();
	}

	/**
		Returns the version string of the last entry in the package's version
		list, or `null` if the package is unknown or has no versions.
	*/
	string getLatestVersion(string packname)
	{
		auto slice = serializeToBson(["$slice": -1]);
		auto pack = m_packages.findOne(["name": packname], ["_id": Bson(true), "versions": slice]);
		if (pack.isNull() || pack["versions"].isNull() || pack["versions"].length != 1) return null;
		return deserializeBson!(string)(pack["versions"][0]["version"]);
	}

	/**
		Returns the stored information for a single package version.

		Throws: `Exception` if the package or the version does not exist.
	*/
	DbPackageVersion getVersionInfo(string packname, string ver)
	{
		auto pack = m_packages.findOne(["name": packname, "versions.version": ver], ["versions.$": true]);
		enforce(!pack.isNull(), "unknown package/version");
		assert(pack["versions"].length == 1);
		return deserializeBson!(DbPackageVersion)(pack["versions"][0]);
	}

	/**
		Searches packages using the full text index.

		An empty (or whitespace-only) query returns all packages sorted by
		their score. Otherwise the 50 best text matches are fetched, their
		text score is normalized to the range of the package score and the
		result is ordered by a weighted sum of both.
	*/
	DbPackage[] searchPackages(string query)
	{
		if (!query.strip.length) {
			return m_packages.find()
				.sort(["stats.score": 1])
				.map!(deserializeBson!DbPackage)
				.array;
		}

		auto pkgs = m_packages
			.find(["$text": ["$search": query]], ["textScore": bson(["$meta": "textScore"])])
			.sort(["textScore": bson(["$meta": "textScore"])]) // sort to only keep most relevant results
			.limit(50) // limit irrelevant sort results (fixes #341)
			.map!(deserializeBson!DbPackage)
			.array;

		// normalize textScore to same scale as package score
		immutable minMaxTS = pkgs.map!(p => p.textScore).fold!(min, max)(0.0f, 0.0f);
		immutable rangeTS = minMaxTS[1] - minMaxTS[0];
		// a zero range (no results or all text scores equal to zero) would
		// divide by zero and turn every score into infinity/NaN
		immutable float scale = rangeTS > 0
			? (DbPackageStats.maxScore - DbPackageStats.minScore) / rangeTS
			: 0;
		foreach (ref pkg; pkgs)
			pkg.textScore = (pkg.textScore - minMaxTS[0]) * scale + DbPackageStats.minScore;

		// sort found packages by weighted textScore and package score
		return pkgs
			.sort!((a, b) => a.stats.score + 2 * a.textScore > b.stats.score + 2 * b.textScore)
			.release;
	}

	/**
		Records a download of version `ver` of the package with ID `pack`.

		Returns: The ID of the newly inserted download record.
	*/
	BsonObjectID addDownload(BsonObjectID pack, string ver, string user_agent)
	{
		DbPackageDownload download;
		download._id = BsonObjectID.generate();
		download.package_ = pack;
		download.version_ = ver;
		download.time = Clock.currTime(UTC());
		download.userAgent = user_agent;
		m_downloads.insert(download);
		return download._id;
	}

	/**
		Returns the statistics stored for a package.

		Throws: `RecordNotFound` if no package with that name exists.
	*/
	DbPackageStats getPackageStats(string packname)
	{
		static struct PS { DbPackageStats stats; }
		auto pack = m_packages.findOne!PS(["name": Bson(packname)], ["stats": true]);
		enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
		logDebug("getPackageStats(%s) %s", packname, pack.get.stats);
		return pack.get.stats;
	}

	/**
		Stores `stats` for the package with the given ID.

		`stats.updatedAt` is set to the current time before storing, which is
		why `stats` is taken by reference.
	*/
	void updatePackageStats(BsonObjectID packId, ref DbPackageStats stats)
	{
		stats.updatedAt = Clock.currTime(UTC());
		logDebug("updatePackageStats(%s, %s)", packId, stats);
		m_packages.update(["_id": packId], ["$set": ["stats": stats]]);
	}

	/**
		Counts the total downloads and those of the last 30 days, 7 days and
		1 day for a package.

		Params:
			packId = ID of the package
			ver = if non-empty, only downloads of this version are counted

		Returns: The aggregated counts, or `DbDownloadStats.init` if there
			are no matching download records.
	*/
	DbDownloadStats aggregateDownloadStats(BsonObjectID packId, string ver = null)
	{
		static Bson newerThan(SysTime time)
		{
			// doc.time >= time ? 1 : 0
			alias bs = serializeToBson;
			return bs([
				"$cond": [bs(["$gte": [bs("$time"), bs(time)]]), bs(1), bs(0)]
			]);
		}

		auto match = Bson.emptyObject();
		match["package"] = Bson(packId);
		if (ver.length) match["version"] = ver;

		immutable now = Clock.currTime;
		auto res = () @trusted { return m_downloads.aggregate(
			["$match": match],
			["$project": [
				"_id": Bson(false),
				"total": serializeToBson(["$literal": 1]),
				"monthly": newerThan(now - 30.days),
				"weekly": newerThan(now - 7.days),
				"daily": newerThan(now - 1.days)]],
			["$group": [
				"_id": Bson(null), // single group
				"total": Bson(["$sum": Bson("$total")]),
				"monthly": Bson(["$sum": Bson("$monthly")]),
				"weekly": Bson(["$sum": Bson("$weekly")]),
				"daily": Bson(["$sum": Bson("$daily")])]]);
		} ();
		assert(res.length <= 1);
		return res.length ? deserializeBson!DbDownloadStats(res[0]) : DbDownloadStats.init;
	}

	/**
		Computes sum, mean and standard deviation of the download statistics
		over all packages, and of the repository statistics grouped by
		repository kind.
	*/
	DbStatDistributions getStatDistributions()
	{
		// Aggregates every member of T (read from "<prefix>.<member>") over
		// all packages; with a non-null groupBy the result is one T per
		// distinct value of that field, keyed by the value.
		auto aggregate(T, string prefix, string groupBy)()
		@safe {
			auto group = ["_id": Bson(groupBy ? "$"~groupBy : null)];
			Bson[string] project;
			foreach (mem; __traits(allMembers, T))
			{
				static assert(is(typeof(__traits(getMember, T.init, mem)) == DbStatDistributions.Agg));
				static assert([__traits(allMembers, DbStatDistributions.Agg)] == ["sum", "mean", "std"]);
				group[mem~"_sum"] = bson(["$sum": "$"~prefix~"."~mem]);
				group[mem~"_mean"] = bson(["$avg": "$"~prefix~"."~mem]);
				group[mem~"_std"] = bson(["$stdDevPop": "$"~prefix~"."~mem]);
				project[mem] = bson([
					"mean": "$"~mem~"_mean",
					"sum": "$"~mem~"_sum",
					"std": "$"~mem~"_std"
				]);
			}
			auto res = () @trusted {
				return m_packages.aggregate(["$group": group], ["$project": project]);
			} ();

			static if (groupBy is null)
			{
				if (res.length == 0)
					return T.init;
				assert(res.length == 1);
				return res[0].deserializeBson!T;
			}
			else
			{
				T[string] ret;
				foreach (doc; res.byValue)
					ret[doc["_id"].get!string] = doc.deserializeBson!T;
				return ret;
			}
		}

		DbStatDistributions ret;
		ret.downloads = aggregate!(typeof(ret.downloads), "stats.downloads", null);
		ret.repos = aggregate!(typeof(ret.repos[""]), "stats.repo", "repository.kind");
		return ret;
	}

	// Drops invalid and duplicate versions of every package and re-sorts the
	// remaining ones, writing the list back only if it changed.
	private void repairVersionOrder()
	{
		foreach( bp; m_packages.find() ){
			auto p = deserializeBson!DbPackage(bp);
			auto newversions = p.versions
				.filter!(v => v.version_.startsWith("~") || v.version_.isValidVersion)
				.array
				.sort!((a, b) => vcmp(a, b))
				.uniq!((a, b) => a.version_ == b.version_)
				.array;
			if (p.versions != newversions)
				m_packages.update(["_id": p._id], ["$set": ["versions": newversions]]);
		}
	}
}

/// Thrown when a requested database record does not exist.
class RecordNotFound : Exception
{
	@nogc @safe pure nothrow this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
	{
		super(msg, file, line, next);
	}

	@nogc @safe pure nothrow this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
	{
		super(msg, file, line, next);
	}
}

/// Document layout of the "packages" collection.
struct DbPackage {
	BsonObjectID _id;
	BsonObjectID owner;
	string name;
	DbRepository repository;
	DbPackageVersion[] versions;
	DbPackageStats stats;
	string[] errors;
	string[] categories;
	long updateCounter = 0; // used to implement lockless read-modify-write cycles
	@optional BsonObjectID logo; // reference to m_files
	@optional string documentationURL;
	@optional float textScore = 0; // for FTS textScore in searchPackages
}

/// Identifies the source repository of a package.
struct DbRepository {
	string kind;
	string owner;
	string project;

	/**
		Fills in `kind`, `owner` and `project` from a repository URL.

		Only http/https URLs of GitHub, GitLab and BitBucket are accepted.
		The path must consist of exactly an owner and a project segment,
		except for GitLab, where all segments after the owner are joined
		with "/" to form the project.

		Throws: `Exception` if the URL does not match these requirements.
	*/
	void parseURL(URL url)
	{
		string host = url.host;
		if (!url.schema.among!("http", "https"))
			throw new Exception("Invalid Repository Schema (only supports http and https)");
		if (host.endsWith(".github.com") || host == "github.com" || host == "github") {
			kind = "github";
		} else if (host.endsWith(".gitlab.com") || host == "gitlab.com" || host == "gitlab") {
			kind = "gitlab";
		} else if (host.endsWith(".bitbucket.org") || host == "bitbucket.org" || host == "bitbucket") {
			kind = "bitbucket";
		} else {
			throw new Exception("Please input a valid project URL to a GitHub, GitLab or BitBucket project.");
		}
		auto path = url.path.relativeTo(InetPath("/")).bySegment;
		if (path.empty)
			throw new Exception("Invalid Repository URL (no path)");
		if (path.front.name.empty)
			throw new Exception("Invalid Repository URL (missing owner)");
		owner = path.front.name;
		path.popFront;
		if (path.empty || path.front.name.empty)
			throw new Exception("Invalid Repository URL (missing project)");

		if(kind == "gitlab") // Allow any number of segments, as GitLab's subgroups can be nested
			project = path.map!"a.name".join("/");
		else
			project = path.front.name;
		path.popFront;
		if (!path.empty && kind != "gitlab")
			throw new Exception("Invalid Repository URL (got more than owner and project)");
	}

	unittest {
		DbRepository r;
		r.parseURL(URL("https://github.com/foo/bar"));
		assert(r == DbRepository("github", "foo", "bar"));
		r.parseURL(URL("http://bitbucket.org/bar/baz/"));
		assert(r == DbRepository("bitbucket", "bar", "baz"));
		r.parseURL(URL("https://gitlab.com/foo/bar"));
		assert(r == DbRepository("gitlab", "foo", "bar"));
		r.parseURL(URL("https://gitlab.com/group/subgroup/subsubgroup/project"));
		assert(r == DbRepository("gitlab", "group", "subgroup/subsubgroup/project"));
		assertThrown(r.parseURL(URL("ftp://github.com/foo/bar")));
		assertThrown(r.parseURL(URL("https://example.com/foo/bar")));
		assertThrown(r.parseURL(URL("http://github.com/foo/")));
		assertThrown(r.parseURL(URL("http://github.com/")));
		assertThrown(r.parseURL(URL("http://github.com/foo/bar/baz")));
	}
}

/// Document layout of the "files" collection (currently package logos).
struct DbPackageFile {
	BsonObjectID _id;
	BsonBinData data;
}

/// A single version entry within a package document.
struct DbPackageVersion {
	SysTime date;
	string version_;
	@optional string commitID;
	Json info;
	@optional string readme;
	@optional bool readmeMarkdown;
	@optional string docFolder;
}

/// Document layout of the "downloads" collection.
struct DbPackageDownload {
	BsonObjectID _id;
	BsonObjectID package_;
	string version_;
	SysTime time;
	string userAgent;
}

/// Statistics stored along with each package.
struct DbPackageStats {
	SysTime updatedAt;
	DbDownloadStats downloads;
	DbRepoStats repo;
	float score = 0; // 0 - invalid, 1-5 - higher means more relevant
	enum minScore = 0;
	enum maxScore = 5;

	invariant
	{
		assert(minScore <= score && score <= maxScore, score.to!string);
	}
}

/// Download figures for different time spans.
struct DbDownloadStatsT(T=uint) {
	T total, monthly, weekly, daily;
}

alias DbDownloadStats = DbDownloadStatsT!uint;

/// Figures describing the source repository of a package.
struct DbRepoStatsT(T=uint) {
	T stars, watchers, forks, issues;
}

alias DbRepoStats = DbRepoStatsT!uint;

/// Aggregated statistics over all packages, see `DbController.getStatDistributions`.
struct DbStatDistributions {
	static struct Agg { ulong sum; float mean = 0, std = 0; }
	DbDownloadStatsT!Agg downloads;
	DbRepoStatsT!Agg[string] repos;
}

/// Returns `true` if the version of `a` orders before the version of `b`.
bool vcmp(DbPackageVersion a, DbPackageVersion b)
@safe {
	return vcmp(a.version_, b.version_);
}

/// Returns `true` if `va` orders before `vb` when both are compared as dub `Version`s.
bool vcmp(string va, string vb)
@safe {
	import dub.dependency;
	return Version(va) < Version(vb);
}

// Splits `str` into its maximal runs of identifier characters, dropping all
// characters in between.
private string[] splitAlphaNumParts(string str)
@safe {
	string[] ret;
	while (!str.empty) {
		while (!str.empty && !str.front.isIdentChar()) str.popFront();
		if (str.empty) break;
		size_t i = str.length;
		foreach (j, dchar ch; str)
			if (!isIdentChar(ch)) {
				i = j;
				break;
			}
		if (i > 0) {
			ret ~= str[0 .. i];
			str = str[i .. $];
		}
		if (!str.empty) str.popFront(); // pop non-ident-char
	}
	return ret;
}

// Returns `true` for alphabetic and numeric Unicode characters.
private bool isIdentChar(dchar ch)
@safe {
	return std.uni.isAlpha(ch) || std.uni.isNumber(ch);
}