/**
	Copyright: © 2013 rejectedsoftware e.K.
	License: Subject to the terms of the GNU GPLv3 license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module dubregistry.dbcontroller;

import dub.semver;
import std.array;
import std.algorithm;
import std.exception;
//import std.string;
import std.typecons : tuple;
import std.uni;
import vibe.vibe;


/**
	MongoDB-backed storage for the registry.

	Wraps three collections of a single database: "packages" (package
	records including their versions and stats), "downloads" (one document
	per recorded download) and "files" (binary blobs such as package logos).
*/
class DbController {
@safe:

	private {
		MongoCollection m_packages;
		MongoCollection m_downloads;
		MongoCollection m_files;
	}

	private alias bson = serializeToBson;

	/**
		Connects to the MongoDB server at 127.0.0.1, opens the database
		`dbname` and brings the stored data and indices up to date.

		The constructor performs schema migrations on the "packages"
		collection, ensures the regular indices exist, drops the old full
		text index and inserts the definition of the current one.

		Params:
			dbname = Name of the MongoDB database to use
	*/
	this(string dbname)
	{
		auto db = connectMongoDB("127.0.0.1").getDatabase(dbname);
		m_packages = db["packages"];
		m_downloads = db["downloads"];
		m_files = db["files"];

		//
		// migrations:
		//

		version (DubRegistry_EnableLegacyMigrations) {
			// update package format
			foreach(p; m_packages.find()){
				bool any_change = false;
				// "branches" stored as an object -> convert to an array of its values
				if (p["branches"].type == Bson.Type.object) {
					Bson[] branches;
					foreach (b; p["branches"].byValue)
						branches ~= b;
					p["branches"] = branches;
					any_change = true;
				}
				// "branches" stored as an array -> merge its entries into "versions"
				if (p["branches"].type == Bson.Type.array) {
					auto versions = p["versions"].get!(Bson[]);
					foreach (b; p["branches"].byValue) versions ~= b;
					p["branches"] = Bson(null);
					p["versions"] = Bson(versions);
					any_change = true;
				}
				if (any_change) m_packages.update(["_id": p["_id"]], p);
			}

			// add updateCounter field for packages that don't have it yet
			m_packages.update(["updateCounter": ["$exists": false]], ["$set" : ["updateCounter" : 0L]], UpdateFlags.multiUpdate);
		}

		// add default non-@optional stats to packages
		DbPackageStats stats;
		m_packages.update(["stats": ["$exists": false]], ["$set": ["stats": stats]], UpdateFlags.multiUpdate);

		// rename stats.rating -> stats.score
		m_packages.update(Bson.emptyObject(), ["$rename": ["stats.rating": "stats.score"]], UpdateFlags.multiUpdate);

		// default initialize missing scores with zero
		float score = 0;
		m_packages.update(["stats.score": ["$exists": false]], ["$set": ["stats.score": score]], UpdateFlags.multiUpdate);

		// remove old logo fields
		m_packages.update(["logoHash": ["$exists": true]], ["$unset": ["logo": 0, "logoHash": 0]], UpdateFlags.multiUpdate);

		// create indices
		m_packages.ensureIndex([tuple("name", 1)], IndexFlags.Unique);
		m_packages.ensureIndex([tuple("stats.score", 1)]);
		m_downloads.ensureIndex([tuple("package", 1), tuple("version", 1)]);

		// drop old text index versions
		db.runCommand(["dropIndexes": "packages", "index": "packages_full_text_search_index"]);

		// add current text index
		Bson[string] doc;
		doc["v"] = 1;
		doc["key"] = ["_fts": Bson("text"), "_ftsx": Bson(1)];
		doc["ns"] = db.name ~ "." ~ m_packages.name;
		doc["name"] = "packages_full_text_search_index_v2";
		doc["weights"] = [
			"name": Bson(4),
			"categories": Bson(3),
			"versions.info.description" : Bson(3),
			"versions.info.homepage" : Bson(1),
			"versions.info.author" : Bson(1),
			"versions.readme" : Bson(2),
		];
		doc["background"] = true;
		db["system.indexes"].insert(doc);

		version (DubRegistry_RepairVersionOrder) {
			// sort package versions newest to oldest
			// NOTE: since quite a while, versions are inserted atomically
			//       in the proper order, so that this is not necessary as a
			//       general precaution anymore
			repairVersionOrder();
		}
	}

	/**
		Inserts a new package record.

		If `pack._id` is still `BsonObjectID.init`, a fresh ID is generated
		and written back into `pack` before the insert.

		Params:
			pack = The package to insert; its `_id` may be filled in

		Throws:
			`Exception` if a package with the same name already exists.
	*/
	void addPackage(ref DbPackage pack)
	{
		enforce(m_packages.findOne(["name": pack.name], ["_id": true]).isNull(), "A package with the same name is already registered.");
		if (pack._id == BsonObjectID.init)
			pack._id = BsonObjectID.generate();
		m_packages.insert(pack);
	}

	/**
		Replaces the package record with the given ID, inserting it if no
		such record exists yet (upsert).

		Throws:
			`Exception` if `pack._id` is `BsonObjectID.init`.
	*/
	void addOrSetPackage(ref DbPackage pack)
	{
		enforce(pack._id != BsonObjectID.init, "Cannot update a packag with no ID.");
		m_packages.update(["_id": pack._id], pack, UpdateFlags.upsert);
	}

	/**
		Looks up a package record by name.

		Throws:
			`RecordNotFound` if no package with that name exists.
	*/
	DbPackage getPackage(string packname)
	{
		auto pack = m_packages.findOne!DbPackage(["name": packname]);
		enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
		return pack;
	}

	/**
		Queries all package records whose name is contained in `packnames`.

		Returns:
			The result of the `find` call, yielding `DbPackage` values.
	*/
	auto getPackages(scope string[] packnames...)
	{
		return m_packages.find!DbPackage(["name": ["$in": serializeToBson(packnames)]]);
	}

	/**
		Returns the `_id` of the package with the given name.

		Throws:
			`Exception` if no package with that name exists.
	*/
	BsonObjectID getPackageID(string packname)
	{
		// only the _id field is requested from the database
		static struct PID { BsonObjectID _id; }
		auto pid = m_packages.findOne!PID(["name": packname], ["_id": 1]);
		enforce(!pid.isNull(), "Unknown package name.");
		return pid._id;
	}

	/**
		Looks up a package record by ID.

		Throws:
			`RecordNotFound` if no package with that ID exists.
	*/
	DbPackage getPackage(BsonObjectID id)
	{
		auto pack = m_packages.findOne!DbPackage(["_id": id]);
		enforce!RecordNotFound(!pack.isNull(), "Unknown package ID.");
		return pack;
	}

	/// Returns a range of the names of all stored packages.
	auto getAllPackages()
	{
		return m_packages.find(Bson.emptyObject, ["name": 1]).map!(p => p["name"].get!string)();
	}

	/// Returns a range of the IDs of all stored packages.
	auto getAllPackageIDs()
	{
		return m_packages.find(Bson.emptyObject, ["_id": 1]).map!(p => p["_id"].get!BsonObjectID)();
	}

	/// Queries all package records as complete `DbPackage` values.
	auto getPackageDump()
	{
		return m_packages.find!DbPackage(Bson.emptyObject);
	}

	/// Returns a range of the names of all packages whose `owner` is `user_id`.
	auto getUserPackages(BsonObjectID user_id)
	{
		return m_packages.find(["owner": user_id], ["name": 1]).map!(p => p["name"].get!string)();
	}

	/**
		Tests whether the package `package_name` exists and is owned by
		`user_id`.
	*/
	bool isUserPackage(BsonObjectID user_id, string package_name)
	{
		// only the owner field is requested from the database
		static struct PO { BsonObjectID owner; }
		auto p = m_packages.findOne!PO(["name": package_name], ["owner": 1]);
		return !p.isNull && p.owner == user_id;
	}

	/**
		Removes the package with the given name, provided that it is owned
		by `user` (both conditions are part of the removal query).
	*/
	void removePackage(string packname, BsonObjectID user)
	{
		m_packages.remove(["name": Bson(packname), "owner": Bson(user)]);
	}

	/// Overwrites the `errors` list of the named package.
	void setPackageErrors(string packname, string[] error...)
	{
		m_packages.update(["name": packname], ["$set": ["errors": error]]);
	}

	/// Overwrites the `categories` list of the named package.
	void setPackageCategories(string packname, string[] categories...)
	{
		m_packages.update(["name": packname], ["$set": ["categories": categories]]);
	}

	/// Overwrites the `repository` field of the named package.
	void setPackageRepository(string packname, DbRepository repo)
	{
		m_packages.update(["name": packname], ["$set": ["repository": repo]]);
	}

	/**
		Sets or clears the logo of the named package.

		A non-empty `png` is stored as a new document in the files
		collection and referenced from the package's `logo` field; an empty
		`png` unsets the field. In both cases the file document previously
		referenced by the package is removed.

		Params:
			packname = Name of the package to modify
			png = Raw logo data, or an empty array to remove the logo
	*/
	void setPackageLogo(string packname, bdata_t png)
	{
		Bson update;

		if (png.length) {
			auto id = BsonObjectID.generate();
			m_files.insert([
				"_id": Bson(id),
				"data": Bson(BsonBinData(BsonBinData.Type.generic, png))
			]);

			update = serializeToBson(["$set": ["logo": id]]);
		} else {
			update = serializeToBson(["$unset": ["logo": 0]]);
		}

		// remove existing logo file
		auto l = m_packages.findOne(["name": packname], ["logo": 1]);
		if (!l.isNull && !l.tryIndex("logo").isNull)
			m_files.remove(["_id": l["logo"]]);

		// set the new logo
		m_packages.update(["name": packname], update);
	}

	/**
		Fetches the logo data of the named package.

		Params:
			packname = Name of the package
			rev = Receives the raw bytes of the logo file's object ID; only
				assigned when logo data is returned

		Returns:
			The stored logo data, or `null` if the package does not exist,
			has no `logo` field, or the referenced file is missing.
	*/
	bdata_t getPackageLogo(string packname, out bdata_t rev)
	{
		auto bpack = m_packages.findOne(["name": packname], ["logo": 1]);
		if (bpack.isNull) return null;

		auto id = bpack.tryIndex("logo");
		if (id.isNull) return null;

		auto data = m_files.findOne!DbPackageFile(["_id": id.get]);
		if (data.isNull()) return null;

		rev = (cast(ubyte[])id.get.get!BsonObjectID).idup;
		return data.get.data.rawData;
	}

	/**
		Adds a version to the named package, keeping the version list
		sorted according to `vcmp`.

		The update is a lockless read-modify-write cycle: the version list
		is read together with `updateCounter`, and the write only succeeds
		if the counter is still unchanged. On a conflict the cycle is
		retried; the retry counter is bounded by 20.

		Params:
			packname = Name of the package to modify
			ver = The version to add; `ver.version_` must be a branch
				("~" prefix) or a valid version (checked with `assert`)

		Throws:
			`Exception` once the retry limit has been exhausted.
	*/
	void addVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());

		size_t nretrys = 0;

		while (true) {
			auto pack = m_packages.findOne(["name": packname], ["versions": true, "updateCounter": true]);
			auto counter = pack["updateCounter"].get!long;
			auto versions = deserializeBson!(DbPackageVersion[])(pack["versions"]);
			auto new_versions = versions ~ ver;
			new_versions.sort!((a, b) => vcmp(a, b));

			// remove versions with invalid dependency names to avoid the findAndModify below to fail
			() @trusted {
				new_versions = new_versions.filter!(
					v => !v.info["dependencies"].opt!(Json[string]).byKey.canFind!(k => k.canFind("."))
				).array;
			} ();

			//assert((cast(Json)bversions).toString() == (cast(Json)serializeToBson(versions)).toString());

			// matches only if nobody else modified the package in the meantime
			auto res = m_packages.findAndModify(
				["name": Bson(packname), "updateCounter": Bson(counter)],
				["$set": ["versions": serializeToBson(new_versions), "updateCounter": Bson(counter+1)]],
				["_id": true]);

			if (!res.isNull) return;

			enforce(nretrys++ < 20, format("Failed to store updated version list for %s", packname));
			logDebug("Failed to update version list atomically, retrying...");
		}
	}

	/**
		Removes all entries with version string `ver` from the version list
		of the named package.
	*/
	void removeVersion(string packname, string ver)
	{
		assert(ver.startsWith("~") || ver.isValidVersion());
		m_packages.update(["name": packname], ["$pull": ["versions": ["version": ver]]]);
	}

	/**
		Replaces the stored entry of the named package whose version string
		equals `ver.version_` with `ver`.
	*/
	void updateVersion(string packname, DbPackageVersion ver)
	{
		assert(ver.version_.startsWith("~") || ver.version_.isValidVersion());
		m_packages.update(["name": packname, "versions.version": ver.version_], ["$set": ["versions.$": ver]]);
	}

	/// Tests whether the named package has an entry with version string `ver`.
	bool hasVersion(string packname, string ver)
	{
		auto ret = m_packages.findOne(["name": packname, "versions.version" : ver], ["_id": true]);
		return !ret.isNull();
	}

	/**
		Returns the version string of the last entry in the version list of
		the named package.

		Returns:
			The version string, or `null` if the package does not exist or
			the query did not yield exactly one version entry.
	*/
	string getLatestVersion(string packname)
	{
		// $slice: -1 limits the returned array to its last element
		auto slice = serializeToBson(["$slice": -1]);
		auto pack = m_packages.findOne(["name": packname], ["_id": Bson(true), "versions": slice]);
		if (pack.isNull() || pack["versions"].isNull() || pack["versions"].length != 1) return null;
		return deserializeBson!(string)(pack["versions"][0]["version"]);
	}

	/**
		Returns the stored information for one version of a package.

		Throws:
			`Exception` if no package with that name and version exists.
	*/
	DbPackageVersion getVersionInfo(string packname, string ver)
	{
		// "versions.$" projects only the array element matched by the query
		auto pack = m_packages.findOne(["name": packname, "versions.version": ver], ["versions.$": true]);
		enforce(!pack.isNull(), "unknown package/version");
		assert(pack["versions"].length == 1);
		return deserializeBson!(DbPackageVersion)(pack["versions"][0]);
	}

	/**
		Searches the package collection.

		If `query` is empty after stripping, all packages are returned,
		sorted by `stats.score` using the sort specification `1`.
		Otherwise a `$text` search is performed; the hits are sorted by
		text score on the database side and then stably re-sorted by their
		rounded `stats.score`, highest first.

		Params:
			query = The search string

		Returns:
			The matching packages as an array.
	*/
	DbPackage[] searchPackages(string query)
	{
		import std.math : round;

		if (!query.strip.length) {
			return m_packages.find()
				.sort(["stats.score": 1])
				.map!(deserializeBson!DbPackage)
				.array;
		}

		return m_packages
			.find(["$text": ["$search": query]], ["score": bson(["$meta": "textScore"])])
			.sort(["score": bson(["$meta": "textScore"])])
			.map!(deserializeBson!DbPackage)
			.array
			// sort by bucketized score preserving FTS score order
			.sort!((a, b) => a.stats.score.round > b.stats.score.round, SwapStrategy.stable)
			.release;
	}

	/**
		Records a download of a package version.

		The download is time stamped with the current UTC time.

		Params:
			pack = ID of the downloaded package
			ver = Version string of the downloaded version
			user_agent = User agent string to store with the record

		Returns:
			The ID generated for the new download record.
	*/
	BsonObjectID addDownload(BsonObjectID pack, string ver, string user_agent)
	{
		DbPackageDownload download;
		download._id = BsonObjectID.generate();
		download.package_ = pack;
		download.version_ = ver;
		download.time = Clock.currTime(UTC());
		download.userAgent = user_agent;
		m_downloads.insert(download);
		return download._id;
	}

	/**
		Returns the stored statistics of the named package.

		Throws:
			`RecordNotFound` if no package with that name exists.
	*/
	DbPackageStats getPackageStats(string packname)
	{
		// only the stats field is requested from the database
		static struct PS { DbPackageStats stats; }
		auto pack = m_packages.findOne!PS(["name": Bson(packname)], ["stats": true]);
		enforce!RecordNotFound(!pack.isNull(), "Unknown package name.");
		logDebug("getPackageStats(%s) %s", packname, pack.stats);
		return pack.stats;
	}

	/**
		Stores new statistics for the package with the given ID.

		Params:
			packId = ID of the package to update
			stats = The statistics to store; `updatedAt` is set to the
				current UTC time before writing
	*/
	void updatePackageStats(BsonObjectID packId, ref DbPackageStats stats)
	{
		stats.updatedAt = Clock.currTime(UTC());
		logDebug("updatePackageStats(%s, %s)", packId, stats);
		m_packages.update(["_id": packId], ["$set": ["stats": stats]]);
	}

	/**
		Counts the recorded downloads of a package.

		Params:
			packId = ID of the package
			ver = Optional version string; if non-empty, only downloads of
				that version are counted

		Returns:
			The total number of downloads and the numbers within the last
			30 days, 7 days and 1 day, or `DbDownloadStats.init` if the
			aggregation yields no result.
	*/
	DbDownloadStats aggregateDownloadStats(BsonObjectID packId, string ver = null)
	{
		static Bson newerThan(SysTime time)
		{
			// doc.time >= time ? 1 : 0
			alias bs = serializeToBson;
			return bs([
				"$cond": [bs(["$gte": [bs("$time"), bs(time)]]), bs(1), bs(0)]
			]);
		}

		auto match = Bson.emptyObject();
		match["package"] = Bson(packId);
		if (ver.length) match["version"] = ver;

		immutable now = Clock.currTime;
		// project every download to 0/1 flags per time window, then sum them up
		auto res = m_downloads.aggregate(
			["$match": match],
			["$project": [
					"_id": Bson(false),
					"total": serializeToBson(["$literal": 1]),
					"monthly": newerThan(now - 30.days),
					"weekly": newerThan(now - 7.days),
					"daily": newerThan(now - 1.days)]],
			["$group": [
					"_id": Bson(null), // single group
					"total": Bson(["$sum": Bson("$total")]),
					"monthly": Bson(["$sum": Bson("$monthly")]),
					"weekly": Bson(["$sum": Bson("$weekly")]),
					"daily": Bson(["$sum": Bson("$daily")])]]);
		assert(res.length <= 1);
		return res.length ? deserializeBson!DbDownloadStats(res[0]) : DbDownloadStats.init;
	}

	/**
		Computes sum, mean and standard deviation of the package statistics
		over all packages.

		Download statistics are aggregated over the whole collection, repo
		statistics are aggregated separately per `repository.kind`.
	*/
	DbStatDistributions getStatDistributions()
	{
		// Aggregates every member of T, read from the document field
		// `prefix`.member. With a null groupBy a single T is returned,
		// otherwise an associative array keyed by the groupBy field's value.
		auto aggregate(T, string prefix, string groupBy)()
		@safe {
			auto group = ["_id": Bson(groupBy ? "$"~groupBy : null)];
			Bson[string] project;
			foreach (mem; __traits(allMembers, T))
			{
				static assert(is(typeof(__traits(getMember, T.init, mem)) == DbStatDistributions.Agg));
				static assert([__traits(allMembers, DbStatDistributions.Agg)] == ["sum", "mean", "std"]);
				group[mem~"_sum"] = bson(["$sum": "$"~prefix~"."~mem]);
				group[mem~"_mean"] = bson(["$avg": "$"~prefix~"."~mem]);
				group[mem~"_std"] = bson(["$stdDevPop": "$"~prefix~"."~mem]);
				// reshape the flat <mem>_* fields into one Agg-like object per member
				project[mem] = bson([
					"mean": "$"~mem~"_mean",
					"sum": "$"~mem~"_sum",
					"std": "$"~mem~"_std"
				]);
			}
			auto res = m_packages.aggregate(["$group": group], ["$project": project]);

			static if (groupBy is null)
			{
				if (res.length == 0)
					return T.init;
				assert(res.length == 1);
				return res[0].deserializeBson!T;
			}
			else
			{
				T[string] ret;
				foreach (doc; res.byValue)
					ret[doc["_id"].get!string] = doc.deserializeBson!T;
				return ret;
			}
		}

		DbStatDistributions ret;
		ret.downloads = aggregate!(typeof(ret.downloads), "stats.downloads", null);
		ret.repos = aggregate!(typeof(ret.repos[""]), "stats.repo", "repository.kind");
		return ret;
	}

	/**
		Rewrites the version list of every package so that it contains only
		branches and valid versions, is sorted according to `vcmp` and has
		no adjacent entries with the same version string. Packages whose
		list is already in that state are left untouched.
	*/
	private void repairVersionOrder()
	{
		foreach( bp; m_packages.find() ){
			auto p = deserializeBson!DbPackage(bp);
			auto newversions = p.versions
				.filter!(v => v.version_.startsWith("~") || v.version_.isValidVersion)
				.array
				.sort!((a, b) => vcmp(a, b))
				.uniq!((a, b) => a.version_ == b.version_)
				.array;
			if (p.versions != newversions)
				m_packages.update(["_id": p._id], ["$set": ["versions": newversions]]);
		}
	}
}

/// Thrown by the lookup methods of `DbController` when a record does not exist.
class RecordNotFound : Exception
{
	@nogc @safe pure nothrow this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
	{
		super(msg, file, line, next);
	}

	@nogc @safe pure nothrow this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
	{
		super(msg, file, line, next);
	}
}

/// A package record as stored in the "packages" collection.
struct DbPackage {
	BsonObjectID _id;
	BsonObjectID owner;
	string name;
	DbRepository repository;
	DbPackageVersion[] versions;
	DbPackageStats stats;
	string[] errors;
	string[] categories;
	long updateCounter = 0; // used to implement lockless read-modify-write cycles
	@optional BsonObjectID logo; // reference to m_files
}

/// Identifies the source repository of a package.
struct DbRepository {
	string kind;
	string owner;
	string project;

	/**
		Fills `kind`, `owner` and `project` from a repository URL.

		The URL must use the http or https schema, its host must belong to
		GitHub, GitLab or Bitbucket, and its path must consist of exactly
		two non-empty segments: the owner and the project.

		Params:
			url = The repository URL to parse

		Throws:
			`Exception` if the URL violates any of the rules above.
	*/
	void parseURL(URL url) {
		string host = url.host;
		if (!url.schema.among!("http", "https"))
			throw new Exception("Invalid Repository Schema (only supports http and https)");
		// `kind` must name the hosting service whose host pattern matched
		if (host.endsWith(".github.com") || host == "github.com" || host == "github") {
			kind = "github";
		} else if (host.endsWith(".gitlab.com") || host == "gitlab.com" || host == "gitlab") {
			kind = "gitlab";
		} else if (host.endsWith(".bitbucket.org") || host == "bitbucket.org" || host == "bitbucket") {
			kind = "bitbucket";
		} else {
			throw new Exception("Please input a valid project URL to a GitHub, GitLab or BitBucket project.");
		}
		auto path = url.path.relativeTo(InetPath("/")).bySegment;
		if (path.empty)
			throw new Exception("Invalid Repository URL (no path)");
		if (path.empty || path.front.name.empty)
			throw new Exception("Invalid Repository URL (missing owner)");
		owner = path.front.name;
		path.popFront;
		if (path.empty || path.front.name.empty)
			throw new Exception("Invalid Repository URL (missing project)");
		project = path.front.name;
		path.popFront;
		if (!path.empty)
			throw new Exception("Invalid Repository URL (got more than owner and project)");
	}
}

/// A binary file as stored in the "files" collection.
struct DbPackageFile {
	BsonObjectID _id;
	BsonBinData data;
}

/// A single version (or branch) entry of a package.
struct DbPackageVersion {
	SysTime date;
	string version_;
	@optional string commitID;
	Json info;
	@optional string readme;
	@optional bool readmeMarkdown;
	@optional string docFolder;
}

/// A download record as stored in the "downloads" collection.
struct DbPackageDownload {
	BsonObjectID _id;
	BsonObjectID package_;
	string version_;
	SysTime time;
	string userAgent;
}

/// Statistics stored with each package.
struct DbPackageStats {
	SysTime updatedAt;
	DbDownloadStats downloads;
	DbRepoStats repo;
	float score = 0; // 0 - invalid, 1-5 - higher means more relevant
	enum minScore = 0;
	enum maxScore = 5;

	// the score must always lie within [minScore, maxScore]
	invariant
	{
		assert(minScore <= score && score <= maxScore, score.to!string);
	}
}

/// Download figures in total and per time window, each of type `T`.
struct DbDownloadStatsT(T=uint) {
	T total, monthly, weekly, daily;
}

alias DbDownloadStats = DbDownloadStatsT!uint;

/// Repository figures, each of type `T`.
struct DbRepoStatsT(T=uint) {
	T stars, watchers, forks, issues;
}

alias DbRepoStats = DbRepoStatsT!uint;

/// Aggregated statistics as returned by `DbController.getStatDistributions`.
struct DbStatDistributions {
	static struct Agg { ulong sum; float mean = 0, std = 0; }
	DbDownloadStatsT!Agg downloads;
	DbRepoStatsT!Agg[string] repos;
}

/// Ordering predicate for version entries; compares their version strings.
bool vcmp(DbPackageVersion a, DbPackageVersion b)
@safe {
	return vcmp(a.version_, b.version_);
}

/// Returns `true` if `Version(va)` orders before `Version(vb)`.
bool vcmp(string va, string vb)
@safe {
	import dub.dependency;
	return Version(va) < Version(vb);
}

/**
	Splits `str` into its maximal runs of identifier characters (see
	`isIdentChar`); all other characters act as separators and are dropped.
*/
private string[] splitAlphaNumParts(string str)
@safe {
	string[] ret;
	while (!str.empty) {
		// skip leading separators
		while (!str.empty && !str.front.isIdentChar()) str.popFront();
		if (str.empty) break;
		// find the byte offset of the first separator, if any
		size_t i = str.length;
		foreach (j, dchar ch; str)
			if (!isIdentChar(ch)) {
				i = j;
				break;
			}
		if (i > 0) {
			ret ~= str[0 .. i];
			str = str[i .. $];
		}
		if (!str.empty) str.popFront(); // pop non-ident-char
	}
	return ret;
}

/// Returns `true` if `ch` is alphabetic or numeric according to `std.uni`.
private bool isIdentChar(dchar ch)
@safe {
	return std.uni.isAlpha(ch) || std.uni.isNumber(ch);
}