From de38887296bd4e6799ce98cf1093aa195aee67c9 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Mon, 27 Jul 2020 19:52:36 +0200
Subject: [PATCH 1/9] Refactor owner updating to update changed data and remove
 old owners

---
 src/db/add_package.rs              | 109 ++++++++++++++++++++++++-----
 src/db/mod.rs                      |   5 +-
 src/docbuilder/rustwide_builder.rs |  14 +++-
 src/index/api.rs                   |  30 +++++---
 src/test/fakes.rs                  |  22 +++---
 5 files changed, 139 insertions(+), 41 deletions(-)

diff --git a/src/db/add_package.rs b/src/db/add_package.rs
index 985dcf265..b85b732ab 100644
--- a/src/db/add_package.rs
+++ b/src/db/add_package.rs
@@ -7,11 +7,11 @@ use std::{
 use crate::{
     docbuilder::BuildResult,
     error::Result,
-    index::api::{CrateOwner, RegistryCrateData},
+    index::api::{CrateData, CrateOwner, ReleaseData},
     storage::CompressionAlgorithm,
     utils::MetadataPackage,
 };
-use log::debug;
+use log::{debug, info};
 use postgres::Connection;
 use regex::Regex;
 use serde_json::Value;
@@ -32,7 +32,7 @@ pub(crate) fn add_package_into_database(
     default_target: &str,
     source_files: Option<Value>,
     doc_targets: Vec<String>,
-    registry_data: &RegistryCrateData,
+    registry_data: &ReleaseData,
     has_docs: bool,
     has_examples: bool,
     compression_algorithms: std::collections::HashSet<CompressionAlgorithm>,
@@ -117,7 +117,6 @@ pub(crate) fn add_package_into_database(
 
     add_keywords_into_database(&conn, &metadata_pkg, release_id)?;
     add_authors_into_database(&conn, &metadata_pkg, release_id)?;
-    add_owners_into_database(&conn, &registry_data.owners, crate_id)?;
     add_compression_into_database(&conn, compression_algorithms.into_iter(), release_id)?;
 
     // Update the crates table with the new release
@@ -328,31 +327,103 @@ fn add_authors_into_database(
     Ok(())
 }
 
+pub(crate) fn update_crate_data_in_database(
+    conn: &Connection,
+    name: &str,
+    registry_data: &CrateData,
+) -> Result<()> {
+    info!("Updating crate data for {}", name);
+    let crate_id = conn
+        .query("SELECT id FROM crates WHERE crates.name = $1", &[&name])?
+        .get(0)
+        .get(0);
+
+    update_owners_in_database(conn, &registry_data.owners, crate_id)?;
+
+    Ok(())
+}
+
 /// Adds owners into database
-fn add_owners_into_database(conn: &Connection, owners: &[CrateOwner], crate_id: i32) -> Result<()> {
+fn update_owners_in_database(
+    conn: &Connection,
+    owners: &[CrateOwner],
+    crate_id: i32,
+) -> Result<()> {
+    let existing_owners: Vec<String> = conn
+        .query(
+            "
+        SELECT login
+        FROM owners
+        INNER JOIN owner_rels
+            ON owner_rels.oid = owners.id
+        WHERE owner_rels.cid = $1
+    ",
+            &[&crate_id],
+        )?
+        .into_iter()
+        .map(|row| row.get(0))
+        .collect();
+
     for owner in owners {
+        debug!("Updating owner data for {}: {:?}", owner.login, owner);
+
+        // Update any existing owner data since it is mutable and could have changed since last
+        // time we pulled it
         let owner_id: i32 = {
-            let rows = conn.query("SELECT id FROM owners WHERE login = $1", &[&owner.login])?;
-            if !rows.is_empty() {
-                rows.get(0).get(0)
-            } else {
-                conn.query(
-                    "INSERT INTO owners (login, avatar, name, email)
-                                 VALUES ($1, $2, $3, $4)
-                                 RETURNING id",
-                    &[&owner.login, &owner.avatar, &owner.name, &owner.email],
-                )?
-                .get(0)
-                .get(0)
-            }
+            conn.query(
+                "
+                INSERT INTO owners (login, avatar, name, email)
+                VALUES ($1, $2, $3, $4)
+                ON CONFLICT (login) DO UPDATE
+                    SET
+                        avatar = $2,
+                        name = $3,
+                        email = $4
+                RETURNING id
+            ",
+                &[&owner.login, &owner.avatar, &owner.name, &owner.email],
+            )?
+            .get(0)
+            .get(0)
         };
 
         // add relationship
-        let _ = conn.query(
+        let updated = conn.query(
             "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2)",
             &[&crate_id, &owner_id],
         );
+
+        match updated {
+            Ok(_) => debug!("Added new owner relationship"),
+            Err(e)
+                if e.as_db().and_then(|e| e.constraint.as_deref())
+                    == Some("owner_rels_cid_oid_key") =>
+            {
+                debug!("Existing owner relationship");
+            }
+            Err(e) => return Err(e.into()),
+        }
     }
+
+    let to_remove = existing_owners
+        .iter()
+        .filter(|login| !owners.iter().any(|owner| &&owner.login == login));
+
+    for login in to_remove {
+        debug!("Removing owner relationship {}", login);
+        // remove relationship
+        conn.query(
+            "
+            DELETE FROM owner_rels
+            USING owners
+            WHERE owner_rels.cid = $1
+                AND owner_rels.oid = owners.id
+                AND owners.login = $2
+        ",
+            &[&crate_id, &login],
+        )?;
+    }
+
     Ok(())
 }
 
diff --git a/src/db/mod.rs b/src/db/mod.rs
index 732f1cbdf..8d9772708 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -1,7 +1,8 @@
 //! Database operations
 
-pub(crate) use self::add_package::add_build_into_database;
-pub(crate) use self::add_package::add_package_into_database;
+pub(crate) use self::add_package::{
+    add_build_into_database, add_package_into_database, update_crate_data_in_database,
+};
 pub use self::delete::{delete_crate, delete_version};
 pub use self::file::add_path_into_database;
 pub use self::migrate::migrate;
diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs
index 85bb2c2f3..e5d7bb86a 100644
--- a/src/docbuilder/rustwide_builder.rs
+++ b/src/docbuilder/rustwide_builder.rs
@@ -2,7 +2,9 @@ use super::DocBuilder;
 use super::Metadata;
 use crate::db::blacklist::is_blacklisted;
 use crate::db::file::add_path_into_database;
-use crate::db::{add_build_into_database, add_package_into_database, Pool};
+use crate::db::{
+    add_build_into_database, add_package_into_database, update_crate_data_in_database, Pool,
+};
 use crate::docbuilder::{crates::crates_from_path, Limits};
 use crate::error::Result;
 use crate::storage::CompressionAlgorithms;
@@ -405,13 +407,21 @@ impl RustwideBuilder {
                     &res.target,
                     files_list,
                     successful_targets,
-                    &doc_builder.index.api().get_crate_data(name, version),
+                    &doc_builder.index.api().get_release_data(name, version),
                     has_docs,
                     has_examples,
                     algs,
                 )?;
+
                 add_build_into_database(&conn, release_id, &res.result)?;
 
+                // Some crates.io crate data is mutable, so we proactively update it during a release
+                update_crate_data_in_database(
+                    &conn,
+                    name,
+                    &doc_builder.index.api().get_crate_data(name),
+                )?;
+
                 doc_builder.add_to_cache(name, version);
                 Ok(res)
             })?;
diff --git a/src/index/api.rs b/src/index/api.rs
index 7dbbe5c58..445b14298 100644
--- a/src/index/api.rs
+++ b/src/index/api.rs
@@ -14,18 +14,25 @@ const APP_USER_AGENT: &str = concat!(
     include_str!(concat!(env!("OUT_DIR"), "/git_version"))
 );
 
+#[derive(Debug)]
 pub(crate) struct Api {
     api_base: Option<Url>,
     client: reqwest::blocking::Client,
 }
 
-pub(crate) struct RegistryCrateData {
+#[derive(Debug)]
+pub(crate) struct CrateData {
+    pub(crate) owners: Vec<CrateOwner>,
+}
+
+#[derive(Debug)]
+pub(crate) struct ReleaseData {
     pub(crate) release_time: DateTime<Utc>,
     pub(crate) yanked: bool,
     pub(crate) downloads: i32,
-    pub(crate) owners: Vec<CrateOwner>,
 }
 
+#[derive(Debug)]
 pub(crate) struct CrateOwner {
     pub(crate) avatar: String,
     pub(crate) email: String,
@@ -55,7 +62,16 @@ impl Api {
             .ok_or_else(|| err_msg("index is missing an api base url"))
     }
 
-    pub(crate) fn get_crate_data(&self, name: &str, version: &str) -> RegistryCrateData {
+    pub(crate) fn get_crate_data(&self, name: &str) -> CrateData {
+        let owners = self.get_owners(name).unwrap_or_else(|err| {
+            warn!("Failed to get owners for {}: {}", name, err);
+            Vec::new()
+        });
+
+        CrateData { owners }
+    }
+
+    pub(crate) fn get_release_data(&self, name: &str, version: &str) -> ReleaseData {
         let (release_time, yanked, downloads) = self
             .get_release_time_yanked_downloads(name, version)
             .unwrap_or_else(|err| {
@@ -63,16 +79,10 @@ impl Api {
                 (Utc::now(), false, 0)
             });
 
-        let owners = self.get_owners(name).unwrap_or_else(|err| {
-            warn!("Failed to get owners for {}-{}: {}", name, version, err);
-            Vec::new()
-        });
-
-        RegistryCrateData {
+        ReleaseData {
             release_time,
             yanked,
             downloads,
-            owners,
         }
     }
 
diff --git a/src/test/fakes.rs b/src/test/fakes.rs
index 4d30760ed..14e98144e 100644
--- a/src/test/fakes.rs
+++ b/src/test/fakes.rs
@@ -1,6 +1,6 @@
 use super::TestDatabase;
 use crate::docbuilder::BuildResult;
-use crate::index::api::RegistryCrateData;
+use crate::index::api::{CrateData, ReleaseData};
 use crate::storage::Storage;
 use crate::utils::{Dependency, MetadataPackage, Target};
 use chrono::{DateTime, Utc};
@@ -19,7 +19,8 @@ pub(crate) struct FakeRelease<'a> {
     rustdoc_files: Vec<(&'a str, &'a [u8])>,
     doc_targets: Vec<String>,
     default_target: Option<&'a str>,
-    registry_crate_data: RegistryCrateData,
+    registry_crate_data: CrateData,
+    registry_release_data: ReleaseData,
     has_docs: bool,
     has_examples: bool,
     /// This stores the content, while `package.readme` stores the filename
@@ -60,11 +61,11 @@ impl<'a> FakeRelease<'a> {
             rustdoc_files: Vec::new(),
             doc_targets: Vec::new(),
             default_target: None,
-            registry_crate_data: RegistryCrateData {
+            registry_crate_data: CrateData { owners: Vec::new() },
+            registry_release_data: ReleaseData {
                 release_time: Utc::now(),
                 yanked: false,
                 downloads: 0,
-                owners: Vec::new(),
             },
             has_docs: true,
             has_examples: false,
@@ -73,7 +74,7 @@ impl<'a> FakeRelease<'a> {
     }
 
     pub(crate) fn downloads(mut self, downloads: i32) -> Self {
-        self.registry_crate_data.downloads = downloads;
+        self.registry_release_data.downloads = downloads;
         self
     }
 
@@ -83,7 +84,7 @@ impl<'a> FakeRelease<'a> {
     }
 
     pub(crate) fn release_time(mut self, new: DateTime<Utc>) -> Self {
-        self.registry_crate_data.release_time = new;
+        self.registry_release_data.release_time = new;
         self
     }
 
@@ -116,7 +117,7 @@ impl<'a> FakeRelease<'a> {
     }
 
     pub(crate) fn yanked(mut self, new: bool) -> Self {
-        self.registry_crate_data.yanked = new;
+        self.registry_release_data.yanked = new;
         self
     }
 
@@ -249,11 +250,16 @@ impl<'a> FakeRelease<'a> {
             self.default_target.unwrap_or("x86_64-unknown-linux-gnu"),
             source_meta,
             self.doc_targets,
-            &self.registry_crate_data,
+            &self.registry_release_data,
             self.has_docs,
             self.has_examples,
             algs,
         )?;
+        crate::db::update_crate_data_in_database(
+            &db.conn(),
+            &package.name,
+            &self.registry_crate_data,
+        )?;
         crate::db::add_build_into_database(&db.conn(), release_id, &self.build_result)?;
 
         Ok(release_id)

From 087d982ddf4df9f9b1dbfc7f90f424940c5a7b7f Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Tue, 28 Jul 2020 19:00:17 +0200
Subject: [PATCH 2/9] Add test for updating owner details

---
 src/test/fakes.rs        |  7 ++-
 src/web/crate_details.rs | 92 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/src/test/fakes.rs b/src/test/fakes.rs
index 14e98144e..c9ecd9f7a 100644
--- a/src/test/fakes.rs
+++ b/src/test/fakes.rs
@@ -1,6 +1,6 @@
 use super::TestDatabase;
 use crate::docbuilder::BuildResult;
-use crate::index::api::{CrateData, ReleaseData};
+use crate::index::api::{CrateData, CrateOwner, ReleaseData};
 use crate::storage::Storage;
 use crate::utils::{Dependency, MetadataPackage, Target};
 use chrono::{DateTime, Utc};
@@ -167,6 +167,11 @@ impl<'a> FakeRelease<'a> {
         self.source_file("README.md", content.as_bytes())
     }
 
+    pub(crate) fn add_owner(mut self, owner: CrateOwner) -> Self {
+        self.registry_crate_data.owners.push(owner);
+        self
+    }
+
     /// Returns the release_id
     pub(crate) fn create(self) -> Result<i32, Error> {
         use std::collections::HashSet;
diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs
index fc0adad1b..769c00a52 100644
--- a/src/web/crate_details.rs
+++ b/src/web/crate_details.rs
@@ -315,6 +315,7 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult<Response> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::index::api::CrateOwner;
     use crate::test::{wrapper, TestDatabase};
     use failure::Error;
     use kuchiki::traits::TendrilSink;
@@ -610,4 +611,95 @@ mod tests {
             Ok(())
         });
     }
+
+    #[test]
+    fn test_updating_owners() {
+        wrapper(|env| {
+            let db = env.db();
+
+            env.fake_release()
+                .name("foo")
+                .version("0.0.1")
+                .add_owner(CrateOwner {
+                    login: "foobar".into(),
+                    avatar: "https://example.org/foobar".into(),
+                    name: "Foo Bar".into(),
+                    email: "foobar@example.org".into(),
+                })
+                .create()?;
+
+            let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap();
+            assert_eq!(
+                details.owners,
+                vec![("foobar".into(), "https://example.org/foobar".into())]
+            );
+
+            // Adding a new owner, and changing details on an existing owner
+            env.fake_release()
+                .name("foo")
+                .version("0.0.2")
+                .add_owner(CrateOwner {
+                    login: "foobar".into(),
+                    avatar: "https://example.org/foobarv2".into(),
+                    name: "Foo Bar".into(),
+                    email: "foobar@example.org".into(),
+                })
+                .add_owner(CrateOwner {
+                    login: "barfoo".into(),
+                    avatar: "https://example.org/barfoo".into(),
+                    name: "Bar Foo".into(),
+                    email: "foobar@example.org".into(),
+                })
+                .create()?;
+
+            let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap();
+            let mut owners = details.owners.clone();
+            owners.sort();
+            assert_eq!(
+                owners,
+                vec![
+                    ("barfoo".into(), "https://example.org/barfoo".into()),
+                    ("foobar".into(), "https://example.org/foobarv2".into())
+                ]
+            );
+
+            // Removing an existing owner
+            env.fake_release()
+                .name("foo")
+                .version("0.0.3")
+                .add_owner(CrateOwner {
+                    login: "barfoo".into(),
+                    avatar: "https://example.org/barfoo".into(),
+                    name: "Bar Foo".into(),
+                    email: "foobar@example.org".into(),
+                })
+                .create()?;
+
+            let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap();
+            assert_eq!(
+                details.owners,
+                vec![("barfoo".into(), "https://example.org/barfoo".into())]
+            );
+
+            // Changing owner details on another of there crates applies too
+            env.fake_release()
+                .name("bar")
+                .version("0.0.1")
+                .add_owner(CrateOwner {
+                    login: "barfoo".into(),
+                    avatar: "https://example.org/barfoov2".into(),
+                    name: "Bar Foo".into(),
+                    email: "foobar@example.org".into(),
+                })
+                .create()?;
+
+            let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap();
+            assert_eq!(
+                details.owners,
+                vec![("barfoo".into(), "https://example.org/barfoov2".into())]
+            );
+
+            Ok(())
+        });
+    }
 }

From 48d41b313abb11d14726cea26f7b2e00d2138d0f Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Mon, 29 Jun 2020 14:57:31 +0200
Subject: [PATCH 3/9] Move CRATESFYI_PREFIX handling to Config

---
 src/bin/cratesfyi.rs      |  4 ++-
 src/config.rs             |  9 +++++++
 src/docbuilder/options.rs | 53 +++++----------------------------------
 src/utils/daemon.rs       | 40 +++++++++++------------------
 4 files changed, 33 insertions(+), 73 deletions(-)

diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index dff84007e..b2d83413c 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -249,6 +249,7 @@ struct Build {
     )]
     prefix: PathBuf,
 
+    /// DEPRECATED
     /// Sets the registry index path, where on disk the registry index will be cloned to
     #[structopt(
         name = "REGISTRY_INDEX_PATH",
@@ -280,9 +281,10 @@ struct Build {
 impl Build {
     pub fn handle_args(self, ctx: Context) -> Result<(), Error> {
         let docbuilder = {
-            let mut doc_options = DocBuilderOptions::from_prefix(self.prefix);
+            let mut doc_options = DocBuilderOptions::new(&*ctx.config()?);
 
             if let Some(registry_index_path) = self.registry_index_path {
+                log::warn!("Use of deprecated cli flag --registry-index-path");
                 doc_options.registry_index_path = registry_index_path;
             }
 
diff --git a/src/config.rs b/src/config.rs
index 0054f353e..503889445 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,5 +1,6 @@
 use failure::{bail, format_err, Error, Fail, ResultExt};
 use std::env::VarError;
+use std::path::PathBuf;
 use std::str::FromStr;
 
 #[derive(Debug)]
@@ -7,6 +8,9 @@ pub struct Config {
     // Build params
     pub(crate) build_attempts: u16,
 
+    pub(crate) prefix: PathBuf,
+    pub(crate) registry_index_path: PathBuf,
+
     // Database connection params
     pub(crate) database_url: String,
     pub(crate) max_pool_size: u32,
@@ -23,9 +27,14 @@ pub struct Config {
 
 impl Config {
     pub fn from_env() -> Result<Self, Error> {
+        let prefix: PathBuf = require_env("CRATESFYI_PREFIX")?;
+
         Ok(Self {
             build_attempts: env("DOCSRS_BUILD_ATTEMPTS", 5)?,
 
+            prefix: prefix.clone(),
+            registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?,
+
             database_url: require_env("CRATESFYI_DATABASE_URL")?,
             max_pool_size: env("DOCSRS_MAX_POOL_SIZE", 90)?,
             min_pool_idle: env("DOCSRS_MIN_POOL_IDLE", 10)?,
diff --git a/src/docbuilder/options.rs b/src/docbuilder/options.rs
index 693b2c929..b0fbe3d10 100644
--- a/src/docbuilder/options.rs
+++ b/src/docbuilder/options.rs
@@ -1,8 +1,7 @@
-use crate::error::Result;
+use crate::{config::Config, error::Result};
 use std::path::PathBuf;
-use std::{env, fmt};
 
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct DocBuilderOptions {
     pub keep_build_directory: bool,
     pub prefix: PathBuf,
@@ -14,15 +13,11 @@ pub struct DocBuilderOptions {
     pub debug: bool,
 }
 
-impl Default for DocBuilderOptions {
-    fn default() -> DocBuilderOptions {
-        let cwd = env::current_dir().unwrap();
-
-        let (prefix, registry_index_path) = generate_paths(cwd);
-
+impl DocBuilderOptions {
+    pub fn new(config: &Config) -> DocBuilderOptions {
         DocBuilderOptions {
-            prefix,
-            registry_index_path,
+            prefix: config.prefix.clone(),
+            registry_index_path: config.registry_index_path.clone(),
 
             keep_build_directory: false,
             skip_if_exists: false,
@@ -32,36 +27,6 @@ impl Default for DocBuilderOptions {
             debug: false,
         }
     }
-}
-
-impl fmt::Debug for DocBuilderOptions {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(
-            f,
-            "DocBuilderOptions {{ \
-                registry_index_path: {:?}, \
-                keep_build_directory: {:?}, skip_if_exists: {:?}, \
-                skip_if_log_exists: {:?}, debug: {:?} }}",
-            self.registry_index_path,
-            self.keep_build_directory,
-            self.skip_if_exists,
-            self.skip_if_log_exists,
-            self.debug
-        )
-    }
-}
-
-impl DocBuilderOptions {
-    /// Creates new DocBuilderOptions from prefix
-    pub fn from_prefix(prefix: PathBuf) -> DocBuilderOptions {
-        let (prefix, registry_index_path) = generate_paths(prefix);
-
-        DocBuilderOptions {
-            prefix,
-            registry_index_path,
-            ..Default::default()
-        }
-    }
 
     pub fn check_paths(&self) -> Result<()> {
         if !self.registry_index_path.exists() {
@@ -74,9 +39,3 @@ impl DocBuilderOptions {
         Ok(())
     }
 }
-
-fn generate_paths(prefix: PathBuf) -> (PathBuf, PathBuf) {
-    let registry_index_path = PathBuf::from(&prefix).join("crates.io-index");
-
-    (prefix, registry_index_path)
-}
diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs
index 1e146c591..b1ba481a7 100644
--- a/src/utils/daemon.rs
+++ b/src/utils/daemon.rs
@@ -11,20 +11,23 @@ use crate::{
 use chrono::{Timelike, Utc};
 use failure::Error;
 use log::{debug, error, info};
-use std::path::PathBuf;
 use std::sync::Arc;
+use std::thread;
 use std::time::Duration;
-use std::{env, thread};
 
-fn start_registry_watcher(pool: Pool, build_queue: Arc<BuildQueue>) -> Result<(), Error> {
+fn start_registry_watcher(
+    opts: DocBuilderOptions,
+    pool: Pool,
+    build_queue: Arc<BuildQueue>,
+) -> Result<(), Error> {
     thread::Builder::new()
         .name("registry index reader".to_string())
         .spawn(move || {
             // space this out to prevent it from clashing against the queue-builder thread on launch
             thread::sleep(Duration::from_secs(30));
             loop {
-                let opts = opts();
-                let mut doc_builder = DocBuilder::new(opts, pool.clone(), build_queue.clone());
+                let mut doc_builder =
+                    DocBuilder::new(opts.clone(), pool.clone(), build_queue.clone());
 
                 if doc_builder.is_locked() {
                     debug!("Lock file exists, skipping checking new crates");
@@ -50,23 +53,14 @@ pub fn start_daemon(
     storage: Arc<Storage>,
     enable_registry_watcher: bool,
 ) -> Result<(), Error> {
-    const CRATE_VARIABLES: &[&str] = &["CRATESFYI_PREFIX"];
-
-    // first check required environment variables
-    for v in CRATE_VARIABLES.iter() {
-        if env::var(v).is_err() {
-            panic!("Environment variable {} not found", v)
-        }
-    }
-
-    let dbopts = opts();
+    let dbopts = DocBuilderOptions::new(&config);
 
     // check paths once
     dbopts.check_paths().unwrap();
 
     if enable_registry_watcher {
         // check new crates every minute
-        start_registry_watcher(db.clone(), build_queue.clone())?;
+        start_registry_watcher(dbopts.clone(), db.clone(), build_queue.clone())?;
     }
 
     // build new crates every minute
@@ -76,8 +70,11 @@ pub fn start_daemon(
     thread::Builder::new()
         .name("build queue reader".to_string())
         .spawn(move || {
-            let doc_builder =
-                DocBuilder::new(opts(), cloned_db.clone(), cloned_build_queue.clone());
+            let doc_builder = DocBuilder::new(
+                dbopts.clone(),
+                cloned_db.clone(),
+                cloned_build_queue.clone(),
+            );
             queue_builder(doc_builder, cloned_db, cloned_build_queue, cloned_storage).unwrap();
         })
         .unwrap();
@@ -131,10 +128,3 @@ where
         })?;
     Ok(())
 }
-
-fn opts() -> DocBuilderOptions {
-    let prefix = PathBuf::from(
-        env::var("CRATESFYI_PREFIX").expect("CRATESFYI_PREFIX environment variable not found"),
-    );
-    DocBuilderOptions::from_prefix(prefix)
-}

From 8fb7ad60237f3203aaae5a953cbe98526159c7d6 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Sat, 18 Jul 2020 13:16:23 +0200
Subject: [PATCH 4/9] Pass just the paths into DocBuilderOptions

---
 src/bin/cratesfyi.rs      | 4 +++-
 src/config.rs             | 4 ++--
 src/docbuilder/options.rs | 8 ++++----
 src/utils/daemon.rs       | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index b2d83413c..2580adaeb 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -281,7 +281,9 @@ struct Build {
 impl Build {
     pub fn handle_args(self, ctx: Context) -> Result<(), Error> {
         let docbuilder = {
-            let mut doc_options = DocBuilderOptions::new(&*ctx.config()?);
+            let config = ctx.config()?;
+            let mut doc_options =
+                DocBuilderOptions::new(config.prefix.clone(), config.registry_index_path.clone());
 
             if let Some(registry_index_path) = self.registry_index_path {
                 log::warn!("Use of deprecated cli flag --registry-index-path");
diff --git a/src/config.rs b/src/config.rs
index 503889445..cb7518fcc 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -8,8 +8,8 @@ pub struct Config {
     // Build params
     pub(crate) build_attempts: u16,
 
-    pub(crate) prefix: PathBuf,
-    pub(crate) registry_index_path: PathBuf,
+    pub prefix: PathBuf,
+    pub registry_index_path: PathBuf,
 
     // Database connection params
     pub(crate) database_url: String,
diff --git a/src/docbuilder/options.rs b/src/docbuilder/options.rs
index b0fbe3d10..a9cf399c8 100644
--- a/src/docbuilder/options.rs
+++ b/src/docbuilder/options.rs
@@ -1,4 +1,4 @@
-use crate::{config::Config, error::Result};
+use crate::error::Result;
 use std::path::PathBuf;
 
 #[derive(Clone, Debug)]
@@ -14,10 +14,10 @@ pub struct DocBuilderOptions {
 }
 
 impl DocBuilderOptions {
-    pub fn new(config: &Config) -> DocBuilderOptions {
+    pub fn new(prefix: PathBuf, registry_index_path: PathBuf) -> DocBuilderOptions {
         DocBuilderOptions {
-            prefix: config.prefix.clone(),
-            registry_index_path: config.registry_index_path.clone(),
+            prefix,
+            registry_index_path,
 
             keep_build_directory: false,
             skip_if_exists: false,
diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs
index b1ba481a7..50e9ba759 100644
--- a/src/utils/daemon.rs
+++ b/src/utils/daemon.rs
@@ -53,7 +53,7 @@ pub fn start_daemon(
     storage: Arc<Storage>,
     enable_registry_watcher: bool,
 ) -> Result<(), Error> {
-    let dbopts = DocBuilderOptions::new(&config);
+    let dbopts = DocBuilderOptions::new(config.prefix.clone(), config.registry_index_path.clone());
 
     // check paths once
     dbopts.check_paths().unwrap();

From c8af9184e460bbce5afb6135362c2e14c8148882 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Sat, 18 Jul 2020 13:18:47 +0200
Subject: [PATCH 5/9] Remove --registry-index-path and --prefix flags

---
 src/bin/cratesfyi.rs | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index 2580adaeb..f8006423b 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -241,23 +241,6 @@ impl PrioritySubcommand {
 #[derive(Debug, Clone, PartialEq, Eq, StructOpt)]
 #[structopt(rename_all = "kebab-case")]
 struct Build {
-    #[structopt(
-        name = "PREFIX",
-        short = "P",
-        long = "prefix",
-        env = "CRATESFYI_PREFIX"
-    )]
-    prefix: PathBuf,
-
-    /// DEPRECATED
-    /// Sets the registry index path, where on disk the registry index will be cloned to
-    #[structopt(
-        name = "REGISTRY_INDEX_PATH",
-        long = "registry-index-path",
-        alias = "crates-io-index-path"
-    )]
-    registry_index_path: Option<PathBuf>,
-
     /// Skips building documentation if documentation exists
     #[structopt(name = "SKIP_IF_EXISTS", short = "s", long = "skip")]
     skip_if_exists: bool,
@@ -285,11 +268,6 @@ impl Build {
             let mut doc_options =
                 DocBuilderOptions::new(config.prefix.clone(), config.registry_index_path.clone());
 
-            if let Some(registry_index_path) = self.registry_index_path {
-                log::warn!("Use of deprecated cli flag --registry-index-path");
-                doc_options.registry_index_path = registry_index_path;
-            }
-
             doc_options.skip_if_exists = self.skip_if_exists;
             doc_options.skip_if_log_exists = self.skip_if_log_exists;
             doc_options.keep_build_directory = self.keep_build_directory;

From c5be00cdf596eb80cf592a8e29d4787ce87e6482 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Mon, 27 Jul 2020 20:18:18 +0200
Subject: [PATCH 6/9] Add CLI command to update registry data for a crate

---
 src/bin/cratesfyi.rs  | 17 +++++++++++++++++
 src/db/add_package.rs |  2 +-
 src/db/mod.rs         |  5 ++---
 src/index/api.rs      |  8 ++++----
 src/index/mod.rs      |  6 +++---
 src/lib.rs            |  2 +-
 6 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index f8006423b..e3b938ad7 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -4,6 +4,7 @@ use std::path::PathBuf;
 use std::sync::Arc;
 
 use cratesfyi::db::{self, add_path_into_database, Pool};
+use cratesfyi::index::Index;
 use cratesfyi::utils::{remove_crate_priority, set_crate_priority};
 use cratesfyi::{
     BuildQueue, Config, DocBuilder, DocBuilderOptions, RustwideBuilder, Server, Storage,
@@ -417,6 +418,12 @@ enum DatabaseSubcommand {
     /// Updates github stats for crates.
     UpdateGithubFields,
 
+    /// Updates info for a crate from the registry's API
+    UpdateCrateRegistryFields {
+        #[structopt(name = "CRATE")]
+        name: String,
+    },
+
     AddDirectory {
         /// Path of file or directory
         #[structopt(name = "DIRECTORY")]
@@ -454,6 +461,16 @@ impl DatabaseSubcommand {
                     .update_all_crates()?;
             }
 
+            Self::UpdateCrateRegistryFields { name } => {
+                let index = Index::new(&ctx.config()?.registry_index_path)?;
+
+                db::update_crate_data_in_database(
+                    &*ctx.conn()?,
+                    &name,
+                    &index.api().get_crate_data(&name),
+                )?;
+            }
+
             Self::AddDirectory { directory, prefix } => {
                 add_path_into_database(&*ctx.storage()?, &prefix, directory)
                     .context("Failed to add directory into database")?;
diff --git a/src/db/add_package.rs b/src/db/add_package.rs
index b85b732ab..43821a65e 100644
--- a/src/db/add_package.rs
+++ b/src/db/add_package.rs
@@ -327,7 +327,7 @@ fn add_authors_into_database(
     Ok(())
 }
 
-pub(crate) fn update_crate_data_in_database(
+pub fn update_crate_data_in_database(
     conn: &Connection,
     name: &str,
     registry_data: &CrateData,
diff --git a/src/db/mod.rs b/src/db/mod.rs
index 8d9772708..9783cea23 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -1,8 +1,7 @@
 //! Database operations
 
-pub(crate) use self::add_package::{
-    add_build_into_database, add_package_into_database, update_crate_data_in_database,
-};
+pub use self::add_package::update_crate_data_in_database;
+pub(crate) use self::add_package::{add_build_into_database, add_package_into_database};
 pub use self::delete::{delete_crate, delete_version};
 pub use self::file::add_path_into_database;
 pub use self::migrate::migrate;
diff --git a/src/index/api.rs b/src/index/api.rs
index 445b14298..5784e745c 100644
--- a/src/index/api.rs
+++ b/src/index/api.rs
@@ -15,13 +15,13 @@ const APP_USER_AGENT: &str = concat!(
 );
 
 #[derive(Debug)]
-pub(crate) struct Api {
+pub struct Api {
     api_base: Option<Url>,
     client: reqwest::blocking::Client,
 }
 
 #[derive(Debug)]
-pub(crate) struct CrateData {
+pub struct CrateData {
     pub(crate) owners: Vec<CrateOwner>,
 }
 
@@ -33,7 +33,7 @@ pub(crate) struct ReleaseData {
 }
 
 #[derive(Debug)]
-pub(crate) struct CrateOwner {
+pub struct CrateOwner {
     pub(crate) avatar: String,
     pub(crate) email: String,
     pub(crate) login: String,
@@ -62,7 +62,7 @@ impl Api {
             .ok_or_else(|| err_msg("index is missing an api base url"))
     }
 
-    pub(crate) fn get_crate_data(&self, name: &str) -> CrateData {
+    pub fn get_crate_data(&self, name: &str) -> CrateData {
         let owners = self.get_owners(name).unwrap_or_else(|err| {
             warn!("Failed to get owners for {}: {}", name, err);
             Vec::new()
diff --git a/src/index/mod.rs b/src/index/mod.rs
index 0953f3668..acb86a8fc 100644
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -8,7 +8,7 @@ use failure::ResultExt;
 
 pub(crate) mod api;
 
-pub(crate) struct Index {
+pub struct Index {
     path: PathBuf,
     api: Api,
 }
@@ -39,7 +39,7 @@ fn load_config(repo: &git2::Repository) -> Result<IndexConfig> {
 }
 
 impl Index {
-    pub(crate) fn new(path: impl AsRef<Path>) -> Result<Self> {
+    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
         let path = path.as_ref().to_owned();
         // This initializes the repository, then closes it afterwards to avoid leaking file descriptors.
         // See https://github.com/rust-lang/docs.rs/pull/847
@@ -56,7 +56,7 @@ impl Index {
         Ok(diff)
     }
 
-    pub(crate) fn api(&self) -> &Api {
+    pub fn api(&self) -> &Api {
         &self.api
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index d4276dc9f..d25e79c5a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,7 +15,7 @@ mod config;
 pub mod db;
 mod docbuilder;
 mod error;
-mod index;
+pub mod index;
 pub mod storage;
 #[cfg(test)]
 mod test;

From 9d81de996c00e700719fdf9ba17b4f05e4794b18 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Fri, 31 Jul 2020 17:03:16 +0200
Subject: [PATCH 7/9] Cleanup inserting owners into database

---
 src/db/add_package.rs | 77 +++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 46 deletions(-)

diff --git a/src/db/add_package.rs b/src/db/add_package.rs
index 43821a65e..6eab47aaf 100644
--- a/src/db/add_package.rs
+++ b/src/db/add_package.rs
@@ -349,20 +349,17 @@ fn update_owners_in_database(
     owners: &[CrateOwner],
     crate_id: i32,
 ) -> Result<()> {
-    let existing_owners: Vec<String> = conn
-        .query(
-            "
-        SELECT login
-        FROM owners
-        INNER JOIN owner_rels
-            ON owner_rels.oid = owners.id
-        WHERE owner_rels.cid = $1
-    ",
-            &[&crate_id],
-        )?
-        .into_iter()
-        .map(|row| row.get(0))
-        .collect();
+    let rows = conn.query(
+        "
+            SELECT login
+            FROM owners
+            INNER JOIN owner_rels
+                ON owner_rels.oid = owners.id
+            WHERE owner_rels.cid = $1
+        ",
+        &[&crate_id],
+    )?;
+    let existing_owners = rows.into_iter().map(|row| -> String { row.get(0) });
 
     for owner in owners {
         debug!("Updating owner data for {}: {:?}", owner.login, owner);
@@ -372,15 +369,15 @@ fn update_owners_in_database(
         let owner_id: i32 = {
             conn.query(
                 "
-                INSERT INTO owners (login, avatar, name, email)
-                VALUES ($1, $2, $3, $4)
-                ON CONFLICT (login) DO UPDATE
-                    SET
-                        avatar = $2,
-                        name = $3,
-                        email = $4
-                RETURNING id
-            ",
+                    INSERT INTO owners (login, avatar, name, email)
+                    VALUES ($1, $2, $3, $4)
+                    ON CONFLICT (login) DO UPDATE
+                        SET
+                            avatar = $2,
+                            name = $3,
+                            email = $4
+                    RETURNING id
+                ",
                 &[&owner.login, &owner.avatar, &owner.name, &owner.email],
             )?
             .get(0)
@@ -388,38 +385,26 @@ fn update_owners_in_database(
         };
 
         // add relationship
-        let updated = conn.query(
-            "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2)",
+        conn.query(
+            "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2) ON CONFLICT DO NOTHING",
             &[&crate_id, &owner_id],
-        );
-
-        match updated {
-            Ok(_) => debug!("Added new owner relationship"),
-            Err(e)
-                if e.as_db().and_then(|e| e.constraint.as_deref())
-                    == Some("owner_rels_cid_oid_key") =>
-            {
-                debug!("Existing owner relationship");
-            }
-            Err(e) => return Err(e.into()),
-        }
+        )?;
     }
 
-    let to_remove = existing_owners
-        .iter()
-        .filter(|login| !owners.iter().any(|owner| &&owner.login == login));
+    let to_remove =
+        existing_owners.filter(|login| !owners.iter().any(|owner| &owner.login == login));
 
     for login in to_remove {
         debug!("Removing owner relationship {}", login);
         // remove relationship
         conn.query(
             "
-            DELETE FROM owner_rels
-            USING owners
-            WHERE owner_rels.cid = $1
-                AND owner_rels.oid = owners.id
-                AND owners.login = $2
-        ",
+                DELETE FROM owner_rels
+                USING owners
+                WHERE owner_rels.cid = $1
+                    AND owner_rels.oid = owners.id
+                    AND owners.login = $2
+            ",
             &[&crate_id, &login],
         )?;
     }

From 0560662eddf4c0e4a6ee2fc2bc3b741182adc286 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Fri, 31 Jul 2020 17:05:00 +0200
Subject: [PATCH 8/9] Fix test comment

---
 src/web/crate_details.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs
index 769c00a52..cc608bd10 100644
--- a/src/web/crate_details.rs
+++ b/src/web/crate_details.rs
@@ -681,7 +681,7 @@ mod tests {
                 vec![("barfoo".into(), "https://example.org/barfoo".into())]
             );
 
-            // Changing owner details on another of there crates applies too
+            // Changing owner details on another of their crates applies the change to both
             env.fake_release()
                 .name("bar")
                 .version("0.0.1")

From 865c310336536e04659dd66ae3282efc6a2b1416 Mon Sep 17 00:00:00 2001
From: Wim Looman <git@nemo157.com>
Date: Fri, 31 Jul 2020 17:13:45 +0200
Subject: [PATCH 9/9] Move index API failure handling to docbuilder

---
 src/bin/cratesfyi.rs               |  2 +-
 src/docbuilder/rustwide_builder.rs | 21 +++++++++++++-----
 src/index/api.rs                   | 35 +++++++++++++++++-------------
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index e3b938ad7..b078390e1 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -467,7 +467,7 @@ impl DatabaseSubcommand {
                 db::update_crate_data_in_database(
                     &*ctx.conn()?,
                     &name,
-                    &index.api().get_crate_data(&name),
+                    &index.api().get_crate_data(&name)?,
                 )?;
             }
 
diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs
index e5d7bb86a..4ef38303c 100644
--- a/src/docbuilder/rustwide_builder.rs
+++ b/src/docbuilder/rustwide_builder.rs
@@ -7,6 +7,7 @@ use crate::db::{
 };
 use crate::docbuilder::{crates::crates_from_path, Limits};
 use crate::error::Result;
+use crate::index::api::ReleaseData;
 use crate::storage::CompressionAlgorithms;
 use crate::storage::Storage;
 use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata};
@@ -399,6 +400,15 @@ impl RustwideBuilder {
                 } else {
                     crate::web::metrics::NON_LIBRARY_BUILDS.inc();
                 }
+
+                let release_data = match doc_builder.index.api().get_release_data(name, version) {
+                    Ok(data) => data,
+                    Err(err) => {
+                        warn!("{:#?}", err);
+                        ReleaseData::default()
+                    }
+                };
+
                 let release_id = add_package_into_database(
                     &conn,
                     res.cargo_metadata.root(),
@@ -407,7 +417,7 @@ impl RustwideBuilder {
                     &res.target,
                     files_list,
                     successful_targets,
-                    &doc_builder.index.api().get_release_data(name, version),
+                    &release_data,
                     has_docs,
                     has_examples,
                     algs,
@@ -416,11 +426,10 @@ impl RustwideBuilder {
                 add_build_into_database(&conn, release_id, &res.result)?;
 
                 // Some crates.io crate data is mutable, so we proactively update it during a release
-                update_crate_data_in_database(
-                    &conn,
-                    name,
-                    &doc_builder.index.api().get_crate_data(name),
-                )?;
+                match doc_builder.index.api().get_crate_data(name) {
+                    Ok(crate_data) => update_crate_data_in_database(&conn, name, &crate_data)?,
+                    Err(err) => warn!("{:#?}", err),
+                }
 
                 doc_builder.add_to_cache(name, version);
                 Ok(res)
diff --git a/src/index/api.rs b/src/index/api.rs
index 5784e745c..ce9a3c6c0 100644
--- a/src/index/api.rs
+++ b/src/index/api.rs
@@ -1,6 +1,5 @@
 use chrono::{DateTime, Utc};
-use failure::err_msg;
-use log::warn;
+use failure::{err_msg, ResultExt};
 use reqwest::header::{HeaderValue, ACCEPT, USER_AGENT};
 use semver::Version;
 use serde::Deserialize;
@@ -32,6 +31,16 @@ pub(crate) struct ReleaseData {
     pub(crate) downloads: i32,
 }
 
+impl Default for ReleaseData {
+    fn default() -> ReleaseData {
+        ReleaseData {
+            release_time: Utc::now(),
+            yanked: false,
+            downloads: 0,
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct CrateOwner {
     pub(crate) avatar: String,
@@ -62,28 +71,24 @@ impl Api {
             .ok_or_else(|| err_msg("index is missing an api base url"))
     }
 
-    pub fn get_crate_data(&self, name: &str) -> CrateData {
-        let owners = self.get_owners(name).unwrap_or_else(|err| {
-            warn!("Failed to get owners for {}: {}", name, err);
-            Vec::new()
-        });
+    pub fn get_crate_data(&self, name: &str) -> Result<CrateData> {
+        let owners = self
+            .get_owners(name)
+            .context(format!("Failed to get owners for {}", name))?;
 
-        CrateData { owners }
+        Ok(CrateData { owners })
     }
 
-    pub(crate) fn get_release_data(&self, name: &str, version: &str) -> ReleaseData {
+    pub(crate) fn get_release_data(&self, name: &str, version: &str) -> Result<ReleaseData> {
         let (release_time, yanked, downloads) = self
             .get_release_time_yanked_downloads(name, version)
-            .unwrap_or_else(|err| {
-                warn!("Failed to get crate data for {}-{}: {}", name, version, err);
-                (Utc::now(), false, 0)
-            });
+            .context(format!("Failed to get crate data for {}-{}", name, version))?;
 
-        ReleaseData {
+        Ok(ReleaseData {
             release_time,
             yanked,
             downloads,
-        }
+        })
     }
 
     /// Get release_time, yanked and downloads from the registry's API