mirror of
https://github.com/deps-rs/deps.rs.git
synced 2024-11-21 18:06:30 +00:00
Clone crates.io-index instead of querying it through GitHub's API (#69)
* Clone crates.io-index instead of querying it through GitHub's API * Implement refreshing the crates.io-index * Run the initial index clone before starting the server * Log cloning the crates.io-index * Disambiguate name of ManagedIndex initial clone fn * Log errors with cloning or refreshing the index
This commit is contained in:
parent
b18c621779
commit
84a7d5154b
7 changed files with 117 additions and 51 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1329,6 +1329,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"badge",
|
||||
"cadence",
|
||||
"crates-index",
|
||||
"derive_more",
|
||||
"futures",
|
||||
"hyper",
|
||||
|
|
|
@ -26,6 +26,7 @@ pin-project = "0.4"
|
|||
relative-path = { version = "1.3", features = ["serde"] }
|
||||
route-recognizer = "0.2"
|
||||
rustsec = "0.21"
|
||||
crates-index = "0.15.0"
|
||||
semver = { version = "0.10", features = ["serde"] }
|
||||
reqwest = { version = "0.10", features = ["json"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
|
|
|
@ -7,6 +7,7 @@ use std::{
|
|||
|
||||
use anyhow::{anyhow, Error};
|
||||
use cadence::{MetricSink, NopMetricSink, StatsdClient};
|
||||
use crates_index::Index;
|
||||
use futures::{future::try_join_all, stream, StreamExt};
|
||||
use hyper::service::Service;
|
||||
use once_cell::sync::Lazy;
|
||||
|
@ -27,8 +28,7 @@ use crate::utils::cache::Cache;
|
|||
mod fut;
|
||||
mod machines;
|
||||
|
||||
use self::fut::analyze_dependencies;
|
||||
use self::fut::crawl_manifest;
|
||||
use self::fut::{analyze_dependencies, crawl_manifest};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Engine {
|
||||
|
@ -43,12 +43,12 @@ pub struct Engine {
|
|||
}
|
||||
|
||||
impl Engine {
|
||||
pub fn new(client: reqwest::Client, logger: Logger) -> Engine {
|
||||
pub fn new(client: reqwest::Client, index: Index, logger: Logger) -> Engine {
|
||||
let metrics = StatsdClient::from_sink("engine", NopMetricSink);
|
||||
|
||||
let query_crate = Cache::new(
|
||||
QueryCrate::new(client.clone()),
|
||||
Duration::from_secs(300),
|
||||
QueryCrate::new(index),
|
||||
Duration::from_secs(10),
|
||||
500,
|
||||
logger.clone(),
|
||||
);
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
use std::{fmt, str, task::Context, task::Poll};
|
||||
|
||||
use anyhow::Error;
|
||||
use anyhow::{anyhow, Error};
|
||||
use crates_index::{Crate, DependencyKind, Index};
|
||||
use futures::FutureExt as _;
|
||||
use hyper::service::Service;
|
||||
use semver::{Version, VersionReq};
|
||||
use serde::Deserialize;
|
||||
use tokio::task::spawn_blocking;
|
||||
|
||||
use crate::{
|
||||
models::crates::{CrateDep, CrateDeps, CrateName, CratePath, CrateRelease},
|
||||
BoxFuture,
|
||||
};
|
||||
|
||||
const CRATES_INDEX_BASE_URI: &str = "https://raw.githubusercontent.com/rust-lang/crates.io-index";
|
||||
const CRATES_API_BASE_URI: &str = "https://crates.io/api/v1";
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
|
@ -33,27 +34,30 @@ struct RegistryPackage {
|
|||
yanked: bool,
|
||||
}
|
||||
|
||||
fn convert_pkgs(
|
||||
name: &CrateName,
|
||||
packages: Vec<RegistryPackage>,
|
||||
) -> Result<QueryCrateResponse, Error> {
|
||||
let releases = packages
|
||||
.into_iter()
|
||||
fn convert_pkgs(krate: Crate) -> Result<QueryCrateResponse, Error> {
|
||||
let name: CrateName = krate.name().parse()?;
|
||||
|
||||
let releases = krate
|
||||
.versions()
|
||||
.iter()
|
||||
.map(|package| {
|
||||
let mut deps = CrateDeps::default();
|
||||
for dep in package.deps {
|
||||
let name = dep.package.as_deref().unwrap_or(&dep.name).parse()?;
|
||||
match dep.kind.as_deref().unwrap_or("normal") {
|
||||
"normal" => deps.main.insert(name, CrateDep::External(dep.req)),
|
||||
"dev" => deps.dev.insert(name, CrateDep::External(dep.req)),
|
||||
for dep in package.dependencies() {
|
||||
let name = dep.crate_name().parse()?;
|
||||
let req = VersionReq::parse(dep.requirement())?;
|
||||
|
||||
match dep.kind() {
|
||||
DependencyKind::Normal => deps.main.insert(name, CrateDep::External(req)),
|
||||
// dev-dependencies are tracked separately from normal ones, so they go into `deps.dev`
DependencyKind::Dev => deps.dev.insert(name, CrateDep::External(req)),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
let version = Version::parse(package.version())?;
|
||||
Ok(CrateRelease {
|
||||
name: name.clone(),
|
||||
version: package.vers,
|
||||
version,
|
||||
deps,
|
||||
yanked: package.yanked,
|
||||
yanked: package.is_yanked(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<_, Error>>()?;
|
||||
|
@ -68,40 +72,21 @@ pub struct QueryCrateResponse {
|
|||
|
||||
#[derive(Clone)]
|
||||
pub struct QueryCrate {
|
||||
client: reqwest::Client,
|
||||
index: Index,
|
||||
}
|
||||
|
||||
impl QueryCrate {
|
||||
pub fn new(client: reqwest::Client) -> Self {
|
||||
Self { client }
|
||||
pub fn new(index: Index) -> Self {
|
||||
Self { index }
|
||||
}
|
||||
|
||||
pub async fn query(
|
||||
client: reqwest::Client,
|
||||
crate_name: CrateName,
|
||||
) -> anyhow::Result<QueryCrateResponse> {
|
||||
let lower_name = crate_name.as_ref().to_lowercase();
|
||||
pub async fn query(index: Index, crate_name: CrateName) -> anyhow::Result<QueryCrateResponse> {
|
||||
let crate_name2 = crate_name.clone();
|
||||
let krate = spawn_blocking(move || index.crate_(crate_name2.as_ref()))
|
||||
.await?
|
||||
.ok_or_else(|| anyhow!("crate '{}' not found", crate_name.as_ref()))?;
|
||||
|
||||
let path = match lower_name.len() {
|
||||
1 => format!("1/{}", lower_name),
|
||||
2 => format!("2/{}", lower_name),
|
||||
3 => format!("3/{}/{}", &lower_name[..1], lower_name),
|
||||
_ => format!("{}/{}/{}", &lower_name[0..2], &lower_name[2..4], lower_name),
|
||||
};
|
||||
|
||||
let url = format!("{}/HEAD/{}", CRATES_INDEX_BASE_URI, path);
|
||||
let res = client.get(&url).send().await?.error_for_status()?;
|
||||
|
||||
let string_body = res.text().await?;
|
||||
|
||||
let pkgs = string_body
|
||||
.lines()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(serde_json::from_str)
|
||||
.collect::<Result<_, _>>()?;
|
||||
|
||||
convert_pkgs(&crate_name, pkgs)
|
||||
convert_pkgs(krate)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,8 +106,8 @@ impl Service<CrateName> for QueryCrate {
|
|||
}
|
||||
|
||||
fn call(&mut self, crate_name: CrateName) -> Self::Future {
|
||||
let client = self.client.clone();
|
||||
Self::query(client, crate_name).boxed()
|
||||
let index = self.index.clone();
|
||||
Self::query(index, crate_name).boxed()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
16
src/main.rs
16
src/main.rs
|
@ -28,6 +28,7 @@ mod utils;
|
|||
|
||||
use self::engine::Engine;
|
||||
use self::server::App;
|
||||
use self::utils::index::ManagedIndex;
|
||||
|
||||
/// Future crate's BoxFuture without the explicit lifetime parameter.
|
||||
pub type BoxFuture<T> = Pin<Box<dyn Future<Output = T> + Send>>;
|
||||
|
@ -70,7 +71,20 @@ async fn main() {
|
|||
|
||||
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), port);
|
||||
|
||||
let mut engine = Engine::new(client.clone(), logger.new(o!()));
|
||||
let mut managed_index = ManagedIndex::new(Duration::from_secs(20), logger.clone());
|
||||
if let Err(e) = managed_index.initial_clone().await {
|
||||
error!(
|
||||
logger,
|
||||
"failed running initial clone of the crates.io-index: {}", e
|
||||
);
|
||||
}
|
||||
|
||||
let index = managed_index.index();
|
||||
tokio::spawn(async move {
|
||||
managed_index.refresh_at_interval().await;
|
||||
});
|
||||
|
||||
let mut engine = Engine::new(client.clone(), index, logger.new(o!()));
|
||||
engine.set_metrics(metrics);
|
||||
|
||||
let svc_logger = logger.new(o!());
|
||||
|
|
64
src/utils/index.rs
Normal file
64
src/utils/index.rs
Normal file
|
@ -0,0 +1,64 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Error, Result};
|
||||
use crates_index::Index;
|
||||
use slog::{error, info, Logger};
|
||||
use tokio::task::spawn_blocking;
|
||||
use tokio::time::{self, Interval};
|
||||
|
||||
pub struct ManagedIndex {
|
||||
index: Index,
|
||||
update_interval: Interval,
|
||||
logger: Logger,
|
||||
}
|
||||
|
||||
impl ManagedIndex {
|
||||
pub fn new(update_interval: Duration, logger: Logger) -> Self {
|
||||
// the index path is configurable through the `CARGO_HOME` env variable
|
||||
let index = Index::new_cargo_default();
|
||||
let update_interval = time::interval(update_interval);
|
||||
Self {
|
||||
index,
|
||||
update_interval,
|
||||
logger,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index(&self) -> Index {
|
||||
self.index.clone()
|
||||
}
|
||||
|
||||
pub async fn initial_clone(&mut self) -> Result<()> {
|
||||
let index = self.index();
|
||||
let logger = self.logger.clone();
|
||||
|
||||
spawn_blocking(move || {
|
||||
if !index.exists() {
|
||||
info!(logger, "Cloning crates.io-index");
|
||||
index.retrieve()?;
|
||||
}
|
||||
Ok::<_, Error>(())
|
||||
})
|
||||
.await??;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn refresh_at_interval(&mut self) {
|
||||
loop {
|
||||
if let Err(e) = self.refresh().await {
|
||||
error!(
|
||||
self.logger,
|
||||
"failed refreshing the crates.io-index, the operation will be retried: {}", e
|
||||
);
|
||||
}
|
||||
self.update_interval.tick().await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn refresh(&self) -> Result<()> {
|
||||
let index = self.index();
|
||||
|
||||
spawn_blocking(move || index.retrieve_or_update()).await??;
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1 +1,2 @@
|
|||
pub mod cache;
|
||||
pub mod index;
|
||||
|
|
Loading…
Reference in a new issue