Simple tool for automatic file management

ft(filters/magic): support for MIME filtering and magic offsets

hauleth.dev cc25bdc6 7a1a7003

verified
Changed files
+68 -7
src
+39
Cargo.lock
···
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
+
name = "byteorder"
+
version = "1.5.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+
[[package]]
name = "bytes"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
+
]
+
+
[[package]]
+
name = "cfb"
+
version = "0.7.3"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f"
+
dependencies = [
+
"byteorder",
+
"fnv",
+
"uuid",
]
[[package]]
···
]
[[package]]
+
name = "fnv"
+
version = "1.0.7"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
[[package]]
+
name = "infer"
+
version = "0.15.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "cb33622da908807a06f9513c19b3c1ad50fab3e4137d82a78107d502075aa199"
+
dependencies = [
+
"cfb",
+
]
+
+
[[package]]
name = "inventory"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
"clap",
"color-eyre",
"futures",
+
"infer",
"libc",
"regex",
"serde",
···
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
+
+
[[package]]
+
name = "uuid"
+
version = "1.6.1"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
[[package]]
name = "valuable"
+1
Cargo.toml
···
clap = { version = "4.4.16", features = ["derive"] }
color-eyre = "0.6.2"
futures = "0.3.30"
+
infer = { version = "0.15.0", default-features = false, features = ["alloc", "cfb"] }
libc = "0.2.152"
regex = "1.10.2"
serde = { version = "1.0.195", features = ["derive"] }
+28 -7
src/filters.rs
···
use async_trait::async_trait;
use serde::Deserialize;
-
use tokio::io::{self, AsyncReadExt};
use tokio::fs;
+
use tokio::io::{self, AsyncReadExt, AsyncSeekExt};
use futures::prelude::*;
···
/// Content-based criterion evaluated by the `content_type` filter.
#[serde(rename_all = "snake_case")]
pub enum Magic {
/// Matches when the MIME type detected for the file equals this string.
Mime(String),
-
Bytes(Box<[u8]>),
+
/// Matches when the file contains `bytes` starting at `offset`.
Magic {
+
/// Byte sequence the file content is compared against.
bytes: Box<[u8]>,
+
/// Position, counted from the start of the file, where the comparison
/// begins. Defaults to 0 when omitted from the configuration.
#[serde(default)]
+
offset: u64,
+
},
}
-
async fn read_first_bytes(n: usize, path: &Path) -> io::Result<Box<[u8]>> {
+
async fn read_first_bytes(n: usize, path: &Path, offset: u64) -> io::Result<Box<[u8]>> {
+
use std::io::SeekFrom;
+
let mut file = fs::File::open(path).await?;
let mut buf = vec![0; n];
+
file.seek(SeekFrom::Start(offset)).await?;
file.read_exact(&mut buf).await?;
Ok(buf.into())
}
+
async fn guess_mime(path: &Path) -> Option<infer::Type> {
+
let mut file = fs::File::open(path).await.ok()?;
+
+
let mut buf = vec![0; 8192];
+
+
let len = file.read(&mut buf).await.ok()?;
+
+
infer::get(&buf[0..len])
+
}
+
#[typetag::deserialize(name = "content_type")]
#[async_trait]
impl Filter for Magic {
async fn matches(&self, file: &Path) -> bool {
match *self {
-
Magic::Bytes(ref bytes) => {
-
read_first_bytes(bytes.len(), file).await.map(|read| read == *bytes).unwrap_or(false)
-
},
-
Magic::Mime(_) => unimplemented!()
+
Magic::Magic { ref bytes, offset } => read_first_bytes(bytes.len(), file, offset)
+
.await
+
.map(|read| read == *bytes)
+
.unwrap_or(false),
+
Magic::Mime(ref mime_type) => guess_mime(file)
+
.await
+
.map(|typ| typ.mime_type() == mime_type)
+
.unwrap_or(false),
}
}
}