aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xsrc/cargo-meson.sh11
-rw-r--r--src/csum_file.rs81
-rw-r--r--src/hash.rs335
-rw-r--r--src/lib.rs3
-rw-r--r--src/loose.rs912
-rw-r--r--src/meson.build3
-rw-r--r--src/varint.rs15
7 files changed, 1358 insertions, 2 deletions
diff --git a/src/cargo-meson.sh b/src/cargo-meson.sh
index 99400986d9..3998db0435 100755
--- a/src/cargo-meson.sh
+++ b/src/cargo-meson.sh
@@ -26,7 +26,14 @@ then
exit $RET
fi
-if ! cmp "$BUILD_DIR/$BUILD_TYPE/libgitcore.a" "$BUILD_DIR/libgitcore.a" >/dev/null 2>&1
+case "$(cargo -vV | sed -s 's/^host: \(.*\)$/\1/')" in
+ *-windows-*)
+ LIBNAME=gitcore.lib;;
+ *)
+ LIBNAME=libgitcore.a;;
+esac
+
+if ! cmp "$BUILD_DIR/$BUILD_TYPE/$LIBNAME" "$BUILD_DIR/libgitcore.a" >/dev/null 2>&1
then
- cp "$BUILD_DIR/$BUILD_TYPE/libgitcore.a" "$BUILD_DIR/libgitcore.a"
+ cp "$BUILD_DIR/$BUILD_TYPE/$LIBNAME" "$BUILD_DIR/libgitcore.a"
fi
diff --git a/src/csum_file.rs b/src/csum_file.rs
new file mode 100644
index 0000000000..7f2c6c4fcb
--- /dev/null
+++ b/src/csum_file.rs
@@ -0,0 +1,81 @@
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation: version 2 of the License, dated June 1991.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, see <https://www.gnu.org/licenses/>.
+
+use crate::hash::{HashAlgorithm, GIT_MAX_RAWSZ};
+use std::ffi::CStr;
+use std::io::{self, Write};
+use std::os::raw::c_void;
+
+/// A writer that can write files identified by their hash or containing a trailing hash.
+pub struct HashFile {
+ ptr: *mut c_void,
+ algo: HashAlgorithm,
+}
+
+impl HashFile {
+ /// Create a new HashFile.
+ ///
+ /// The hash used will be `algo`, its name should be in `name`, and an open file descriptor
+ /// pointing to that file should be in `fd`.
+ pub fn new(algo: HashAlgorithm, fd: i32, name: &CStr) -> HashFile {
+ HashFile {
+ ptr: unsafe { c::hashfd(algo.hash_algo_ptr(), fd, name.as_ptr()) },
+ algo,
+ }
+ }
+
+ /// Finalize this HashFile instance.
+ ///
+ /// Returns the hash computed over the data.
+ pub fn finalize(self, component: u32, flags: u32) -> Vec<u8> {
+ let mut result = vec![0u8; GIT_MAX_RAWSZ];
+ unsafe { c::finalize_hashfile(self.ptr, result.as_mut_ptr(), component, flags) };
+ result.truncate(self.algo.raw_len());
+ result
+ }
+}
+
+impl Write for HashFile {
+ fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+ for chunk in data.chunks(u32::MAX as usize) {
+ unsafe {
+ c::hashwrite(
+ self.ptr,
+ chunk.as_ptr() as *const c_void,
+ chunk.len() as u32,
+ )
+ };
+ }
+ Ok(data.len())
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ unsafe { c::hashflush(self.ptr) };
+ Ok(())
+ }
+}
+
+pub mod c {
+ use std::os::raw::{c_char, c_int, c_void};
+
+ extern "C" {
+ pub fn hashfd(algop: *const c_void, fd: i32, name: *const c_char) -> *mut c_void;
+ pub fn hashwrite(f: *mut c_void, data: *const c_void, len: u32);
+ pub fn hashflush(f: *mut c_void);
+ pub fn finalize_hashfile(
+ f: *mut c_void,
+ data: *mut u8,
+ component: u32,
+ flags: u32,
+ ) -> c_int;
+ }
+}
diff --git a/src/hash.rs b/src/hash.rs
new file mode 100644
index 0000000000..8798a50aef
--- /dev/null
+++ b/src/hash.rs
@@ -0,0 +1,335 @@
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation: version 2 of the License, dated June 1991.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, see <https://www.gnu.org/licenses/>.
+
+use std::io::{self, Write};
+use std::os::raw::c_void;
+
+pub const GIT_MAX_RAWSZ: usize = 32;
+
+/// A binary object ID.
+#[repr(C)]
+#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct ObjectID {
+ pub hash: [u8; GIT_MAX_RAWSZ],
+ pub algo: u32,
+}
+
+#[allow(dead_code)]
+impl ObjectID {
+ pub fn as_slice(&self) -> &[u8] {
+ match HashAlgorithm::from_u32(self.algo) {
+ Some(algo) => &self.hash[0..algo.raw_len()],
+ None => &self.hash,
+ }
+ }
+
+ pub fn as_mut_slice(&mut self) -> &mut [u8] {
+ match HashAlgorithm::from_u32(self.algo) {
+ Some(algo) => &mut self.hash[0..algo.raw_len()],
+ None => &mut self.hash,
+ }
+ }
+}
+
+pub struct Hasher {
+ algo: HashAlgorithm,
+ safe: bool,
+ ctx: *mut c_void,
+}
+
+impl Hasher {
+ /// Create a new safe hasher.
+ pub fn new(algo: HashAlgorithm) -> Hasher {
+ let ctx = unsafe { c::git_hash_alloc() };
+ unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) };
+ Hasher {
+ algo,
+ safe: true,
+ ctx,
+ }
+ }
+
+ /// Return whether this is a safe hasher.
+ pub fn is_safe(&self) -> bool {
+ self.safe
+ }
+
+ /// Update the hasher with the specified data.
+ pub fn update(&mut self, data: &[u8]) {
+ unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) };
+ }
+
+ /// Return an object ID, consuming the hasher.
+ pub fn into_oid(self) -> ObjectID {
+ let mut oid = ObjectID {
+ hash: [0u8; 32],
+ algo: self.algo as u32,
+ };
+ unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) };
+ oid
+ }
+
+ /// Return a hash as a `Vec`, consuming the hasher.
+ pub fn into_vec(self) -> Vec<u8> {
+ let mut v = vec![0u8; self.algo.raw_len()];
+ unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) };
+ v
+ }
+}
+
+impl Write for Hasher {
+ fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+ self.update(data);
+ Ok(data.len())
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ Ok(())
+ }
+}
+
+impl Clone for Hasher {
+ fn clone(&self) -> Hasher {
+ let ctx = unsafe { c::git_hash_alloc() };
+ unsafe { c::git_hash_clone(ctx, self.ctx) };
+ Hasher {
+ algo: self.algo,
+ safe: self.safe,
+ ctx,
+ }
+ }
+}
+
+impl Drop for Hasher {
+ fn drop(&mut self) {
+ unsafe { c::git_hash_free(self.ctx) };
+ }
+}
+
+/// A hash algorithm,
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub enum HashAlgorithm {
+ SHA1 = 1,
+ SHA256 = 2,
+}
+
+#[allow(dead_code)]
+impl HashAlgorithm {
+ const SHA1_NULL_OID: ObjectID = ObjectID {
+ hash: [0u8; 32],
+ algo: Self::SHA1 as u32,
+ };
+ const SHA256_NULL_OID: ObjectID = ObjectID {
+ hash: [0u8; 32],
+ algo: Self::SHA256 as u32,
+ };
+
+ const SHA1_EMPTY_TREE: ObjectID = ObjectID {
+ hash: *b"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ algo: Self::SHA1 as u32,
+ };
+ const SHA256_EMPTY_TREE: ObjectID = ObjectID {
+ hash: *b"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc\x53\x21",
+ algo: Self::SHA256 as u32,
+ };
+
+ const SHA1_EMPTY_BLOB: ObjectID = ObjectID {
+ hash: *b"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ algo: Self::SHA1 as u32,
+ };
+ const SHA256_EMPTY_BLOB: ObjectID = ObjectID {
+ hash: *b"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72\x18\x13",
+ algo: Self::SHA256 as u32,
+ };
+
+ /// Return a hash algorithm based on the internal integer ID used by Git.
+ ///
+ /// Returns `None` if the algorithm doesn't indicate a valid algorithm.
+ pub const fn from_u32(algo: u32) -> Option<HashAlgorithm> {
+ match algo {
+ 1 => Some(HashAlgorithm::SHA1),
+ 2 => Some(HashAlgorithm::SHA256),
+ _ => None,
+ }
+ }
+
+ /// Return a hash algorithm based on the internal integer ID used by Git.
+ ///
+ /// Returns `None` if the algorithm doesn't indicate a valid algorithm.
+ pub const fn from_format_id(algo: u32) -> Option<HashAlgorithm> {
+ match algo {
+ 0x73686131 => Some(HashAlgorithm::SHA1),
+ 0x73323536 => Some(HashAlgorithm::SHA256),
+ _ => None,
+ }
+ }
+
+ /// The name of this hash algorithm as a string suitable for the configuration file.
+ pub const fn name(self) -> &'static str {
+ match self {
+ HashAlgorithm::SHA1 => "sha1",
+ HashAlgorithm::SHA256 => "sha256",
+ }
+ }
+
+ /// The format ID of this algorithm for binary formats.
+ ///
+ /// Note that when writing this to a data format, it should be written in big-endian format
+ /// explicitly.
+ pub const fn format_id(self) -> u32 {
+ match self {
+ HashAlgorithm::SHA1 => 0x73686131,
+ HashAlgorithm::SHA256 => 0x73323536,
+ }
+ }
+
+ /// The length of binary object IDs in this algorithm in bytes.
+ pub const fn raw_len(self) -> usize {
+ match self {
+ HashAlgorithm::SHA1 => 20,
+ HashAlgorithm::SHA256 => 32,
+ }
+ }
+
+ /// The length of object IDs in this algorithm in hexadecimal characters.
+ pub const fn hex_len(self) -> usize {
+ self.raw_len() * 2
+ }
+
+ /// The number of bytes which is processed by one iteration of this algorithm's compression
+ /// function.
+ pub const fn block_size(self) -> usize {
+ match self {
+ HashAlgorithm::SHA1 => 64,
+ HashAlgorithm::SHA256 => 64,
+ }
+ }
+
+ /// The object ID representing the empty blob.
+ pub const fn empty_blob(self) -> &'static ObjectID {
+ match self {
+ HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_BLOB,
+ HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_BLOB,
+ }
+ }
+
+ /// The object ID representing the empty tree.
+ pub const fn empty_tree(self) -> &'static ObjectID {
+ match self {
+ HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_TREE,
+ HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_TREE,
+ }
+ }
+
+ /// The object ID which is all zeros.
+ pub const fn null_oid(self) -> &'static ObjectID {
+ match self {
+ HashAlgorithm::SHA1 => &Self::SHA1_NULL_OID,
+ HashAlgorithm::SHA256 => &Self::SHA256_NULL_OID,
+ }
+ }
+
+ /// A pointer to the C `struct git_hash_algo` for interoperability with C.
+ pub fn hash_algo_ptr(self) -> *const c_void {
+ unsafe { c::hash_algo_ptr_by_offset(self as u32) }
+ }
+
+ /// Create a hasher for this algorithm.
+ pub fn hasher(self) -> Hasher {
+ Hasher::new(self)
+ }
+}
+
+pub mod c {
+ use std::os::raw::c_void;
+
+ extern "C" {
+ pub fn hash_algo_ptr_by_offset(n: u32) -> *const c_void;
+ pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void;
+ pub fn git_hash_alloc() -> *mut c_void;
+ pub fn git_hash_free(ctx: *mut c_void);
+ pub fn git_hash_init(dst: *mut c_void, algop: *const c_void);
+ pub fn git_hash_clone(dst: *mut c_void, src: *const c_void);
+ pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize);
+ pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void);
+ pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void);
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{HashAlgorithm, ObjectID};
+ use std::io::Write;
+
+ fn all_algos() -> &'static [HashAlgorithm] {
+ &[HashAlgorithm::SHA1, HashAlgorithm::SHA256]
+ }
+
+ #[test]
+ fn format_id_round_trips() {
+ for algo in all_algos() {
+ assert_eq!(
+ *algo,
+ HashAlgorithm::from_format_id(algo.format_id()).unwrap()
+ );
+ }
+ }
+
+ #[test]
+ fn offset_round_trips() {
+ for algo in all_algos() {
+ assert_eq!(*algo, HashAlgorithm::from_u32(*algo as u32).unwrap());
+ }
+ }
+
+ #[test]
+ fn slices_have_correct_length() {
+ for algo in all_algos() {
+ for oid in [algo.null_oid(), algo.empty_blob(), algo.empty_tree()] {
+ assert_eq!(oid.as_slice().len(), algo.raw_len());
+ }
+ }
+ }
+
+ #[test]
+ fn hasher_works_correctly() {
+ for algo in all_algos() {
+ let tests: &[(&[u8], &ObjectID)] = &[
+ (b"blob 0\0", algo.empty_blob()),
+ (b"tree 0\0", algo.empty_tree()),
+ ];
+ for (data, oid) in tests {
+ let mut h = algo.hasher();
+ assert_eq!(h.is_safe(), true);
+ // Test that this works incrementally.
+ h.update(&data[0..2]);
+ h.update(&data[2..]);
+
+ let h2 = h.clone();
+
+ let actual_oid = h.into_oid();
+ assert_eq!(**oid, actual_oid);
+
+ let v = h2.into_vec();
+ assert_eq!((*oid).as_slice(), &v);
+
+ let mut h = algo.hasher();
+ h.write_all(&data[0..2]).unwrap();
+ h.write_all(&data[2..]).unwrap();
+
+ let actual_oid = h.into_oid();
+ assert_eq!(**oid, actual_oid);
+ }
+ }
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9da70d8b57..0c598298b1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1 +1,4 @@
+pub mod csum_file;
+pub mod hash;
+pub mod loose;
pub mod varint;
diff --git a/src/loose.rs b/src/loose.rs
new file mode 100644
index 0000000000..a4e7d2fa48
--- /dev/null
+++ b/src/loose.rs
@@ -0,0 +1,912 @@
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation: version 2 of the License, dated June 1991.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, see <https://www.gnu.org/licenses/>.
+
+use crate::hash::{HashAlgorithm, ObjectID, GIT_MAX_RAWSZ};
+use std::collections::BTreeMap;
+use std::convert::TryInto;
+use std::io::{self, Write};
+
+/// The type of object stored in the map.
+///
+/// If this value is `Reserved`, then it is never written to disk and is used primarily to store
+/// certain hard-coded objects, like the empty tree, empty blob, or null object ID.
+///
+/// If this value is `LooseObject`, then this represents a loose object. `Shallow` represents a
+/// shallow commit, its parent, or its tree. `Submodule` represents a submodule commit.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
+pub enum MapType {
+ Reserved = 0,
+ LooseObject = 1,
+ Shallow = 2,
+ Submodule = 3,
+}
+
+impl MapType {
+ pub fn from_u32(n: u32) -> Option<MapType> {
+ match n {
+ 0 => Some(Self::Reserved),
+ 1 => Some(Self::LooseObject),
+ 2 => Some(Self::Shallow),
+ 3 => Some(Self::Submodule),
+ _ => None,
+ }
+ }
+}
+
+/// The value of an object stored in a `LooseObjectMemoryMap`.
+///
+/// This keeps the object ID to which the key is mapped and its kind together.
+struct MappedObject {
+ oid: ObjectID,
+ kind: MapType,
+}
+
+/// Memory storage for a loose object.
+struct LooseObjectMemoryMap {
+ to_compat: BTreeMap<ObjectID, MappedObject>,
+ to_storage: BTreeMap<ObjectID, MappedObject>,
+ compat: HashAlgorithm,
+ storage: HashAlgorithm,
+}
+
+impl LooseObjectMemoryMap {
+ /// Create a new `LooseObjectMemoryMap`.
+ ///
+ /// The storage and compatibility `HashAlgorithm` instances are used to store the object IDs in
+ /// the correct map.
+ fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> LooseObjectMemoryMap {
+ LooseObjectMemoryMap {
+ to_compat: BTreeMap::new(),
+ to_storage: BTreeMap::new(),
+ compat,
+ storage,
+ }
+ }
+
+ fn len(&self) -> usize {
+ self.to_compat.len()
+ }
+
+ /// Write this map to an interface implementing `std::io::Write`.
+ fn write<W: Write>(&self, wrtr: W) -> io::Result<()> {
+ const VERSION_NUMBER: u32 = 1;
+ const NUM_OBJECT_FORMATS: u32 = 2;
+ const PADDING: [u8; 4] = [0u8; 4];
+
+ let mut wrtr = wrtr;
+ let header_size: u32 = 4 + 4 + 4 + 4 + 4 + (4 + 4 + 8) * 2 + 8;
+
+ wrtr.write_all(b"LMAP")?;
+ wrtr.write_all(&VERSION_NUMBER.to_be_bytes())?;
+ wrtr.write_all(&header_size.to_be_bytes())?;
+ wrtr.write_all(&(self.to_compat.len() as u32).to_be_bytes())?;
+ wrtr.write_all(&NUM_OBJECT_FORMATS.to_be_bytes())?;
+
+ let storage_short_len = self.find_short_name_len(&self.to_compat, self.storage);
+ let compat_short_len = self.find_short_name_len(&self.to_storage, self.compat);
+
+ let storage_npadding = Self::required_nul_padding(self.to_compat.len(), storage_short_len);
+ let compat_npadding = Self::required_nul_padding(self.to_compat.len(), compat_short_len);
+
+ let mut offset: u64 = header_size as u64;
+
+ for (algo, len, npadding) in &[
+ (self.storage, storage_short_len, storage_npadding),
+ (self.compat, compat_short_len, compat_npadding),
+ ] {
+ wrtr.write_all(&algo.format_id().to_be_bytes())?;
+ wrtr.write_all(&(*len as u32).to_be_bytes())?;
+
+ offset += *npadding;
+ wrtr.write_all(&offset.to_be_bytes())?;
+
+ offset += self.to_compat.len() as u64 * (*len as u64 + algo.raw_len() as u64 + 4);
+ }
+
+ wrtr.write_all(&offset.to_be_bytes())?;
+
+ let order_map: BTreeMap<&ObjectID, usize> = self
+ .to_compat
+ .keys()
+ .enumerate()
+ .map(|(i, oid)| (oid, i))
+ .collect();
+
+ wrtr.write_all(&PADDING[0..storage_npadding as usize])?;
+ for oid in self.to_compat.keys() {
+ wrtr.write_all(&oid.as_slice()[0..storage_short_len])?;
+ }
+ for oid in self.to_compat.keys() {
+ wrtr.write_all(oid.as_slice())?;
+ }
+ for meta in self.to_compat.values() {
+ wrtr.write_all(&(meta.kind as u32).to_be_bytes())?;
+ }
+
+ wrtr.write_all(&PADDING[0..compat_npadding as usize])?;
+ for oid in self.to_storage.keys() {
+ wrtr.write_all(&oid.as_slice()[0..compat_short_len])?;
+ }
+ for meta in self.to_compat.values() {
+ wrtr.write_all(meta.oid.as_slice())?;
+ }
+ for meta in self.to_storage.values() {
+ wrtr.write_all(&(order_map[&meta.oid] as u32).to_be_bytes())?;
+ }
+
+ Ok(())
+ }
+
+ fn required_nul_padding(nitems: usize, short_len: usize) -> u64 {
+ let shortened_table_len = nitems as u64 * short_len as u64;
+ let misalignment = shortened_table_len & 3;
+ // If the value is 0, return 0; otherwise, return the difference from 4.
+ (4 - misalignment) & 3
+ }
+
+ fn last_matching_offset(a: &ObjectID, b: &ObjectID, algop: HashAlgorithm) -> usize {
+ for i in 0..=algop.raw_len() {
+ if a.hash[i] != b.hash[i] {
+ return i;
+ }
+ }
+ algop.raw_len()
+ }
+
+ fn find_short_name_len(
+ &self,
+ map: &BTreeMap<ObjectID, MappedObject>,
+ algop: HashAlgorithm,
+ ) -> usize {
+ if map.len() <= 1 {
+ return 1;
+ }
+ let mut len = 1;
+ let mut iter = map.keys();
+ let mut cur = match iter.next() {
+ Some(cur) => cur,
+ None => return len,
+ };
+ for item in iter {
+ let offset = Self::last_matching_offset(cur, item, algop);
+ if offset >= len {
+ len = offset + 1;
+ }
+ cur = item;
+ }
+ if len > algop.raw_len() {
+ algop.raw_len()
+ } else {
+ len
+ }
+ }
+}
+
+struct ObjectFormatData {
+ data_off: usize,
+ shortened_len: usize,
+ full_off: usize,
+ mapping_off: Option<usize>,
+}
+
+pub struct MmapedLooseObjectMapIter<'a> {
+ offset: usize,
+ algos: Vec<HashAlgorithm>,
+ source: &'a MmapedLooseObjectMap<'a>,
+}
+
+impl<'a> Iterator for MmapedLooseObjectMapIter<'a> {
+ type Item = Vec<ObjectID>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.offset >= self.source.nitems {
+ return None;
+ }
+ let offset = self.offset;
+ self.offset += 1;
+ let v: Vec<ObjectID> = self
+ .algos
+ .iter()
+ .cloned()
+ .filter_map(|algo| self.source.oid_from_offset(offset, algo))
+ .collect();
+ if v.len() != self.algos.len() {
+ return None;
+ }
+ Some(v)
+ }
+}
+
+#[allow(dead_code)]
+pub struct MmapedLooseObjectMap<'a> {
+ memory: &'a [u8],
+ nitems: usize,
+ meta_off: usize,
+ obj_formats: BTreeMap<HashAlgorithm, ObjectFormatData>,
+ main_algo: HashAlgorithm,
+}
+
+#[derive(Debug)]
+#[allow(dead_code)]
+enum MmapedParseError {
+ HeaderTooSmall,
+ InvalidSignature,
+ InvalidVersion,
+ UnknownAlgorithm,
+ OffsetTooLarge,
+ TooFewObjectFormats,
+ UnalignedData,
+ InvalidTrailerOffset,
+}
+
+#[allow(dead_code)]
+impl<'a> MmapedLooseObjectMap<'a> {
+ fn new(
+ slice: &'a [u8],
+ hash_algo: HashAlgorithm,
+ ) -> Result<MmapedLooseObjectMap<'a>, MmapedParseError> {
+ let object_format_header_size = 4 + 4 + 8;
+ let trailer_offset_size = 8;
+ let header_size: usize =
+ 4 + 4 + 4 + 4 + 4 + object_format_header_size * 2 + trailer_offset_size;
+ if slice.len() < header_size {
+ return Err(MmapedParseError::HeaderTooSmall);
+ }
+ if slice[0..4] != *b"LMAP" {
+ return Err(MmapedParseError::InvalidSignature);
+ }
+ if Self::u32_at_offset(slice, 4) != 1 {
+ return Err(MmapedParseError::InvalidVersion);
+ }
+ let _ = Self::u32_at_offset(slice, 8) as usize;
+ let nitems = Self::u32_at_offset(slice, 12) as usize;
+ let nobj_formats = Self::u32_at_offset(slice, 16) as usize;
+ if nobj_formats < 2 {
+ return Err(MmapedParseError::TooFewObjectFormats);
+ }
+ let mut offset = 20;
+ let mut meta_off = None;
+ let mut data = BTreeMap::new();
+ for i in 0..nobj_formats {
+ if offset + object_format_header_size + trailer_offset_size > slice.len() {
+ return Err(MmapedParseError::HeaderTooSmall);
+ }
+ let format_id = Self::u32_at_offset(slice, offset);
+ let shortened_len = Self::u32_at_offset(slice, offset + 4) as usize;
+ let data_off = Self::u64_at_offset(slice, offset + 8);
+
+ let algo = HashAlgorithm::from_format_id(format_id)
+ .ok_or(MmapedParseError::UnknownAlgorithm)?;
+ let data_off: usize = data_off
+ .try_into()
+ .map_err(|_| MmapedParseError::OffsetTooLarge)?;
+
+ // Every object format must have these entries.
+ let shortened_table_len = shortened_len
+ .checked_mul(nitems)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ let full_off = data_off
+ .checked_add(shortened_table_len)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ Self::verify_aligned(full_off)?;
+ Self::verify_valid(slice, full_off as u64)?;
+
+ let full_length = algo
+ .raw_len()
+ .checked_mul(nitems)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ let off = full_length
+ .checked_add(full_off)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ Self::verify_aligned(off)?;
+ Self::verify_valid(slice, off as u64)?;
+
+ // This is for the metadata for the first object format and for the order mapping for
+ // other object formats.
+ let meta_size = nitems
+ .checked_mul(4)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ let meta_end = off
+ .checked_add(meta_size)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ Self::verify_valid(slice, meta_end as u64)?;
+
+ let mut mapping_off = None;
+ if i == 0 {
+ meta_off = Some(off);
+ } else {
+ mapping_off = Some(off);
+ }
+
+ data.insert(
+ algo,
+ ObjectFormatData {
+ data_off,
+ shortened_len,
+ full_off,
+ mapping_off,
+ },
+ );
+ offset += object_format_header_size;
+ }
+ let trailer = Self::u64_at_offset(slice, offset);
+ Self::verify_aligned(trailer as usize)?;
+ Self::verify_valid(slice, trailer)?;
+ let end = trailer
+ .checked_add(hash_algo.raw_len() as u64)
+ .ok_or(MmapedParseError::OffsetTooLarge)?;
+ if end != slice.len() as u64 {
+ return Err(MmapedParseError::InvalidTrailerOffset);
+ }
+ match meta_off {
+ Some(meta_off) => Ok(MmapedLooseObjectMap {
+ memory: slice,
+ nitems,
+ meta_off,
+ obj_formats: data,
+ main_algo: hash_algo,
+ }),
+ None => Err(MmapedParseError::TooFewObjectFormats),
+ }
+ }
+
+ fn iter(&self) -> MmapedLooseObjectMapIter<'_> {
+ let mut algos = Vec::with_capacity(self.obj_formats.len());
+ algos.push(self.main_algo);
+ for algo in self.obj_formats.keys().cloned() {
+ if algo != self.main_algo {
+ algos.push(algo);
+ }
+ }
+ MmapedLooseObjectMapIter {
+ offset: 0,
+ algos,
+ source: self,
+ }
+ }
+
+ /// Treats `sl` as if it were a set of slices of `wanted.len()` bytes, and searches for
+ /// `wanted` within it.
+ ///
+ /// If found, returns the offset of the subslice in `sl`.
+ ///
+ /// ```
+ /// let sl = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+ ///
+ /// assert_eq!(MmapedLooseObjectMap::binary_search_slice(sl, &[2, 3]), Some(1));
+ /// assert_eq!(MmapedLooseObjectMap::binary_search_slice(sl, &[6, 7]), Some(4));
+ /// assert_eq!(MmapedLooseObjectMap::binary_search_slice(sl, &[1, 2]), None);
+ /// assert_eq!(MmapedLooseObjectMap::binary_search_slice(sl, &[10, 20]), None);
+ /// ```
+ fn binary_search_slice(sl: &[u8], wanted: &[u8]) -> Option<usize> {
+ let len = wanted.len();
+ let res = sl.binary_search_by(|item| {
+ // We would like element_offset, but that is currently nightly only. Instead, do a
+ // pointer subtraction to find the index.
+ let index = unsafe { (item as *const u8).offset_from(sl.as_ptr()) } as usize;
+ // Now we have the index of this object. Round it down to the nearest full-sized
+ // chunk to find the actual offset where this starts.
+ let index = index - (index % len);
+ // Compute the comparison of that value instead, which will provide the expected
+ // result.
+ sl[index..index + wanted.len()].cmp(wanted)
+ });
+ res.ok().map(|offset| offset / len)
+ }
+
+ /// Look up `oid` in the map in order to convert it to `algo`.
+ ///
+ /// If this object is in the map, return the offset in the table for the main algorithm.
+ fn look_up_object(&self, oid: &ObjectID) -> Option<usize> {
+ let oid_algo = HashAlgorithm::from_u32(oid.algo)?;
+ let params = self.obj_formats.get(&oid_algo)?;
+ let short_table =
+ &self.memory[params.data_off..params.data_off + (params.shortened_len * self.nitems)];
+ let index =
+ Self::binary_search_slice(short_table, &oid.as_slice()[0..params.shortened_len])?;
+ match params.mapping_off {
+ Some(from_off) => {
+ // oid is in a compatibility algorithm. Find the mapping index.
+ let mapped = Self::u32_at_offset(self.memory, from_off + index * 4) as usize;
+ if mapped >= self.nitems {
+ return None;
+ }
+ let oid_offset = params.full_off + mapped * oid_algo.raw_len();
+ if self.memory[oid_offset..oid_offset + oid_algo.raw_len()] != *oid.as_slice() {
+ return None;
+ }
+ Some(mapped)
+ }
+ None => {
+ // oid is in the main algorithm. Find the object ID in the main map to confirm
+ // it's correct.
+ let oid_offset = params.full_off + index * oid_algo.raw_len();
+ if self.memory[oid_offset..oid_offset + oid_algo.raw_len()] != *oid.as_slice() {
+ return None;
+ }
+ Some(index)
+ }
+ }
+ }
+
+ #[allow(dead_code)]
+ fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<MappedObject> {
+ let main = self.look_up_object(oid)?;
+ let meta = MapType::from_u32(Self::u32_at_offset(self.memory, self.meta_off + (main * 4)))?;
+ Some(MappedObject {
+ oid: self.oid_from_offset(main, algo)?,
+ kind: meta,
+ })
+ }
+
+ fn map_oid(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<ObjectID> {
+ if algo as u32 == oid.algo {
+ return Some(oid.clone());
+ }
+
+ let main = self.look_up_object(oid)?;
+ self.oid_from_offset(main, algo)
+ }
+
+ fn oid_from_offset(&self, offset: usize, algo: HashAlgorithm) -> Option<ObjectID> {
+ let aparams = self.obj_formats.get(&algo)?;
+
+ let mut hash = [0u8; GIT_MAX_RAWSZ];
+ let len = algo.raw_len();
+ let oid_off = aparams.full_off + (offset * len);
+ hash[0..len].copy_from_slice(&self.memory[oid_off..oid_off + len]);
+ Some(ObjectID {
+ hash,
+ algo: algo as u32,
+ })
+ }
+
+ fn u32_at_offset(slice: &[u8], offset: usize) -> u32 {
+ u32::from_be_bytes(slice[offset..offset + 4].try_into().unwrap())
+ }
+
+ fn u64_at_offset(slice: &[u8], offset: usize) -> u64 {
+ u64::from_be_bytes(slice[offset..offset + 8].try_into().unwrap())
+ }
+
+ fn verify_aligned(offset: usize) -> Result<(), MmapedParseError> {
+ if (offset & 3) != 0 {
+ return Err(MmapedParseError::UnalignedData);
+ }
+ Ok(())
+ }
+
+ fn verify_valid(slice: &[u8], offset: u64) -> Result<(), MmapedParseError> {
+ if offset >= slice.len() as u64 {
+ return Err(MmapedParseError::OffsetTooLarge);
+ }
+ Ok(())
+ }
+}
+
+/// A map for loose and other non-packed object IDs that maps between a storage and compatibility
+/// mapping.
+///
+/// In addition to the in-memory option, there is an optional batched storage, which can be used to
+/// write objects to disk in an efficient way.
+pub struct LooseObjectMap {
+ mem: LooseObjectMemoryMap,
+ batch: Option<LooseObjectMemoryMap>,
+}
+
+impl LooseObjectMap {
+ /// Create a new `LooseObjectMap` with the given hash algorithms.
+ ///
+ /// This initializes the memory map to automatically map the empty tree, empty blob, and null
+ /// object ID.
+ pub fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> LooseObjectMap {
+ let mut map = LooseObjectMemoryMap::new(storage, compat);
+ for (main, compat) in &[
+ (storage.empty_tree(), compat.empty_tree()),
+ (storage.empty_blob(), compat.empty_blob()),
+ (storage.null_oid(), compat.null_oid()),
+ ] {
+ map.to_storage.insert(
+ (*compat).clone(),
+ MappedObject {
+ oid: (*main).clone(),
+ kind: MapType::Reserved,
+ },
+ );
+ map.to_compat.insert(
+ (*main).clone(),
+ MappedObject {
+ oid: (*compat).clone(),
+ kind: MapType::Reserved,
+ },
+ );
+ }
+ LooseObjectMap {
+ mem: map,
+ batch: None,
+ }
+ }
+
+ pub fn hash_algo(&self) -> HashAlgorithm {
+ self.mem.storage
+ }
+
+ /// Start a batch for efficient writing.
+ ///
+ /// If there is already a batch started, this does nothing and the existing batch is retained.
+ pub fn start_batch(&mut self) {
+ if self.batch.is_none() {
+ self.batch = Some(LooseObjectMemoryMap::new(self.mem.storage, self.mem.compat));
+ }
+ }
+
+ pub fn batch_len(&self) -> Option<usize> {
+ self.batch.as_ref().map(|b| b.len())
+ }
+
+ /// If a batch exists, write it to the writer.
+ pub fn finish_batch<W: Write>(&mut self, w: W) -> io::Result<()> {
+ if let Some(txn) = self.batch.take() {
+ txn.write(w)?;
+ }
+ Ok(())
+ }
+
+ /// If a batch exists, write it to the writer.
+ pub fn abort_batch(&mut self) {
+ self.batch = None;
+ }
+
+ /// Return whether there is a batch already started.
+ ///
+ /// If you just want a batch to exist and don't care whether one has already been started, you
+ /// may simply call `start_batch` unconditionally.
+ pub fn has_batch(&self) -> bool {
+ self.batch.is_some()
+ }
+
+ /// Insert an object into the map.
+ ///
+ /// If `write` is true and there is a batch started, write the object into the batch as well as
+ /// into the memory map.
+ pub fn insert(&mut self, oid1: &ObjectID, oid2: &ObjectID, kind: MapType, write: bool) {
+ let (compat_oid, storage_oid) =
+ if HashAlgorithm::from_u32(oid1.algo) == Some(self.mem.compat) {
+ (oid1, oid2)
+ } else {
+ (oid2, oid1)
+ };
+ Self::insert_into(&mut self.mem, storage_oid, compat_oid, kind);
+ if write {
+ if let Some(ref mut batch) = self.batch {
+ Self::insert_into(batch, storage_oid, compat_oid, kind);
+ }
+ }
+ }
+
+ fn insert_into(
+ map: &mut LooseObjectMemoryMap,
+ storage: &ObjectID,
+ compat: &ObjectID,
+ kind: MapType,
+ ) {
+ map.to_compat.insert(
+ storage.clone(),
+ MappedObject {
+ oid: compat.clone(),
+ kind,
+ },
+ );
+ map.to_storage.insert(
+ compat.clone(),
+ MappedObject {
+ oid: storage.clone(),
+ kind,
+ },
+ );
+ }
+
+ #[allow(dead_code)]
+ fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<&MappedObject> {
+ let map = if algo == self.mem.storage {
+ &self.mem.to_storage
+ } else {
+ &self.mem.to_compat
+ };
+ map.get(oid)
+ }
+
+ #[allow(dead_code)]
+ fn map_oid<'a, 'b: 'a>(
+ &'b self,
+ oid: &'a ObjectID,
+ algo: HashAlgorithm,
+ ) -> Option<&'a ObjectID> {
+ if algo as u32 == oid.algo {
+ return Some(oid);
+ }
+ let entry = self.map_object(oid, algo);
+ entry.map(|obj| &obj.oid)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{LooseObjectMap, LooseObjectMemoryMap, MapType, MmapedLooseObjectMap};
+ use crate::hash::{HashAlgorithm, Hasher, ObjectID};
+ use std::convert::TryInto;
+ use std::io::{self, Cursor, Write};
+
+ struct TrailingWriter {
+ curs: Cursor<Vec<u8>>,
+ hasher: Hasher,
+ }
+
+ impl TrailingWriter {
+ fn new() -> TrailingWriter {
+ TrailingWriter {
+ curs: Cursor::new(Vec::new()),
+ hasher: Hasher::new(HashAlgorithm::SHA256),
+ }
+ }
+
+ fn finalize(mut self) -> Vec<u8> {
+ let _ = self.hasher.flush();
+ let mut v = self.curs.into_inner();
+ v.extend(self.hasher.into_vec());
+ v
+ }
+ }
+
+ impl Write for TrailingWriter {
+ fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+ self.hasher.write_all(data)?;
+ self.curs.write_all(data)?;
+ Ok(data.len())
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.hasher.flush()?;
+ self.curs.flush()?;
+ Ok(())
+ }
+ }
+
+ fn sha1_oid(b: &[u8]) -> ObjectID {
+ assert_eq!(b.len(), 20);
+ let mut data = [0u8; 32];
+ data[0..20].copy_from_slice(b);
+ ObjectID {
+ hash: data,
+ algo: HashAlgorithm::SHA1 as u32,
+ }
+ }
+
+ fn sha256_oid(b: &[u8]) -> ObjectID {
+ assert_eq!(b.len(), 32);
+ ObjectID {
+ hash: b.try_into().unwrap(),
+ algo: HashAlgorithm::SHA256 as u32,
+ }
+ }
+
+ fn test_entries() -> &'static [(&'static str, &'static [u8], &'static [u8], MapType, bool)] {
+ // These are all example blobs containing the content in the first argument.
+ &[
+ ("abc", b"\xf2\xba\x8f\x84\xab\x5c\x1b\xce\x84\xa7\xb4\x41\xcb\x19\x59\xcf\xc7\x09\x3b\x7f", b"\xc1\xcf\x6e\x46\x50\x77\x93\x0e\x88\xdc\x51\x36\x64\x1d\x40\x2f\x72\xa2\x29\xdd\xd9\x96\xf6\x27\xd6\x0e\x96\x39\xea\xba\x35\xa6", MapType::LooseObject, false),
+ ("def", b"\x0c\x00\x38\x32\xe7\xbf\xa9\xca\x8b\x5c\x20\x35\xc9\xbd\x68\x4a\x5f\x26\x23\xbc", b"\x8a\x90\x17\x26\x48\x4d\xb0\xf2\x27\x9f\x30\x8d\x58\x96\xd9\x6b\xf6\x3a\xd6\xde\x95\x7c\xa3\x8a\xdc\x33\x61\x68\x03\x6e\xf6\x63", MapType::Shallow, true),
+ ("ghi", b"\x45\xa8\x2e\x29\x5c\x52\x47\x31\x14\xc5\x7c\x18\xf4\xf5\x23\x68\xdf\x2a\x3c\xfd", b"\x6e\x47\x4c\x74\xf5\xd7\x78\x14\xc7\xf7\xf0\x7c\x37\x80\x07\x90\x53\x42\xaf\x42\x81\xe6\x86\x8d\x33\x46\x45\x4b\xb8\x63\xab\xc3", MapType::Submodule, false),
+ ("jkl", b"\x45\x32\x8c\x36\xff\x2e\x9b\x9b\x4e\x59\x2c\x84\x7d\x3f\x9a\x7f\xd9\xb3\xe7\x16", b"\xc3\xee\xf7\x54\xa2\x1e\xc6\x9d\x43\x75\xbe\x6f\x18\x47\x89\xa8\x11\x6f\xd9\x66\xfc\x67\xdc\x31\xd2\x11\x15\x42\xc8\xd5\xa0\xaf", MapType::LooseObject, true),
+ ]
+ }
+
+ fn test_map(write_all: bool) -> Box<LooseObjectMap> {
+ let mut map = Box::new(LooseObjectMap::new(
+ HashAlgorithm::SHA256,
+ HashAlgorithm::SHA1,
+ ));
+
+ map.start_batch();
+
+ for (_blob_content, sha1, sha256, kind, swap) in test_entries() {
+ let s256 = sha256_oid(sha256);
+ let s1 = sha1_oid(sha1);
+ let write = write_all || (*kind as u32 & 2) == 0;
+ if *swap {
+ // Insert the item into the batch arbitrarily based on the type. This tests that
+ // we can specify either order and we'll do the right thing.
+ map.insert(&s256, &s1, *kind, write);
+ } else {
+ map.insert(&s1, &s256, *kind, write);
+ }
+ }
+
+ map
+ }
+
+ #[test]
+ fn can_read_and_write_format() {
+ for full in &[true, false] {
+ let mut map = test_map(*full);
+ let mut wrtr = TrailingWriter::new();
+ map.finish_batch(&mut wrtr).unwrap();
+
+ assert_eq!(map.has_batch(), false);
+
+ let data = wrtr.finalize();
+ MmapedLooseObjectMap::new(&data, HashAlgorithm::SHA256).unwrap();
+ }
+ }
+
+ #[test]
+ fn looks_up_from_mmaped() {
+ let mut map = test_map(true);
+ let mut wrtr = TrailingWriter::new();
+ map.finish_batch(&mut wrtr).unwrap();
+
+ assert_eq!(map.has_batch(), false);
+
+ let data = wrtr.finalize();
+ let entries = test_entries();
+ let map = MmapedLooseObjectMap::new(&data, HashAlgorithm::SHA256).unwrap();
+
+ for (_, sha1, sha256, kind, _) in entries {
+ let s256 = sha256_oid(sha256);
+ let s1 = sha1_oid(sha1);
+
+ let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(res.oid, s1);
+ assert_eq!(res.kind, *kind);
+ let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(res, s1);
+
+ let res = map.map_object(&s256, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(res.oid, s256);
+ assert_eq!(res.kind, *kind);
+ let res = map.map_oid(&s256, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(res, s256);
+
+ let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(res.oid, s256);
+ assert_eq!(res.kind, *kind);
+ let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(res, s256);
+
+ let res = map.map_object(&s1, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(res.oid, s1);
+ assert_eq!(res.kind, *kind);
+ let res = map.map_oid(&s1, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(res, s1);
+ }
+
+ for octet in &[0x00u8, 0x6d, 0x6e, 0x8a, 0xff] {
+ let missing_oid = ObjectID {
+ hash: [*octet; 32],
+ algo: HashAlgorithm::SHA256 as u32,
+ };
+
+ assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none());
+ assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none());
+
+ assert_eq!(
+ map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(),
+ missing_oid
+ );
+ }
+ }
+
+ #[test]
+ fn binary_searches_slices_correctly() {
+ let sl = &[
+ 0, 1, 2, 15, 14, 13, 18, 10, 2, 20, 20, 20, 21, 21, 0, 21, 21, 1, 21, 21, 21, 21, 21,
+ 22, 22, 23, 24,
+ ];
+
+ let expected: &[(&[u8], Option<usize>)] = &[
+ (&[0, 1, 2], Some(0)),
+ (&[15, 14, 13], Some(1)),
+ (&[18, 10, 2], Some(2)),
+ (&[20, 20, 20], Some(3)),
+ (&[21, 21, 0], Some(4)),
+ (&[21, 21, 1], Some(5)),
+ (&[21, 21, 21], Some(6)),
+ (&[21, 21, 22], Some(7)),
+ (&[22, 23, 24], Some(8)),
+ (&[2, 15, 14], None),
+ (&[0, 21, 21], None),
+ (&[21, 21, 23], None),
+ (&[22, 22, 23], None),
+ (&[0xff, 0xff, 0xff], None),
+ (&[0, 0, 0], None),
+ ];
+
+ for (wanted, value) in expected {
+ assert_eq!(
+ MmapedLooseObjectMap::binary_search_slice(sl, wanted),
+ *value
+ );
+ }
+ }
+
+ #[test]
+ fn looks_up_oid_correctly() {
+ let map = test_map(false);
+ let entries = test_entries();
+
+ let s256 = sha256_oid(entries[0].2);
+ let s1 = sha1_oid(entries[0].1);
+
+ let missing_oid = ObjectID {
+ hash: [0xffu8; 32],
+ algo: HashAlgorithm::SHA256 as u32,
+ };
+
+ let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(res.oid, s1);
+ assert_eq!(res.kind, MapType::LooseObject);
+ let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(*res, s1);
+
+ let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(res.oid, s256);
+ assert_eq!(res.kind, MapType::LooseObject);
+ let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(*res, s256);
+
+ assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none());
+ assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none());
+
+ assert_eq!(
+ *map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(),
+ missing_oid
+ );
+ }
+
+ #[test]
+ fn looks_up_known_oids_correctly() {
+ let map = test_map(false);
+
+ let funcs: &[&dyn Fn(HashAlgorithm) -> &'static ObjectID] = &[
+ &|h: HashAlgorithm| h.empty_tree(),
+ &|h: HashAlgorithm| h.empty_blob(),
+ &|h: HashAlgorithm| h.null_oid(),
+ ];
+
+ for f in funcs {
+ let s256 = f(HashAlgorithm::SHA256);
+ let s1 = f(HashAlgorithm::SHA1);
+
+ let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(res.oid, *s1);
+ assert_eq!(res.kind, MapType::Reserved);
+ let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap();
+ assert_eq!(*res, *s1);
+
+ let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(res.oid, *s256);
+ assert_eq!(res.kind, MapType::Reserved);
+ let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap();
+ assert_eq!(*res, *s256);
+ }
+ }
+
+ #[test]
+ fn nul_padding() {
+ assert_eq!(LooseObjectMemoryMap::required_nul_padding(1, 1), 3);
+ assert_eq!(LooseObjectMemoryMap::required_nul_padding(2, 1), 2);
+ assert_eq!(LooseObjectMemoryMap::required_nul_padding(3, 1), 1);
+ assert_eq!(LooseObjectMemoryMap::required_nul_padding(2, 2), 0);
+
+ assert_eq!(LooseObjectMemoryMap::required_nul_padding(39, 3), 3);
+ }
+}
diff --git a/src/meson.build b/src/meson.build
index 25b9ad5a14..45739957b4 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -1,5 +1,8 @@
libgit_rs_sources = [
+ 'csum_file.rs',
+ 'hash.rs',
'lib.rs',
+ 'loose.rs',
'varint.rs',
]
diff --git a/src/varint.rs b/src/varint.rs
index 6e610bdd8e..06492dfc5e 100644
--- a/src/varint.rs
+++ b/src/varint.rs
@@ -1,3 +1,10 @@
+/// Decode the variable-length integer stored in `bufp` and return the decoded value.
+///
+/// Returns 0 in case the decoded integer would overflow u64::MAX.
+///
+/// # Safety
+///
+/// The buffer must be NUL-terminated to ensure safety.
#[no_mangle]
pub unsafe extern "C" fn decode_varint(bufp: *mut *const u8) -> u64 {
let mut buf = *bufp;
@@ -22,6 +29,14 @@ pub unsafe extern "C" fn decode_varint(bufp: *mut *const u8) -> u64 {
val
}
+/// Encode `value` into `buf` as a variable-length integer unless `buf` is null.
+///
+/// Returns the number of bytes written, or, if `buf` is null, the number of bytes that would be
+/// written to encode the integer.
+///
+/// # Safety
+///
+/// `buf` must either be null or point to at least 16 bytes of memory.
#[no_mangle]
pub unsafe extern "C" fn encode_varint(value: u64, buf: *mut u8) -> u8 {
let mut varint: [u8; 16] = [0; 16];