aboutsummaryrefslogtreecommitdiffstats
path: root/src/hash.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/hash.rs335
1 files changed, 335 insertions, 0 deletions
diff --git a/src/hash.rs b/src/hash.rs
new file mode 100644
index 0000000000..8798a50aef
--- /dev/null
+++ b/src/hash.rs
@@ -0,0 +1,335 @@
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation: version 2 of the License, dated June 1991.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, see <https://www.gnu.org/licenses/>.
+
+use std::io::{self, Write};
+use std::os::raw::c_void;
+
+/// Size in bytes of the `hash` array in `ObjectID`.
+///
+/// This equals the largest value returned by `HashAlgorithm::raw_len` in this file (32, for
+/// SHA-256).
+pub const GIT_MAX_RAWSZ: usize = 32;
+
+/// A binary object ID.
+///
+/// The struct is `#[repr(C)]` and `Hasher::into_oid` hands a pointer to it to C code, so the
+/// order and types of the fields should not be changed without checking the C side.
+#[repr(C)]
+#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct ObjectID {
+ /// Raw hash bytes. `as_slice` exposes only the first `raw_len()` bytes for the algorithm
+ /// named by `algo`; the constants defined in this file zero-fill the rest.
+ pub hash: [u8; GIT_MAX_RAWSZ],
+ /// Numeric algorithm identifier as understood by `HashAlgorithm::from_u32`
+ /// (1 = SHA-1, 2 = SHA-256).
+ pub algo: u32,
+}
+
+#[allow(dead_code)]
+impl ObjectID {
+    /// Borrow the meaningful bytes of this object ID.
+    ///
+    /// When `algo` names a known algorithm the slice covers the first `raw_len()` bytes of
+    /// `hash`; for an unrecognised `algo` value the whole buffer is returned.
+    pub fn as_slice(&self) -> &[u8] {
+        let len = HashAlgorithm::from_u32(self.algo)
+            .map_or(self.hash.len(), HashAlgorithm::raw_len);
+        &self.hash[..len]
+    }
+
+    /// Mutably borrow the meaningful bytes of this object ID.
+    ///
+    /// Covers the same byte range as `as_slice`: the first `raw_len()` bytes for a known
+    /// algorithm, otherwise the whole `hash` buffer.
+    pub fn as_mut_slice(&mut self) -> &mut [u8] {
+        let len = HashAlgorithm::from_u32(self.algo)
+            .map_or(self.hash.len(), HashAlgorithm::raw_len);
+        &mut self.hash[..len]
+    }
+}
+
+/// An in-progress hash computation backed by a context obtained from `c::git_hash_alloc`.
+///
+/// The context is released through `c::git_hash_free` when the value is dropped.
+pub struct Hasher {
+ /// Algorithm this hasher was created for. It sets the output length in `into_vec` and the
+ /// `algo` tag written by `into_oid`.
+ algo: HashAlgorithm,
+ /// Value reported by `is_safe`. `Hasher::new` always sets it to `true` and `clone` copies it.
+ /// NOTE(review): presumably distinguishes this from hashers built on the C side's
+ /// `unsafe_hash_algo` variant; confirm against the C code.
+ safe: bool,
+ /// Opaque context pointer returned by `c::git_hash_alloc`; only ever passed back to the
+ /// `git_hash_*` functions.
+ ctx: *mut c_void,
+}
+
+impl Hasher {
+    /// Create a new safe hasher.
+    ///
+    /// Allocates a context with `git_hash_alloc` and initialises it for `algo` with
+    /// `git_hash_init`.
+    pub fn new(algo: HashAlgorithm) -> Hasher {
+        // SAFETY: the call takes no arguments; the returned pointer is stored and only ever
+        // handed back to the `git_hash_*` functions.
+        // NOTE(review): assumes the C allocator never returns NULL; confirm.
+        let ctx = unsafe { c::git_hash_alloc() };
+        // SAFETY: `ctx` was just obtained from `git_hash_alloc`, and the algorithm pointer
+        // comes from `hash_algo_ptr` for a valid `HashAlgorithm` value.
+        unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) };
+        Hasher {
+            algo,
+            safe: true,
+            ctx,
+        }
+    }
+
+    /// Return whether this is a safe hasher.
+    pub fn is_safe(&self) -> bool {
+        self.safe
+    }
+
+    /// Update the hasher with the specified data.
+    pub fn update(&mut self, data: &[u8]) {
+        // SAFETY: `data.as_ptr()` is valid for reads of `data.len()` bytes for the duration of
+        // the call, and `self.ctx` is the context created in `new` or `clone`.
+        unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) };
+    }
+
+    /// Return an object ID, consuming the hasher.
+    ///
+    /// The returned ID is tagged with this hasher's algorithm. The context is freed by `Drop`
+    /// when `self` goes out of scope at the end of this function.
+    pub fn into_oid(self) -> ObjectID {
+        let mut oid = ObjectID {
+            // Sized by the shared constant rather than a literal so it tracks `ObjectID::hash`.
+            hash: [0u8; GIT_MAX_RAWSZ],
+            algo: self.algo as u32,
+        };
+        // SAFETY: `oid` is a live, `#[repr(C)]` `ObjectID` that outlives the call, and
+        // `self.ctx` is still valid because `self` has not been dropped yet.
+        unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) };
+        oid
+    }
+
+    /// Return a hash as a `Vec`, consuming the hasher.
+    ///
+    /// The vector is exactly `raw_len()` bytes long for this hasher's algorithm.
+    pub fn into_vec(self) -> Vec<u8> {
+        let mut v = vec![0u8; self.algo.raw_len()];
+        // SAFETY: `v` provides `raw_len()` writable bytes.
+        // NOTE(review): assumes `git_hash_final` writes exactly the algorithm's raw hash
+        // length; confirm against the C implementation.
+        unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) };
+        v
+    }
+}
+
+impl Write for Hasher {
+    /// Feed `data` into the hash; the whole buffer is always reported as written.
+    fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+        let consumed = data.len();
+        self.update(data);
+        Ok(consumed)
+    }
+
+    /// Nothing is buffered on the Rust side, so flushing is a no-op that always succeeds.
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl Clone for Hasher {
+    /// Duplicate this hasher into a freshly allocated context.
+    ///
+    /// A new context is obtained from `git_hash_alloc` and populated from this hasher's
+    /// context with `git_hash_clone`; `algo` and `safe` are copied unchanged.
+    fn clone(&self) -> Self {
+        // SAFETY: the call takes no arguments; the pointer is owned by the returned value.
+        let fresh = unsafe { c::git_hash_alloc() };
+        // SAFETY: `fresh` was just allocated and `self.ctx` stays valid while `self` is
+        // borrowed.
+        unsafe { c::git_hash_clone(fresh, self.ctx) };
+        Self {
+            algo: self.algo,
+            safe: self.safe,
+            ctx: fresh,
+        }
+    }
+}
+
+impl Drop for Hasher {
+    /// Hand the context back to `git_hash_free`.
+    fn drop(&mut self) {
+        let ctx = self.ctx;
+        // SAFETY: `ctx` came from `git_hash_alloc` in `new` or `clone`, and `drop` runs at most
+        // once per value, so the pointer is freed exactly once.
+        unsafe { c::git_hash_free(ctx) };
+    }
+}
+
+/// A hash algorithm.
+///
+/// The discriminants are the integer IDs accepted by `HashAlgorithm::from_u32` and stored in
+/// `ObjectID::algo`.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub enum HashAlgorithm {
+ /// SHA-1; raw object IDs are 20 bytes long.
+ SHA1 = 1,
+ /// SHA-256; raw object IDs are 32 bytes long.
+ SHA256 = 2,
+}
+
+#[allow(dead_code)]
+impl HashAlgorithm {
+    /// The all-zero object ID tagged as SHA-1.
+    const SHA1_NULL_OID: ObjectID = ObjectID {
+        hash: [0u8; GIT_MAX_RAWSZ],
+        algo: Self::SHA1 as u32,
+    };
+    /// The all-zero object ID tagged as SHA-256.
+    const SHA256_NULL_OID: ObjectID = ObjectID {
+        hash: [0u8; GIT_MAX_RAWSZ],
+        algo: Self::SHA256 as u32,
+    };
+
+    /// SHA-1 ID of the empty tree: 20 hash bytes followed by zero padding.
+    const SHA1_EMPTY_TREE: ObjectID = ObjectID {
+        hash: *b"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        algo: Self::SHA1 as u32,
+    };
+    /// SHA-256 ID of the empty tree.
+    const SHA256_EMPTY_TREE: ObjectID = ObjectID {
+        hash: *b"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc\x53\x21",
+        algo: Self::SHA256 as u32,
+    };
+
+    /// SHA-1 ID of the empty blob: 20 hash bytes followed by zero padding.
+    const SHA1_EMPTY_BLOB: ObjectID = ObjectID {
+        hash: *b"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        algo: Self::SHA1 as u32,
+    };
+    /// SHA-256 ID of the empty blob.
+    const SHA256_EMPTY_BLOB: ObjectID = ObjectID {
+        hash: *b"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72\x18\x13",
+        algo: Self::SHA256 as u32,
+    };
+
+    /// Return a hash algorithm based on the internal integer ID used by Git.
+    ///
+    /// Returns `None` if the ID doesn't indicate a valid algorithm.
+    pub const fn from_u32(algo: u32) -> Option<HashAlgorithm> {
+        match algo {
+            1 => Some(HashAlgorithm::SHA1),
+            2 => Some(HashAlgorithm::SHA256),
+            _ => None,
+        }
+    }
+
+    /// Return a hash algorithm based on the format ID used in binary formats.
+    ///
+    /// This is the inverse of `format_id`. Returns `None` if the format ID doesn't indicate a
+    /// valid algorithm.
+    pub const fn from_format_id(algo: u32) -> Option<HashAlgorithm> {
+        match algo {
+            // The bytes of each ID spell an ASCII tag: "sha1" and "s256" respectively.
+            0x73686131 => Some(HashAlgorithm::SHA1),
+            0x73323536 => Some(HashAlgorithm::SHA256),
+            _ => None,
+        }
+    }
+
+    /// The name of this hash algorithm as a string suitable for the configuration file.
+    pub const fn name(self) -> &'static str {
+        match self {
+            HashAlgorithm::SHA1 => "sha1",
+            HashAlgorithm::SHA256 => "sha256",
+        }
+    }
+
+    /// The format ID of this algorithm for binary formats.
+    ///
+    /// Note that when writing this to a data format, it should be written in big-endian format
+    /// explicitly.
+    pub const fn format_id(self) -> u32 {
+        match self {
+            HashAlgorithm::SHA1 => 0x73686131,
+            HashAlgorithm::SHA256 => 0x73323536,
+        }
+    }
+
+    /// The length of binary object IDs in this algorithm in bytes.
+    pub const fn raw_len(self) -> usize {
+        match self {
+            HashAlgorithm::SHA1 => 20,
+            HashAlgorithm::SHA256 => 32,
+        }
+    }
+
+    /// The length of object IDs in this algorithm in hexadecimal characters.
+    pub const fn hex_len(self) -> usize {
+        // Two hexadecimal digits encode one byte.
+        self.raw_len() * 2
+    }
+
+    /// The number of bytes which is processed by one iteration of this algorithm's compression
+    /// function.
+    pub const fn block_size(self) -> usize {
+        match self {
+            HashAlgorithm::SHA1 => 64,
+            HashAlgorithm::SHA256 => 64,
+        }
+    }
+
+    /// The object ID representing the empty blob.
+    pub const fn empty_blob(self) -> &'static ObjectID {
+        match self {
+            HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_BLOB,
+            HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_BLOB,
+        }
+    }
+
+    /// The object ID representing the empty tree.
+    pub const fn empty_tree(self) -> &'static ObjectID {
+        match self {
+            HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_TREE,
+            HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_TREE,
+        }
+    }
+
+    /// The object ID which is all zeros.
+    pub const fn null_oid(self) -> &'static ObjectID {
+        match self {
+            HashAlgorithm::SHA1 => &Self::SHA1_NULL_OID,
+            HashAlgorithm::SHA256 => &Self::SHA256_NULL_OID,
+        }
+    }
+
+    /// A pointer to the C `struct git_hash_algo` for interoperability with C.
+    pub fn hash_algo_ptr(self) -> *const c_void {
+        // SAFETY: the argument is the discriminant of a valid `HashAlgorithm` (1 or 2).
+        // NOTE(review): assumes the C lookup accepts exactly these values; confirm.
+        unsafe { c::hash_algo_ptr_by_offset(self as u32) }
+    }
+
+    /// Create a hasher for this algorithm.
+    pub fn hasher(self) -> Hasher {
+        Hasher::new(self)
+    }
+}
+
+/// Raw declarations of the C functions this file calls.
+///
+/// Every function here is `unsafe` to call; the wrappers on `Hasher` and `HashAlgorithm` are
+/// the intended entry points from Rust.
+pub mod c {
+ use std::os::raw::c_void;
+
+ extern "C" {
+ /// Look up an algorithm descriptor by numeric ID; called by `HashAlgorithm::hash_algo_ptr`
+ /// with the enum discriminant.
+ pub fn hash_algo_ptr_by_offset(n: u32) -> *const c_void;
+ /// Not called from the Rust code in this file.
+ /// NOTE(review): presumably maps an algorithm descriptor to its "unsafe" variant; confirm
+ /// against the C definition.
+ pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void;
+ /// Allocate a hashing context; used by `Hasher::new` and `Hasher::clone`.
+ pub fn git_hash_alloc() -> *mut c_void;
+ /// Release a context obtained from `git_hash_alloc`; used by `Drop for Hasher`.
+ pub fn git_hash_free(ctx: *mut c_void);
+ /// Initialise the context `dst` for the algorithm descriptor `algop`.
+ pub fn git_hash_init(dst: *mut c_void, algop: *const c_void);
+ /// Populate the context `dst` from the context `src`; used by `Hasher::clone`.
+ pub fn git_hash_clone(dst: *mut c_void, src: *const c_void);
+ /// Feed `len` bytes starting at `inp` into the context `ctx`.
+ pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize);
+ /// Write the resulting hash bytes to `hash`; `Hasher::into_vec` passes a buffer of
+ /// `raw_len()` bytes.
+ pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void);
+ /// Write the resulting hash into the object ID pointed to by `hash`; `Hasher::into_oid`
+ /// passes a pointer to an `ObjectID`.
+ pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void);
+ }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{HashAlgorithm, ObjectID};
+    use std::io::Write;
+
+    /// Every algorithm the tests iterate over.
+    fn all_algos() -> &'static [HashAlgorithm] {
+        &[HashAlgorithm::SHA1, HashAlgorithm::SHA256]
+    }
+
+    /// `from_format_id` must invert `format_id` for every algorithm.
+    #[test]
+    fn format_id_round_trips() {
+        for algo in all_algos() {
+            assert_eq!(
+                *algo,
+                HashAlgorithm::from_format_id(algo.format_id()).unwrap()
+            );
+        }
+    }
+
+    /// `from_u32` must invert the enum discriminant for every algorithm.
+    #[test]
+    fn offset_round_trips() {
+        for algo in all_algos() {
+            assert_eq!(*algo, HashAlgorithm::from_u32(*algo as u32).unwrap());
+        }
+    }
+
+    /// `as_slice` on the built-in object IDs must be exactly `raw_len()` bytes long.
+    #[test]
+    fn slices_have_correct_length() {
+        for algo in all_algos() {
+            for oid in [algo.null_oid(), algo.empty_blob(), algo.empty_tree()] {
+                assert_eq!(oid.as_slice().len(), algo.raw_len());
+            }
+        }
+    }
+
+    /// Hashing the empty blob and empty tree headers must reproduce the built-in constants
+    /// through `update`, through a cloned hasher, and through the `Write` implementation.
+    #[test]
+    fn hasher_works_correctly() {
+        for algo in all_algos() {
+            let tests: &[(&[u8], &ObjectID)] = &[
+                (b"blob 0\0", algo.empty_blob()),
+                (b"tree 0\0", algo.empty_tree()),
+            ];
+            for (data, oid) in tests {
+                let mut h = algo.hasher();
+                assert!(h.is_safe());
+                // Test that this works incrementally.
+                h.update(&data[0..2]);
+                h.update(&data[2..]);
+
+                // Clone before finalising so both output paths see the same input.
+                let h2 = h.clone();
+
+                let actual_oid = h.into_oid();
+                assert_eq!(**oid, actual_oid);
+
+                let v = h2.into_vec();
+                assert_eq!((*oid).as_slice(), &v);
+
+                // Same input again, this time fed through `io::Write`.
+                let mut h = algo.hasher();
+                h.write_all(&data[0..2]).unwrap();
+                h.write_all(&data[2..]).unwrap();
+
+                let actual_oid = h.into_oid();
+                assert_eq!(**oid, actual_oid);
+            }
+        }
+    }
+}