Skip to content
Snippets Groups Projects
file_table.rs 9.12 KiB
Newer Older
  • Learn to ignore specific revisions
  • Daniel Müller's avatar
    Daniel Müller committed
    use std::{
        io::{BufRead, Cursor, Read, Write},
        path::PathBuf,
        time::SystemTime,
    };
    
    use chrono::{DateTime, Local};
    use log::debug;
    
    use crate::read_write_extension::{ReadExtTypes, WriteExtTypes};
    
    use super::{ArMetadata, EntryType, Uid};
    
    /// The FileTable represents a snapshot in time and contains a list of all archived files and
    /// directories with their metadata
    #[derive(Debug, Clone, Default, PartialEq)]
    pub struct FileTable {
        /// Unique ID of the BlockPool referenced in this FileTable
        /// (written first during serialization, see `serialize_to`)
        pub(crate) uid: Uid,
        /// The actual file table entries. Order is not guaranteed — it depends on the directory
        /// scan; use `sort_by_path` if a deterministic order is needed
        pub(crate) entries: Vec<ArEntry>,
        /// Path to the file containing the FileTable, if applicable.
        /// This is in-memory bookkeeping only; it is not written by `serialize_to`
        pub(crate) file_path: Option<PathBuf>,
    }
    
    /// The ArEntry represents a single file or directory in the archive
    #[derive(Debug, Clone, Default, PartialEq)]
    pub struct ArEntry {
        /// File metadata, see `ArEntryFlags` for more information
        pub(crate) metadata: ArMetadata,
        /// Symlink target path. This is only set if the entry type is symlink
        pub(crate) symlink_target: Option<String>,
        /// File path relative to the archive root.
        /// # Notes
        /// Storing the full path for each entry is highly inefficient and creates redundancies.
        /// By compressing the filetable, this effect is not as bad as it could be, but this should
        /// still be changed in the future. A tree based structure could be a viable and more efficient
        /// option.
        pub(crate) path: String,
        /// An ordered list of the block IDs making up the file contents. The blocks with the stored
        /// IDs need to be concatenated in the same order to restore the file contents
        pub(crate) blocks: Vec<u64>,
    }
    
    impl FileTable {
        /// Sort the contained entries in-place by their archive path.
        /// Useful because directory scanning may yield entries in arbitrary order, so callers
        /// that need a deterministic table should sort it first.
        pub fn sort_by_path(&mut self) {
            self.entries.sort_by(|lhs, rhs| Ord::cmp(&lhs.path, &rhs.path));
        }
    
        /// Serialize (write) the file table to the given output stream. This will not apply
        /// compression, but only serialize the file table contents
        ///
        /// # Errors
        /// Forwards any I/O error from the underlying writer.
        ///
        /// # Panics
        /// Panics if an entry's symlink target is inconsistent with its entry type — a symlink
        /// entry without a target, or a non-symlink entry with one. Both indicate a broken
        /// in-memory invariant, not recoverable input.
        pub fn serialize_to<T: Write>(&self, mut output: T) -> std::io::Result<()> {
            debug!(
                "Serializing FileTable: {} Entries, UID={:X?}",
                self.entries.len(),
                self.uid
            );

            output.write_all(&self.uid)?;

            for entry in self.entries.iter() {
                // A serialized symlink target occupies its bytes plus one 0 terminator
                let symlink_target_len = entry
                    .symlink_target
                    .as_ref()
                    .map(|it| it.len() + 1)
                    .unwrap_or(0);

                // Calculate entry length: metadata + path + 0 terminator + target + 8 bytes per block ID
                let entry_len = ArMetadata::METADATA_LENGTH
                    + entry.path.len()
                    + 1
                    + symlink_target_len
                    + 8 * entry.blocks.len();

                // Verify the symlink / non-symlink invariant before writing anything for this entry
                match (
                    entry.metadata.entry_and_os_type.entry_type(),
                    entry.symlink_target.as_ref(),
                ) {
                    (EntryType::SymbolicLinkFile | EntryType::SymbolicLinkDir, Some(_)) => (),
                    (EntryType::SymbolicLinkFile | EntryType::SymbolicLinkDir, None) => {
                        panic!("Trying to serialize entry with symlink type but no target")
                    }
                    (_, Some(_)) => {
                        panic!("Trying to serialize entry with symlink target but wrong entry type")
                    }
                    _ => (),
                }

                output.write_all_u64(entry_len as u64)?;
                entry.metadata.serialize_to(&mut output)?;
                output.write_all(entry.path.as_bytes())?;
                output.write_all_u8(0)?;

                // Symlink target string is only included on symlinks
                if symlink_target_len > 0 {
                    output.write_all(entry.symlink_target.as_ref().unwrap().as_bytes())?;
                    output.write_all_u8(0)?;
                }

                // Serialize the block offsets
                for idx in entry.blocks.iter().copied() {
                    output.write_all_u64(idx)?;
                }
            }
            Ok(())
        }
    
        /// Try to deserialize a file table from the given input stream. This will not apply
        /// decompression — it is the inverse of `serialize_to`.
        pub fn try_deserialize_from<T: Read>(mut input: T) -> std::io::Result<Self> {
            let mut file_table = FileTable::default();

            input.read_exact(&mut file_table.uid)?;

            // One scratch buffer is allocated up front and reused for every entry
            let mut scratch = Vec::new();

            loop {
                match Self::try_deserialize_entry_from(&mut input, &mut scratch)? {
                    Some(entry) => file_table.entries.push(entry),
                    // `None` signals a clean end of the input stream
                    None => break,
                }
            }

            debug!(
                "Read FileTable: {} Entries, UID={:X?}",
                file_table.entries.len(),
                file_table.uid
            );

            Ok(file_table)
        }
    
        /// Try to deserialize a single Archive Entry from a given input stream. This must only be used
        /// after parsing the beginning of the filetable.
        ///
        /// Returns `Ok(None)` on a clean end-of-stream at an entry boundary.
        ///
        /// # Errors
        /// Returns an `InvalidData` error when the path or symlink target is not valid UTF-8, or
        /// when the declared entry length is smaller than the data already parsed (malformed
        /// input must not panic the deserializer).
        fn try_deserialize_entry_from<T: Read>(
            mut input: T,
            buf: &mut Vec<u8>,
        ) -> std::io::Result<Option<ArEntry>> {
            use std::io::{
                Error,
                ErrorKind::{InvalidData, UnexpectedEof},
            };

            // EOF exactly at an entry boundary means the table is fully read
            let entry_len = match input.read_exact_u64() {
                Err(e) if e.kind() == UnexpectedEof => return Ok(None),
                r => r?,
            };

            // Read the full archive entry into the caller-provided reusable buffer
            buf.resize(entry_len as usize, 0);
            input.read_exact(buf)?;

            let mut buf_cursor = Cursor::new(buf);

            let metadata = ArMetadata::try_deserialize_from(&mut buf_cursor)?;

            let mut path = Vec::new();
            buf_cursor.read_until(0, &mut path)?;
            // Discard the 0 delimiter
            path.pop();
            // Non-UTF-8 input is malformed archive data, not a programming bug: surface an error
            let path = String::from_utf8(path).map_err(|e| Error::new(InvalidData, e))?;

            let mut ar_entry = ArEntry {
                metadata,
                path,
                ..Default::default()
            };

            // Symlink entries carry an additional 0-terminated target string
            if matches!(
                ar_entry.metadata.entry_and_os_type.entry_type(),
                EntryType::SymbolicLinkFile | EntryType::SymbolicLinkDir
            ) {
                let mut symlink_target = Vec::new();
                buf_cursor.read_until(0, &mut symlink_target)?;
                // Discard the 0 delimiter
                symlink_target.pop();
                let symlink_target =
                    String::from_utf8(symlink_target).map_err(|e| Error::new(InvalidData, e))?;
                ar_entry.symlink_target = Some(symlink_target);
            }

            // Calculate the number of missing bytes; a length field smaller than what was
            // already consumed would underflow, so reject it as malformed instead of panicking
            let missing_bytes = entry_len
                .checked_sub(buf_cursor.position())
                .ok_or_else(|| Error::new(InvalidData, "entry length smaller than parsed data"))?;

            // Read the remaining buffer offset values (8 bytes each) for the entry
            for _ in 0..missing_bytes / 8 {
                let idx = buf_cursor.read_exact_u64()?;
                ar_entry.blocks.push(idx);
            }

            Ok(Some(ar_entry))
        }
    
        /// Print a simple listing of every entry (modification time, size, path) to stdout,
        /// followed by the accumulated total size of all entries.
        pub fn print_simple_list(&self) {
            let epoch: DateTime<Local> = SystemTime::UNIX_EPOCH.into();
            let mut total_size = 0;

            for entry in &self.entries {
                let meta = &entry.metadata;

                // Rebuild the modification timestamp from the stored seconds + nanoseconds
                let modified = (epoch
                    + chrono::Duration::seconds(meta.modified_unix_seconds)
                    + chrono::Duration::nanoseconds(meta.modified_nanos as i64))
                .format("%Y-%m-%d %H:%M:%S");

                total_size += meta.file_size;
                let file_size = bytesize::ByteSize::b(meta.file_size);

                // Strip a leading "./" or ".\" so the listing shows clean relative paths
                let path = entry
                    .path
                    .trim_start_matches("./")
                    .trim_start_matches(".\\");

                // Paths containing spaces are quoted to keep the listing unambiguous
                if path.contains(' ') {
                    println!("{modified} {file_size:>9} '{path}'");
                } else {
                    println!("{modified} {file_size:>9} {path}");
                }
            }

            println!("Total size: {}", bytesize::ByteSize::b(total_size));
        }
    }
    
    #[cfg(test)]
    mod test {
        use super::{ArEntry, FileTable};
        use std::io::Cursor;

        /// Round-trip a populated FileTable through serialize and deserialize and verify
        /// that the result equals the original (serialize <-> deserialize)
        #[test]
        pub fn test_file_table_io() {
            let mut file_table = FileTable::default();

            for i in 0..50 {
                let mut file_entry = ArEntry::default();

                // Cycle through all OS types so each variant is covered
                let os_type = match i % 3 {
                    0 => crate::datamodel::OsType::Unknown,
                    1 => crate::datamodel::OsType::Windows,
                    2 => crate::datamodel::OsType::Unix,
                    _ => unreachable!(),
                };
                file_entry.metadata.entry_and_os_type.set_os_type(os_type);

                // Alternate between files and directories
                let entry_type = if i % 2 == 0 {
                    crate::datamodel::EntryType::File
                } else {
                    crate::datamodel::EntryType::Directory
                };
                file_entry
                    .metadata
                    .entry_and_os_type
                    .set_entry_type(entry_type);

                file_entry.path = format!("/path/to/entry{}", i);
                file_entry.blocks.extend(i as u64..10 * i as u64);

                file_table.entries.push(file_entry);
            }

            let mut io_buffer = Vec::new();
            file_table.serialize_to(&mut io_buffer).unwrap();

            let file_table2 = FileTable::try_deserialize_from(Cursor::new(io_buffer)).unwrap();

            assert_eq!(
                file_table, file_table2,
                "FileTable serialization <-> deserialization failed"
            );
        }
    }