anshdadhich committed on
Commit
761f4fd
·
verified ·
1 Parent(s): deeaf27

Upload src/index/store.rs

Browse files
Files changed (1) hide show
  1. src/index/store.rs +268 -0
src/index/store.rs ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #![allow(dead_code)]
2
+ use serde::{Serialize, Deserialize};
3
+ use crate::mft::types::{FileKind, FileRecord, JournalCheckpoint};
4
+ use crate::mft::reader::ScanResult;
5
+
6
+ // ── Cache format (disk) ──────────────────────────────────────────────
7
+ #[derive(Debug, Clone, Serialize, Deserialize)]
8
+ pub struct CachedEntry {
9
+ pub file_ref: u64,
10
+ pub parent_ref: u64,
11
+ pub name: String,
12
+ pub kind: FileKind,
13
+ }
14
+
15
+ #[derive(Serialize, Deserialize)]
16
+ pub struct CacheData {
17
+ pub entries: Vec<CachedEntry>,
18
+ pub drive_root: String,
19
+ pub checkpoints: Vec<JournalCheckpoint>,
20
+ }
21
+
22
+ // ── Compact in-memory entry (32 bytes) ───────────────────────────────
23
/// Compact in-memory record for one indexed file or directory.
///
/// Names are not stored inline: the entry only records where its original
/// and lowercase names live (byte offset + byte length) inside the owning
/// store's name arenas.
#[derive(Clone)]
pub struct IndexEntry {
    /// Reference number identifying this file or directory.
    pub file_ref: u64,
    /// Reference number of the containing directory.
    pub parent_ref: u64,
    /// Byte offset of the original-case name in the name arena.
    pub name_off: u32,
    /// Byte offset of the lowercase name in the lowercase name arena.
    pub name_lower_off: u32,
    /// Length in bytes of the original-case name.
    pub name_len: u16,
    /// Length in bytes of the lowercase name.
    pub name_lower_len: u16,
    /// Bit flags; bit 0 set means the entry is a directory.
    pub flags: u8,
}
33
+
34
+ impl IndexEntry {
35
+ #[inline]
36
+ pub fn is_dir(&self) -> bool {
37
+ self.flags & 1 != 0
38
+ }
39
+
40
+ #[inline]
41
+ pub fn kind(&self) -> FileKind {
42
+ if self.is_dir() { FileKind::Directory } else { FileKind::File }
43
+ }
44
+ }
45
+
46
+ // ── Main index store ─────────────────────────────────────────────────
47
+ pub struct IndexStore {
48
+ pub entries: Vec<IndexEntry>,
49
+ pub name_arena: Vec<u8>,
50
+ pub name_lower_arena: Vec<u8>,
51
+ pub ref_lookup: Vec<(u64, u32)>, // sorted by file_ref for binary search
52
+ pub drive_root: String,
53
+ pub checkpoints: Vec<JournalCheckpoint>,
54
+ }
55
+
56
+ impl IndexStore {
57
+ pub fn new() -> Self {
58
+ Self {
59
+ entries: Vec::new(),
60
+ name_arena: Vec::new(),
61
+ name_lower_arena: Vec::new(),
62
+ ref_lookup: Vec::new(),
63
+ drive_root: String::new(),
64
+ checkpoints: Vec::new(),
65
+ }
66
+ }
67
+
68
+ // ── Arena accessors ──────────────────────────────────────────────
69
+
70
+ #[inline]
71
+ pub fn name(&self, e: &IndexEntry) -> &str {
72
+ unsafe {
73
+ std::str::from_utf8_unchecked(
74
+ &self.name_arena[e.name_off as usize..(e.name_off as usize + e.name_len as usize)]
75
+ )
76
+ }
77
+ }
78
+
79
+ #[inline]
80
+ pub fn name_lower(&self, e: &IndexEntry) -> &str {
81
+ unsafe {
82
+ std::str::from_utf8_unchecked(
83
+ &self.name_lower_arena[e.name_lower_off as usize..(e.name_lower_off as usize + e.name_lower_len as usize)]
84
+ )
85
+ }
86
+ }
87
+
88
+ // ── Ref lookup (binary search) ───────────────────────────────────
89
+
90
+ pub fn lookup_idx(&self, file_ref: u64) -> Option<u32> {
91
+ self.ref_lookup
92
+ .binary_search_by_key(&file_ref, |&(r, _)| r)
93
+ .ok()
94
+ .map(|pos| self.ref_lookup[pos].1)
95
+ }
96
+
97
+ fn rebuild_ref_lookup(&mut self) {
98
+ self.ref_lookup.clear();
99
+ self.ref_lookup.reserve(self.entries.len());
100
+ for (i, e) in self.entries.iter().enumerate() {
101
+ self.ref_lookup.push((e.file_ref, i as u32));
102
+ }
103
+ self.ref_lookup.sort_unstable_by_key(|&(r, _)| r);
104
+ }
105
+
106
+ // ── Populate from MFT scan ───────────────────────────────────────
107
+
108
+ pub fn populate_from_scan(&mut self, scan: ScanResult, drive_root: &str) {
109
+ self.drive_root = drive_root.to_string();
110
+ let count = scan.records.len();
111
+ self.entries.reserve(count);
112
+ // Estimate ~30 bytes avg per name
113
+ self.name_arena.reserve(count * 30);
114
+ self.name_lower_arena.reserve(count * 30);
115
+
116
+ for r in &scan.records {
117
+ let name_slice = &scan.name_data[r.name_off as usize..(r.name_off as usize + r.name_len as usize)];
118
+ let name = String::from_utf16_lossy(name_slice);
119
+ let name_lower = name.to_lowercase();
120
+
121
+ let n_off = self.name_arena.len() as u32;
122
+ let n_len = name.len() as u16;
123
+ self.name_arena.extend_from_slice(name.as_bytes());
124
+
125
+ let nl_off = self.name_lower_arena.len() as u32;
126
+ let nl_len = name_lower.len() as u16;
127
+ self.name_lower_arena.extend_from_slice(name_lower.as_bytes());
128
+
129
+ self.entries.push(IndexEntry {
130
+ file_ref: r.file_ref,
131
+ parent_ref: r.parent_ref,
132
+ name_off: n_off,
133
+ name_lower_off: nl_off,
134
+ name_len: n_len,
135
+ name_lower_len: nl_len,
136
+ flags: if r.is_dir { 1 } else { 0 },
137
+ });
138
+ }
139
+ }
140
+
141
+ pub fn finalize(&mut self) {
142
+ let store_ptr = self as *const IndexStore;
143
+ self.entries.sort_unstable_by(|a, b| {
144
+ let s = unsafe { &*store_ptr };
145
+ s.name_lower(a).cmp(s.name_lower(b))
146
+ });
147
+ self.rebuild_ref_lookup();
148
+ // Shrink arenas to fit
149
+ self.name_arena.shrink_to_fit();
150
+ self.name_lower_arena.shrink_to_fit();
151
+ }
152
+
153
+ // ── Cache serialization ──────────────────────────────────────────
154
+
155
+ pub fn to_cache(&self) -> CacheData {
156
+ CacheData {
157
+ entries: self.entries.iter().map(|e| CachedEntry {
158
+ file_ref: e.file_ref,
159
+ parent_ref: e.parent_ref,
160
+ name: self.name(e).to_string(),
161
+ kind: e.kind(),
162
+ }).collect(),
163
+ drive_root: self.drive_root.clone(),
164
+ checkpoints: self.checkpoints.clone(),
165
+ }
166
+ }
167
+
168
+ pub fn from_cache(cache: CacheData) -> Self {
169
+ let count = cache.entries.len();
170
+ let mut store = Self {
171
+ entries: Vec::with_capacity(count),
172
+ name_arena: Vec::with_capacity(count * 30),
173
+ name_lower_arena: Vec::with_capacity(count * 30),
174
+ ref_lookup: Vec::with_capacity(count),
175
+ drive_root: cache.drive_root,
176
+ checkpoints: cache.checkpoints,
177
+ };
178
+
179
+ for c in cache.entries {
180
+ let name_lower = c.name.to_lowercase();
181
+
182
+ let n_off = store.name_arena.len() as u32;
183
+ let n_len = c.name.len() as u16;
184
+ store.name_arena.extend_from_slice(c.name.as_bytes());
185
+
186
+ let nl_off = store.name_lower_arena.len() as u32;
187
+ let nl_len = name_lower.len() as u16;
188
+ store.name_lower_arena.extend_from_slice(name_lower.as_bytes());
189
+
190
+ let flags = match c.kind {
191
+ FileKind::Directory => 1u8,
192
+ FileKind::File => 0u8,
193
+ };
194
+
195
+ store.entries.push(IndexEntry {
196
+ file_ref: c.file_ref,
197
+ parent_ref: c.parent_ref,
198
+ name_off: n_off,
199
+ name_lower_off: nl_off,
200
+ name_len: n_len,
201
+ name_lower_len: nl_len,
202
+ flags,
203
+ });
204
+ }
205
+
206
+ store.rebuild_ref_lookup();
207
+ store.name_arena.shrink_to_fit();
208
+ store.name_lower_arena.shrink_to_fit();
209
+ store
210
+ }
211
+
212
+ // ── Live mutations ───────────────────────────────────────────────
213
+
214
+ pub fn insert(&mut self, record: FileRecord) {
215
+ let name_lower = record.name.to_lowercase();
216
+
217
+ let n_off = self.name_arena.len() as u32;
218
+ let n_len = record.name.len() as u16;
219
+ self.name_arena.extend_from_slice(record.name.as_bytes());
220
+
221
+ let nl_off = self.name_lower_arena.len() as u32;
222
+ let nl_len = name_lower.len() as u16;
223
+ self.name_lower_arena.extend_from_slice(name_lower.as_bytes());
224
+
225
+ let flags = match record.kind {
226
+ FileKind::Directory => 1u8,
227
+ FileKind::File => 0u8,
228
+ };
229
+
230
+ let entry = IndexEntry {
231
+ file_ref: record.file_ref,
232
+ parent_ref: record.parent_ref,
233
+ name_off: n_off,
234
+ name_lower_off: nl_off,
235
+ name_len: n_len,
236
+ name_lower_len: nl_len,
237
+ flags,
238
+ };
239
+
240
+ let store_ptr = self as *const IndexStore;
241
+ let pos = self.entries.partition_point(|e| {
242
+ let s = unsafe { &*store_ptr };
243
+ s.name_lower(e) < name_lower.as_str()
244
+ });
245
+ self.entries.insert(pos, entry);
246
+ self.rebuild_ref_lookup();
247
+ }
248
+
249
+ pub fn remove(&mut self, file_ref: u64) {
250
+ // Name bytes left as dead space in arena (negligible for rare deletes)
251
+ self.entries.retain(|e| e.file_ref != file_ref);
252
+ self.rebuild_ref_lookup();
253
+ }
254
+
255
+ pub fn rename(&mut self, old_ref: u64, new_record: FileRecord) {
256
+ self.remove(old_ref);
257
+ self.insert(new_record);
258
+ }
259
+
260
+ pub fn apply_move(&mut self, file_ref: u64, new_parent_ref: u64, name: String, kind: FileKind) {
261
+ self.remove(file_ref);
262
+ self.insert(FileRecord { file_ref, parent_ref: new_parent_ref, name, kind });
263
+ }
264
+
265
+ pub fn len(&self) -> usize {
266
+ self.entries.len()
267
+ }
268
+ }