anshdadhich committed on
Commit
e51364b
·
verified ·
1 Parent(s): 6fd27db

Upload fastsearch-core/src/index/store.rs

Browse files
Files changed (1) hide show
  1. fastsearch-core/src/index/store.rs +252 -0
fastsearch-core/src/index/store.rs ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #![allow(dead_code)]
2
+ use serde::{Serialize, Deserialize};
3
+ use crate::mft::types::{FileKind, FileRecord, JournalCheckpoint};
4
+ use crate::mft::reader::ScanResult;
5
+
6
+ #[derive(Debug, Clone, Serialize, Deserialize)]
7
+ pub struct CachedEntry {
8
+ pub file_ref: u64,
9
+ pub parent_ref: u64,
10
+ pub name: String,
11
+ pub kind: FileKind,
12
+ }
13
+
14
+ #[derive(Serialize, Deserialize)]
15
+ pub struct CacheData {
16
+ pub entries: Vec<CachedEntry>,
17
+ pub drive_root: String,
18
+ pub checkpoints: Vec<JournalCheckpoint>,
19
+ }
20
+
21
/// Compact in-memory index record.
///
/// The entry does not own its name. `name_off`/`name_len` and
/// `name_lower_off`/`name_lower_len` are byte ranges into the owning
/// `IndexStore`'s `name_arena` and `name_lower_arena` respectively.
///
/// `Debug` is derived so entries can be logged and used in assertions; every
/// field is a primitive, so the derive has no further requirements.
#[derive(Debug, Clone)]
pub struct IndexEntry {
    /// Reference identifying this entry (the key used by `IndexStore::lookup_idx`).
    pub file_ref: u64,
    /// Reference of the parent entry.
    pub parent_ref: u64,
    /// Byte offset of the original-case name inside `IndexStore::name_arena`.
    pub name_off: u32,
    /// Byte offset of the lowercase name inside `IndexStore::name_lower_arena`.
    pub name_lower_off: u32,
    /// Length in bytes of the original-case name.
    pub name_len: u16,
    /// Length in bytes of the lowercase name (may differ from `name_len`).
    pub name_lower_len: u16,
    /// Bit flags; bit 0 set means the entry is a directory.
    pub flags: u8,
}
31
+
32
+ impl IndexEntry {
33
+ #[inline]
34
+ pub fn is_dir(&self) -> bool {
35
+ self.flags & 1 != 0
36
+ }
37
+
38
+ #[inline]
39
+ pub fn kind(&self) -> FileKind {
40
+ if self.is_dir() { FileKind::Directory } else { FileKind::File }
41
+ }
42
+ }
43
+
44
+ pub struct IndexStore {
45
+ pub entries: Vec<IndexEntry>,
46
+ pub name_arena: Vec<u8>,
47
+ pub name_lower_arena: Vec<u8>,
48
+ pub ref_lookup: Vec<(u64, u32)>,
49
+ pub drive_root: String,
50
+ pub checkpoints: Vec<JournalCheckpoint>,
51
+ }
52
+
53
+ impl IndexStore {
54
+ pub fn new() -> Self {
55
+ Self {
56
+ entries: Vec::new(),
57
+ name_arena: Vec::new(),
58
+ name_lower_arena: Vec::new(),
59
+ ref_lookup: Vec::new(),
60
+ drive_root: String::new(),
61
+ checkpoints: Vec::new(),
62
+ }
63
+ }
64
+
65
+ #[inline]
66
+ pub fn name(&self, e: &IndexEntry) -> &str {
67
+ unsafe {
68
+ std::str::from_utf8_unchecked(
69
+ &self.name_arena[e.name_off as usize..(e.name_off as usize + e.name_len as usize)]
70
+ )
71
+ }
72
+ }
73
+
74
+ #[inline]
75
+ pub fn name_lower(&self, e: &IndexEntry) -> &str {
76
+ unsafe {
77
+ std::str::from_utf8_unchecked(
78
+ &self.name_lower_arena[e.name_lower_off as usize..(e.name_lower_off as usize + e.name_lower_len as usize)]
79
+ )
80
+ }
81
+ }
82
+
83
+ pub fn lookup_idx(&self, file_ref: u64) -> Option<u32> {
84
+ self.ref_lookup
85
+ .binary_search_by_key(&file_ref, |&(r, _)| r)
86
+ .ok()
87
+ .map(|pos| self.ref_lookup[pos].1)
88
+ }
89
+
90
+ fn rebuild_ref_lookup(&mut self) {
91
+ self.ref_lookup.clear();
92
+ self.ref_lookup.reserve(self.entries.len());
93
+ for (i, e) in self.entries.iter().enumerate() {
94
+ self.ref_lookup.push((e.file_ref, i as u32));
95
+ }
96
+ self.ref_lookup.sort_unstable_by_key(|&(r, _)| r);
97
+ }
98
+
99
+ pub fn populate_from_scan(&mut self, scan: ScanResult, drive_root: &str) {
100
+ self.drive_root = drive_root.to_string();
101
+ let count = scan.records.len();
102
+ self.entries.reserve(count);
103
+ self.name_arena.reserve(count * 30);
104
+ self.name_lower_arena.reserve(count * 30);
105
+
106
+ for r in &scan.records {
107
+ let name_slice = &scan.name_data[r.name_off as usize..(r.name_off as usize + r.name_len as usize)];
108
+ let name = String::from_utf16_lossy(name_slice);
109
+ let name_lower = name.to_lowercase();
110
+
111
+ let n_off = self.name_arena.len() as u32;
112
+ let n_len = name.len() as u16;
113
+ self.name_arena.extend_from_slice(name.as_bytes());
114
+
115
+ let nl_off = self.name_lower_arena.len() as u32;
116
+ let nl_len = name_lower.len() as u16;
117
+ self.name_lower_arena.extend_from_slice(name_lower.as_bytes());
118
+
119
+ self.entries.push(IndexEntry {
120
+ file_ref: r.file_ref,
121
+ parent_ref: r.parent_ref,
122
+ name_off: n_off,
123
+ name_lower_off: nl_off,
124
+ name_len: n_len,
125
+ name_lower_len: nl_len,
126
+ flags: if r.is_dir { 1 } else { 0 },
127
+ });
128
+ }
129
+ }
130
+
131
+ pub fn finalize(&mut self) {
132
+ let store_ptr = self as *const IndexStore;
133
+ self.entries.sort_unstable_by(|a, b| {
134
+ let s = unsafe { &*store_ptr };
135
+ s.name_lower(a).cmp(s.name_lower(b))
136
+ });
137
+ self.rebuild_ref_lookup();
138
+ self.name_arena.shrink_to_fit();
139
+ self.name_lower_arena.shrink_to_fit();
140
+ }
141
+
142
+ pub fn to_cache(&self) -> CacheData {
143
+ CacheData {
144
+ entries: self.entries.iter().map(|e| CachedEntry {
145
+ file_ref: e.file_ref,
146
+ parent_ref: e.parent_ref,
147
+ name: self.name(e).to_string(),
148
+ kind: e.kind(),
149
+ }).collect(),
150
+ drive_root: self.drive_root.clone(),
151
+ checkpoints: self.checkpoints.clone(),
152
+ }
153
+ }
154
+
155
+ pub fn from_cache(cache: CacheData) -> Self {
156
+ let count = cache.entries.len();
157
+ let mut store = Self {
158
+ entries: Vec::with_capacity(count),
159
+ name_arena: Vec::with_capacity(count * 30),
160
+ name_lower_arena: Vec::with_capacity(count * 30),
161
+ ref_lookup: Vec::with_capacity(count),
162
+ drive_root: cache.drive_root,
163
+ checkpoints: cache.checkpoints,
164
+ };
165
+
166
+ for c in cache.entries {
167
+ let name_lower = c.name.to_lowercase();
168
+
169
+ let n_off = store.name_arena.len() as u32;
170
+ let n_len = c.name.len() as u16;
171
+ store.name_arena.extend_from_slice(c.name.as_bytes());
172
+
173
+ let nl_off = store.name_lower_arena.len() as u32;
174
+ let nl_len = name_lower.len() as u16;
175
+ store.name_lower_arena.extend_from_slice(name_lower.as_bytes());
176
+
177
+ let flags = match c.kind {
178
+ FileKind::Directory => 1u8,
179
+ FileKind::File => 0u8,
180
+ };
181
+
182
+ store.entries.push(IndexEntry {
183
+ file_ref: c.file_ref,
184
+ parent_ref: c.parent_ref,
185
+ name_off: n_off,
186
+ name_lower_off: nl_off,
187
+ name_len: n_len,
188
+ name_lower_len: nl_len,
189
+ flags,
190
+ });
191
+ }
192
+
193
+ store.rebuild_ref_lookup();
194
+ store.name_arena.shrink_to_fit();
195
+ store.name_lower_arena.shrink_to_fit();
196
+ store
197
+ }
198
+
199
+ pub fn insert(&mut self, record: FileRecord) {
200
+ let name_lower = record.name.to_lowercase();
201
+
202
+ let n_off = self.name_arena.len() as u32;
203
+ let n_len = record.name.len() as u16;
204
+ self.name_arena.extend_from_slice(record.name.as_bytes());
205
+
206
+ let nl_off = self.name_lower_arena.len() as u32;
207
+ let nl_len = name_lower.len() as u16;
208
+ self.name_lower_arena.extend_from_slice(name_lower.as_bytes());
209
+
210
+ let flags = match record.kind {
211
+ FileKind::Directory => 1u8,
212
+ FileKind::File => 0u8,
213
+ };
214
+
215
+ let entry = IndexEntry {
216
+ file_ref: record.file_ref,
217
+ parent_ref: record.parent_ref,
218
+ name_off: n_off,
219
+ name_lower_off: nl_off,
220
+ name_len: n_len,
221
+ name_lower_len: nl_len,
222
+ flags,
223
+ };
224
+
225
+ let store_ptr = self as *const IndexStore;
226
+ let pos = self.entries.partition_point(|e| {
227
+ let s = unsafe { &*store_ptr };
228
+ s.name_lower(e) < name_lower.as_str()
229
+ });
230
+ self.entries.insert(pos, entry);
231
+ self.rebuild_ref_lookup();
232
+ }
233
+
234
+ pub fn remove(&mut self, file_ref: u64) {
235
+ self.entries.retain(|e| e.file_ref != file_ref);
236
+ self.rebuild_ref_lookup();
237
+ }
238
+
239
+ pub fn rename(&mut self, old_ref: u64, new_record: FileRecord) {
240
+ self.remove(old_ref);
241
+ self.insert(new_record);
242
+ }
243
+
244
+ pub fn apply_move(&mut self, file_ref: u64, new_parent_ref: u64, name: String, kind: FileKind) {
245
+ self.remove(file_ref);
246
+ self.insert(FileRecord { file_ref, parent_ref: new_parent_ref, name, kind });
247
+ }
248
+
249
+ pub fn len(&self) -> usize {
250
+ self.entries.len()
251
+ }
252
+ }