anshdadhich committed on
Commit
925c24f
·
verified ·
1 Parent(s): e51364b

Upload fastsearch-core/src/mft/reader.rs

Browse files
Files changed (1) hide show
  1. fastsearch-core/src/mft/reader.rs +387 -0
fastsearch-core/src/mft/reader.rs ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #![allow(dead_code)]
3
+
4
+ use std::mem;
5
+ use windows::{
6
+ core::PCWSTR,
7
+ Win32::Foundation::HANDLE,
8
+ Win32::Storage::FileSystem::{
9
+ CreateFileW, ReadFile, SetFilePointerEx,
10
+ FILE_BEGIN, FILE_FLAG_BACKUP_SEMANTICS, FILE_FLAG_SEQUENTIAL_SCAN,
11
+ FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE, OPEN_EXISTING,
12
+ },
13
+ Win32::System::Ioctl::{
14
+ FSCTL_ENUM_USN_DATA, MFT_ENUM_DATA_V0, USN_RECORD_V2,
15
+ },
16
+ Win32::System::IO::DeviceIoControl,
17
+ };
18
+
19
+ use crate::mft::types::NtfsDrive;
20
+
21
/// Size in bytes (4 MiB) of the output buffer handed to `FSCTL_ENUM_USN_DATA`
/// by the USN-enumeration scan.
const FALLBACK_BUF: usize = 4 << 20;

/// Size in bytes (4 MiB) of the read buffer used when streaming `$MFT` directly.
const DIRECT_BUF: usize = 4 << 20;
23
+
24
/// One file or directory entry discovered during an MFT scan.
///
/// The name is not stored inline: `name_off` and `name_len` address a run of
/// UTF-16 code units inside the shared name arena (`ScanResult::name_data`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CompactRecord {
    /// NTFS file reference number of this entry.
    pub file_ref: u64,
    /// NTFS file reference number of the parent directory.
    pub parent_ref: u64,
    /// Index of the first UTF-16 code unit of the name in the name arena.
    pub name_off: u32,
    /// Length of the name in UTF-16 code units.
    pub name_len: u16,
    /// `true` when the entry is a directory.
    pub is_dir: bool,
}
31
+
32
+ pub struct ScanResult {
33
+ pub records: Vec<CompactRecord>,
34
+ pub name_data: Vec<u16>,
35
+ }
36
+
37
+ pub struct MftReader {
38
+ handle: HANDLE,
39
+ pub drive: NtfsDrive,
40
+ }
41
+
42
+ impl MftReader {
43
+ pub fn open(drive: &NtfsDrive) -> windows::core::Result<Self> {
44
+ let path: Vec<u16> = drive
45
+ .device_path
46
+ .encode_utf16()
47
+ .chain(Some(0))
48
+ .collect();
49
+
50
+ let handle = unsafe {
51
+ CreateFileW(
52
+ PCWSTR(path.as_ptr()),
53
+ 0x80000000u32,
54
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
55
+ None,
56
+ OPEN_EXISTING,
57
+ FILE_FLAG_BACKUP_SEMANTICS,
58
+ None,
59
+ )?
60
+ };
61
+
62
+ Ok(Self {
63
+ handle,
64
+ drive: drive.clone(),
65
+ })
66
+ }
67
+
68
+ pub fn scan_direct(&self) -> Option<ScanResult> {
69
+ let record_size = self.read_mft_record_size()?;
70
+
71
+ let mft_path = format!("{}$MFT", self.drive.root);
72
+ let mft_wide: Vec<u16> = mft_path.encode_utf16().chain(Some(0)).collect();
73
+
74
+ let mft_handle = unsafe {
75
+ CreateFileW(
76
+ PCWSTR(mft_wide.as_ptr()),
77
+ 0x80000000u32,
78
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
79
+ None,
80
+ OPEN_EXISTING,
81
+ FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_SEQUENTIAL_SCAN,
82
+ None,
83
+ )
84
+ .ok()?
85
+ };
86
+
87
+ let mut records: Vec<CompactRecord> = Vec::with_capacity(3_000_000);
88
+ let mut name_data: Vec<u16> = Vec::with_capacity(40_000_000);
89
+ let mut buffer = vec![0u8; DIRECT_BUF];
90
+ let mut mft_index: u64 = 0;
91
+ let mut leftover = 0usize;
92
+
93
+ loop {
94
+ let mut bytes_read = 0u32;
95
+ let ok = unsafe {
96
+ ReadFile(
97
+ mft_handle,
98
+ Some(&mut buffer[leftover..]),
99
+ Some(&mut bytes_read),
100
+ None,
101
+ )
102
+ };
103
+ if ok.is_err() || bytes_read == 0 {
104
+ break;
105
+ }
106
+
107
+ let total = leftover + bytes_read as usize;
108
+ let mut offset = 0usize;
109
+
110
+ while offset + record_size <= total {
111
+ let applied =
112
+ Self::apply_fixup(&mut buffer[offset..offset + record_size], record_size);
113
+
114
+ if applied {
115
+ Self::parse_file_record(
116
+ &buffer[offset..offset + record_size],
117
+ mft_index,
118
+ &mut records,
119
+ &mut name_data,
120
+ );
121
+ }
122
+
123
+ mft_index += 1;
124
+ offset += record_size;
125
+ }
126
+
127
+ offset = total - (total % record_size);
128
+
129
+ leftover = total - offset;
130
+ if leftover > 0 {
131
+ unsafe {
132
+ std::ptr::copy(
133
+ buffer.as_ptr().add(offset),
134
+ buffer.as_mut_ptr(),
135
+ leftover,
136
+ );
137
+ }
138
+ }
139
+ }
140
+
141
+ unsafe {
142
+ windows::Win32::Foundation::CloseHandle(mft_handle).ok();
143
+ }
144
+
145
+ Some(ScanResult { records, name_data })
146
+ }
147
+
148
+ pub fn scan(&self) -> ScanResult {
149
+ let mut records: Vec<CompactRecord> = Vec::with_capacity(3_000_000);
150
+ let mut name_data: Vec<u16> = Vec::with_capacity(40_000_000);
151
+
152
+ let mut enum_data = MFT_ENUM_DATA_V0 {
153
+ StartFileReferenceNumber: 0,
154
+ LowUsn: 0,
155
+ HighUsn: i64::MAX,
156
+ };
157
+
158
+ let mut buffer = vec![0u8; FALLBACK_BUF];
159
+
160
+ loop {
161
+ let mut bytes_returned: u32 = 0;
162
+
163
+ let ok = unsafe {
164
+ DeviceIoControl(
165
+ self.handle,
166
+ FSCTL_ENUM_USN_DATA,
167
+ Some(&enum_data as *const _ as *const _),
168
+ mem::size_of::<MFT_ENUM_DATA_V0>() as u32,
169
+ Some(buffer.as_mut_ptr() as *mut _),
170
+ FALLBACK_BUF as u32,
171
+ Some(&mut bytes_returned),
172
+ None,
173
+ )
174
+ };
175
+
176
+ if let Err(e) = ok {
177
+ let code = e.code().0 as u32;
178
+ if code == 0x80070026 {
179
+ break;
180
+ }
181
+ eprintln!("MFT error on {}: {:?}", self.drive.letter, e);
182
+ break;
183
+ }
184
+
185
+ if bytes_returned <= 8 {
186
+ break;
187
+ }
188
+
189
+ let next_ref = u64::from_ne_bytes(buffer[0..8].try_into().unwrap());
190
+ enum_data.StartFileReferenceNumber = next_ref;
191
+
192
+ let mut offset = 8usize;
193
+ while offset + mem::size_of::<USN_RECORD_V2>() <= bytes_returned as usize {
194
+ let record = unsafe {
195
+ &*(buffer.as_ptr().add(offset) as *const USN_RECORD_V2)
196
+ };
197
+
198
+ let rec_len = record.RecordLength as usize;
199
+ if rec_len == 0 || offset + rec_len > bytes_returned as usize {
200
+ break;
201
+ }
202
+
203
+ let name_offset = record.FileNameOffset as usize;
204
+ let name_len = record.FileNameLength as usize / 2;
205
+ let name_ptr = unsafe {
206
+ buffer.as_ptr().add(offset + name_offset) as *const u16
207
+ };
208
+ let name_slice = unsafe { std::slice::from_raw_parts(name_ptr, name_len) };
209
+
210
+ let arena_off = name_data.len() as u32;
211
+ name_data.extend_from_slice(name_slice);
212
+
213
+ records.push(CompactRecord {
214
+ file_ref: record.FileReferenceNumber as u64,
215
+ parent_ref: record.ParentFileReferenceNumber as u64,
216
+ name_off: arena_off,
217
+ name_len: name_len as u16,
218
+ is_dir: (record.FileAttributes & 0x10) != 0,
219
+ });
220
+
221
+ offset += rec_len;
222
+ }
223
+ }
224
+
225
+ ScanResult { records, name_data }
226
+ }
227
+
228
+ fn read_mft_record_size(&self) -> Option<usize> {
229
+ unsafe {
230
+ SetFilePointerEx(self.handle, 0, None, FILE_BEGIN).ok()?;
231
+ }
232
+ let mut boot = [0u8; 512];
233
+ let mut br = 0u32;
234
+ unsafe {
235
+ ReadFile(self.handle, Some(&mut boot), Some(&mut br), None).ok()?;
236
+ }
237
+ if br < 512 || &boot[3..7] != b"NTFS" {
238
+ return None;
239
+ }
240
+
241
+ let bytes_per_sector = u16::from_le_bytes([boot[0x0B], boot[0x0C]]) as usize;
242
+ let sectors_per_cluster = boot[0x0D] as usize;
243
+ let cluster_size = bytes_per_sector * sectors_per_cluster;
244
+
245
+ let raw = boot[0x40] as i8;
246
+ let record_size = if raw > 0 {
247
+ raw as usize * cluster_size
248
+ } else {
249
+ 1usize << (-(raw as i32) as usize)
250
+ };
251
+
252
+ Some(record_size)
253
+ }
254
+
255
/// Validates a raw MFT file record and undoes its update-sequence fixups in
/// place.
///
/// The record must start with the `FILE` signature. Bytes 4..6 hold the
/// offset of the update sequence array and bytes 6..8 its entry count. The
/// first entry is the check value that must appear in the last two bytes of
/// every 512-byte stride; the following entries are the original bytes that
/// are written back there.
///
/// Returns `true` when the record is valid and has been patched. Returns
/// `false` — leaving `record` unmodified — when the slice is shorter than 48
/// bytes or than `record_size`, the signature is wrong, the array is
/// malformed, or any stride fails the check.
fn apply_fixup(record: &mut [u8], record_size: usize) -> bool {
    const STRIDE: usize = 512;

    if record.len() < 48 || record_size > record.len() || &record[0..4] != b"FILE" {
        return false;
    }

    let fixup_off = u16::from_le_bytes([record[4], record[5]]) as usize;
    let fixup_cnt = u16::from_le_bytes([record[6], record[7]]) as usize;

    if fixup_cnt < 2 || fixup_off + fixup_cnt * 2 > record_size {
        return false;
    }

    let check = [record[fixup_off], record[fixup_off + 1]];

    // Number of array entries whose stride actually lies inside the record;
    // surplus entries are ignored.
    let covered = (1..fixup_cnt)
        .take_while(|i| i * STRIDE <= record_size)
        .count();

    // Pass 1: verify every stride before touching anything, so a torn record
    // is rejected without being half-patched.
    for i in 1..=covered {
        let end = i * STRIDE - 2;
        if record[end..end + 2] != check {
            return false;
        }
    }

    // Pass 2: restore the original bytes saved in the array.
    for i in 1..=covered {
        let src = fixup_off + i * 2;
        record.copy_within(src..src + 2, i * STRIDE - 2);
    }

    true
}
283
+
284
+ fn parse_file_record(
285
+ record: &[u8],
286
+ mft_index: u64,
287
+ records: &mut Vec<CompactRecord>,
288
+ name_data: &mut Vec<u16>,
289
+ ) {
290
+ let flags = u16::from_le_bytes([record[0x16], record[0x17]]);
291
+ if flags & 0x01 == 0 {
292
+ return;
293
+ }
294
+
295
+ let is_dir = flags & 0x02 != 0;
296
+ let seq = u16::from_le_bytes([record[0x10], record[0x11]]) as u64;
297
+ let file_ref = mft_index | (seq << 48);
298
+
299
+ let first_attr = u16::from_le_bytes([record[0x14], record[0x15]]) as usize;
300
+ let mut aoff = first_attr;
301
+
302
+ let mut best_ns: u8 = 255;
303
+ let mut best_name: Option<(usize, usize, u64)> = None;
304
+
305
+ while aoff + 8 <= record.len() {
306
+ let atype = u32::from_le_bytes(record[aoff..aoff + 4].try_into().unwrap());
307
+
308
+ if atype == 0xFFFF_FFFF {
309
+ break;
310
+ }
311
+
312
+ let alen =
313
+ u32::from_le_bytes(record[aoff + 4..aoff + 8].try_into().unwrap()) as usize;
314
+
315
+ if alen == 0 || aoff + alen > record.len() {
316
+ break;
317
+ }
318
+
319
+ if atype == 0x30 && record[aoff + 8] == 0 {
320
+ let vlen =
321
+ u32::from_le_bytes(record[aoff + 16..aoff + 20].try_into().unwrap()) as usize;
322
+
323
+ let voff =
324
+ u16::from_le_bytes([record[aoff + 20], record[aoff + 21]]) as usize;
325
+
326
+ let vs = aoff + voff;
327
+
328
+ if vs + 66 <= record.len() && vlen >= 66 {
329
+ let parent =
330
+ u64::from_le_bytes(record[vs..vs + 8].try_into().unwrap());
331
+
332
+ let nlen = record[vs + 64] as usize;
333
+ let ns = record[vs + 65];
334
+
335
+ if vs + 66 + nlen * 2 <= record.len() {
336
+ if ns == 2 {
337
+ continue;
338
+ }
339
+
340
+ let priority = match ns {
341
+ 1 => 0,
342
+ 3 => 1,
343
+ 0 => 2,
344
+ _ => 3,
345
+ };
346
+
347
+ if priority < best_ns {
348
+ best_ns = priority;
349
+ best_name = Some((vs + 66, nlen, parent));
350
+
351
+ if priority == 0 {
352
+ break;
353
+ }
354
+ }
355
+ }
356
+ }
357
+ }
358
+
359
+ aoff += alen;
360
+ }
361
+
362
+ if let Some((name_pos, nlen, parent)) = best_name {
363
+ let arena_off = name_data.len() as u32;
364
+
365
+ for i in 0..nlen {
366
+ let p = name_pos + i * 2;
367
+ name_data.push(u16::from_le_bytes([record[p], record[p + 1]]));
368
+ }
369
+
370
+ records.push(CompactRecord {
371
+ file_ref,
372
+ parent_ref: parent,
373
+ name_off: arena_off,
374
+ name_len: nlen as u16,
375
+ is_dir,
376
+ });
377
+ }
378
+ }
379
+
380
+ }
381
+
382
+
383
+ impl Drop for MftReader {
384
+ fn drop(&mut self) {
385
+ unsafe { windows::Win32::Foundation::CloseHandle(self.handle).ok() };
386
+ }
387
+ }