anshdadhich committed on
Commit
04e62b4
·
verified ·
1 Parent(s): f4ce227

Delete fastsearch-core

Browse files
fastsearch-core/Cargo.toml DELETED
@@ -1,35 +0,0 @@
1
- [package]
2
- name = "fastsearch-core"
3
- version = "0.1.0"
4
- edition = "2021"
5
-
6
- [dependencies]
7
- windows = { version = "0.60", features = [
8
- "Win32_Storage_FileSystem",
9
- "Win32_Security",
10
- "Win32_System_Ioctl",
11
- "Win32_System_IO",
12
- "Win32_Foundation",
13
- "Win32_System_Threading",
14
- "Win32_System_LibraryLoader",
15
- "Win32_UI",
16
- "Win32_Graphics_Gdi",
17
- "Win32_Graphics_Dwm",
18
- "Win32_UI_Input",
19
- "Win32_UI_WindowsAndMessaging",
20
- "Win32_UI_Shell",
21
- "Win32_UI_Input_KeyboardAndMouse",
22
- "Win32_System_Diagnostics_Debug",
23
- "Win32_System_Pipes",
24
- ] }
25
- rayon = "1.10"
26
- crossbeam-channel = "0.5"
27
- fuzzy-matcher = "0.3"
28
- once_cell = "1.19"
29
- parking_lot = "0.12"
30
- bincode = "1.3"
31
- serde = { version = "1", features = ["derive"] }
32
- serde_json = "1.0"
33
- lz4_flex = "0.11"
34
- ctrlc = "3.5"
35
- memmap2 = "0.9"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/index/mod.rs DELETED
@@ -1,2 +0,0 @@
1
- pub mod search;
2
- pub mod store;
 
 
 
fastsearch-core/src/index/search.rs DELETED
@@ -1,126 +0,0 @@
1
- use rayon::prelude::*;
2
- use crate::index::store::IndexStore;
3
-
4
/// Lower-case file extensions (without the dot) that `search` treats as
/// launchable applications or installers.
const APP_EXTENSIONS: &[&str] = &[
    "exe",
    "lnk",
    "msi",
    "appx",
    "msix",
];

/// Lower-case path fragments; `search` promotes an application-like match to
/// rank 0 when its lower-cased full path contains one of these.
const APP_PATH_MARKERS: &[&str] = &[
    "\\program files\\",
    "\\program files (x86)\\",
    "\\start menu\\",
    "\\desktop\\",
    "\\appdata\\",
];
9
-
10
/// One hit produced by `search`.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Absolute path of the matched entry.
    pub full_path: std::path::PathBuf,
    /// File or directory name in its original casing.
    pub name: String,
    /// Match quality; lower values sort first.
    pub rank: u8,
    /// `true` when the entry is a directory.
    pub is_dir: bool,
}
17
-
18
- pub fn search(
19
- store: &IndexStore,
20
- query: &str,
21
- limit: usize,
22
- case_sensitive: bool,
23
- excluded_dirs: &[String],
24
- ) -> Vec<SearchResult> {
25
- if query.is_empty() {
26
- return Vec::new();
27
- }
28
-
29
- let q = if case_sensitive { query.to_string() } else { query.to_lowercase() };
30
-
31
- let entries = &store.entries;
32
- let name_lower_arena = &store.name_lower_arena;
33
- let name_arena = &store.name_arena;
34
-
35
- let mut candidates: Vec<(u32, u8)> = entries
36
- .par_iter()
37
- .enumerate()
38
- .filter_map(|(idx, entry)| {
39
- let name_cmp = if case_sensitive {
40
- unsafe { std::str::from_utf8_unchecked(&name_arena[entry.name_off as usize..(entry.name_off as usize + entry.name_len as usize)]) }
41
- } else {
42
- unsafe { std::str::from_utf8_unchecked(&name_lower_arena[entry.name_lower_off as usize..(entry.name_lower_off as usize + entry.name_lower_len as usize)]) }
43
- };
44
-
45
- let rank = if name_cmp == q { 1u8 }
46
- else if name_cmp.starts_with(&q) { 2 }
47
- else if name_cmp.contains(q.as_str()) { 3 }
48
- else { return None; };
49
-
50
- Some((idx as u32, rank))
51
- })
52
- .collect();
53
-
54
- candidates.sort_unstable_by_key(|&(_, rank)| rank);
55
- let overshoot = (limit * 5).max(1000);
56
- candidates.truncate(overshoot);
57
-
58
- let mut results: Vec<SearchResult> = Vec::with_capacity(limit);
59
-
60
- for &(idx, base_rank) in &candidates {
61
- let entry = &entries[idx as usize];
62
- let full_path = build_path(entry.file_ref, store);
63
-
64
- if !excluded_dirs.is_empty() {
65
- let path_lower = full_path.to_string_lossy().to_lowercase();
66
- if excluded_dirs.iter().any(|ex| path_lower.starts_with(ex.as_str())) {
67
- continue;
68
- }
69
- }
70
-
71
- let name_lower = store.name_lower(entry);
72
- let rank = if base_rank <= 2 {
73
- let ext_is_app = name_lower
74
- .rsplit('.')
75
- .next()
76
- .map(|e| APP_EXTENSIONS.contains(&e))
77
- .unwrap_or(false);
78
- if ext_is_app {
79
- let path_lower = full_path.to_string_lossy().to_lowercase();
80
- if APP_PATH_MARKERS.iter().any(|m| path_lower.contains(m)) { 0 } else { base_rank }
81
- } else {
82
- base_rank
83
- }
84
- } else {
85
- base_rank
86
- };
87
-
88
- results.push(SearchResult {
89
- full_path,
90
- name: store.name(entry).to_string(),
91
- rank,
92
- is_dir: entry.is_dir(),
93
- });
94
- }
95
-
96
- results.sort_unstable_by_key(|r| r.rank);
97
- results.truncate(limit);
98
- results
99
- }
100
-
101
- /// Iterative path builder — walks parent chain via sorted ref_lookup.
102
- pub fn build_path(file_ref: u64, store: &IndexStore) -> std::path::PathBuf {
103
- let mut components: Vec<&str> = Vec::with_capacity(16);
104
- let mut current = file_ref;
105
-
106
- for _ in 0..64 {
107
- match store.lookup_idx(current) {
108
- Some(idx) => {
109
- let entry = &store.entries[idx as usize];
110
- components.push(store.name(entry));
111
- if entry.parent_ref == current {
112
- break;
113
- }
114
- current = entry.parent_ref;
115
- }
116
- None => break,
117
- }
118
- }
119
-
120
- components.reverse();
121
- let mut path = std::path::PathBuf::from(&store.drive_root);
122
- for comp in components {
123
- path.push(comp);
124
- }
125
- path
126
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/index/store.rs DELETED
@@ -1,252 +0,0 @@
1
- #![allow(dead_code)]
2
- use serde::{Serialize, Deserialize};
3
- use crate::mft::types::{FileKind, FileRecord, JournalCheckpoint};
4
- use crate::mft::reader::ScanResult;
5
-
6
- #[derive(Debug, Clone, Serialize, Deserialize)]
7
- pub struct CachedEntry {
8
- pub file_ref: u64,
9
- pub parent_ref: u64,
10
- pub name: String,
11
- pub kind: FileKind,
12
- }
13
-
14
- #[derive(Serialize, Deserialize)]
15
- pub struct CacheData {
16
- pub entries: Vec<CachedEntry>,
17
- pub drive_root: String,
18
- pub checkpoints: Vec<JournalCheckpoint>,
19
- }
20
-
21
/// Compact in-memory record for one file or directory.
///
/// Names are not stored inline: `name_off`/`name_len` and
/// `name_lower_off`/`name_lower_len` are byte ranges into the owning
/// `IndexStore`'s `name_arena` and `name_lower_arena`.
#[derive(Debug, Clone)]
pub struct IndexEntry {
    /// File reference number of this entry.
    pub file_ref: u64,
    /// File reference number of the parent directory.
    pub parent_ref: u64,
    /// Byte offset of the original-case name in `name_arena`.
    pub name_off: u32,
    /// Byte offset of the lower-cased name in `name_lower_arena`.
    pub name_lower_off: u32,
    /// Byte length of the original-case name.
    pub name_len: u16,
    /// Byte length of the lower-cased name.
    pub name_lower_len: u16,
    /// Bit flags; bit 0 set means the entry is a directory.
    pub flags: u8,
}
31
-
32
- impl IndexEntry {
33
- #[inline]
34
- pub fn is_dir(&self) -> bool {
35
- self.flags & 1 != 0
36
- }
37
-
38
- #[inline]
39
- pub fn kind(&self) -> FileKind {
40
- if self.is_dir() { FileKind::Directory } else { FileKind::File }
41
- }
42
- }
43
-
44
- pub struct IndexStore {
45
- pub entries: Vec<IndexEntry>,
46
- pub name_arena: Vec<u8>,
47
- pub name_lower_arena: Vec<u8>,
48
- pub ref_lookup: Vec<(u64, u32)>,
49
- pub drive_root: String,
50
- pub checkpoints: Vec<JournalCheckpoint>,
51
- }
52
-
53
- impl IndexStore {
54
- pub fn new() -> Self {
55
- Self {
56
- entries: Vec::new(),
57
- name_arena: Vec::new(),
58
- name_lower_arena: Vec::new(),
59
- ref_lookup: Vec::new(),
60
- drive_root: String::new(),
61
- checkpoints: Vec::new(),
62
- }
63
- }
64
-
65
- #[inline]
66
- pub fn name(&self, e: &IndexEntry) -> &str {
67
- unsafe {
68
- std::str::from_utf8_unchecked(
69
- &self.name_arena[e.name_off as usize..(e.name_off as usize + e.name_len as usize)]
70
- )
71
- }
72
- }
73
-
74
- #[inline]
75
- pub fn name_lower(&self, e: &IndexEntry) -> &str {
76
- unsafe {
77
- std::str::from_utf8_unchecked(
78
- &self.name_lower_arena[e.name_lower_off as usize..(e.name_lower_off as usize + e.name_lower_len as usize)]
79
- )
80
- }
81
- }
82
-
83
- pub fn lookup_idx(&self, file_ref: u64) -> Option<u32> {
84
- self.ref_lookup
85
- .binary_search_by_key(&file_ref, |&(r, _)| r)
86
- .ok()
87
- .map(|pos| self.ref_lookup[pos].1)
88
- }
89
-
90
- fn rebuild_ref_lookup(&mut self) {
91
- self.ref_lookup.clear();
92
- self.ref_lookup.reserve(self.entries.len());
93
- for (i, e) in self.entries.iter().enumerate() {
94
- self.ref_lookup.push((e.file_ref, i as u32));
95
- }
96
- self.ref_lookup.sort_unstable_by_key(|&(r, _)| r);
97
- }
98
-
99
- pub fn populate_from_scan(&mut self, scan: ScanResult, drive_root: &str) {
100
- self.drive_root = drive_root.to_string();
101
- let count = scan.records.len();
102
- self.entries.reserve(count);
103
- self.name_arena.reserve(count * 30);
104
- self.name_lower_arena.reserve(count * 30);
105
-
106
- for r in &scan.records {
107
- let name_slice = &scan.name_data[r.name_off as usize..(r.name_off as usize + r.name_len as usize)];
108
- let name = String::from_utf16_lossy(name_slice);
109
- let name_lower = name.to_lowercase();
110
-
111
- let n_off = self.name_arena.len() as u32;
112
- let n_len = name.len() as u16;
113
- self.name_arena.extend_from_slice(name.as_bytes());
114
-
115
- let nl_off = self.name_lower_arena.len() as u32;
116
- let nl_len = name_lower.len() as u16;
117
- self.name_lower_arena.extend_from_slice(name_lower.as_bytes());
118
-
119
- self.entries.push(IndexEntry {
120
- file_ref: r.file_ref,
121
- parent_ref: r.parent_ref,
122
- name_off: n_off,
123
- name_lower_off: nl_off,
124
- name_len: n_len,
125
- name_lower_len: nl_len,
126
- flags: if r.is_dir { 1 } else { 0 },
127
- });
128
- }
129
- }
130
-
131
- pub fn finalize(&mut self) {
132
- let store_ptr = self as *const IndexStore;
133
- self.entries.sort_unstable_by(|a, b| {
134
- let s = unsafe { &*store_ptr };
135
- s.name_lower(a).cmp(s.name_lower(b))
136
- });
137
- self.rebuild_ref_lookup();
138
- self.name_arena.shrink_to_fit();
139
- self.name_lower_arena.shrink_to_fit();
140
- }
141
-
142
- pub fn to_cache(&self) -> CacheData {
143
- CacheData {
144
- entries: self.entries.iter().map(|e| CachedEntry {
145
- file_ref: e.file_ref,
146
- parent_ref: e.parent_ref,
147
- name: self.name(e).to_string(),
148
- kind: e.kind(),
149
- }).collect(),
150
- drive_root: self.drive_root.clone(),
151
- checkpoints: self.checkpoints.clone(),
152
- }
153
- }
154
-
155
- pub fn from_cache(cache: CacheData) -> Self {
156
- let count = cache.entries.len();
157
- let mut store = Self {
158
- entries: Vec::with_capacity(count),
159
- name_arena: Vec::with_capacity(count * 30),
160
- name_lower_arena: Vec::with_capacity(count * 30),
161
- ref_lookup: Vec::with_capacity(count),
162
- drive_root: cache.drive_root,
163
- checkpoints: cache.checkpoints,
164
- };
165
-
166
- for c in cache.entries {
167
- let name_lower = c.name.to_lowercase();
168
-
169
- let n_off = store.name_arena.len() as u32;
170
- let n_len = c.name.len() as u16;
171
- store.name_arena.extend_from_slice(c.name.as_bytes());
172
-
173
- let nl_off = store.name_lower_arena.len() as u32;
174
- let nl_len = name_lower.len() as u16;
175
- store.name_lower_arena.extend_from_slice(name_lower.as_bytes());
176
-
177
- let flags = match c.kind {
178
- FileKind::Directory => 1u8,
179
- FileKind::File => 0u8,
180
- };
181
-
182
- store.entries.push(IndexEntry {
183
- file_ref: c.file_ref,
184
- parent_ref: c.parent_ref,
185
- name_off: n_off,
186
- name_lower_off: nl_off,
187
- name_len: n_len,
188
- name_lower_len: nl_len,
189
- flags,
190
- });
191
- }
192
-
193
- store.rebuild_ref_lookup();
194
- store.name_arena.shrink_to_fit();
195
- store.name_lower_arena.shrink_to_fit();
196
- store
197
- }
198
-
199
- pub fn insert(&mut self, record: FileRecord) {
200
- let name_lower = record.name.to_lowercase();
201
-
202
- let n_off = self.name_arena.len() as u32;
203
- let n_len = record.name.len() as u16;
204
- self.name_arena.extend_from_slice(record.name.as_bytes());
205
-
206
- let nl_off = self.name_lower_arena.len() as u32;
207
- let nl_len = name_lower.len() as u16;
208
- self.name_lower_arena.extend_from_slice(name_lower.as_bytes());
209
-
210
- let flags = match record.kind {
211
- FileKind::Directory => 1u8,
212
- FileKind::File => 0u8,
213
- };
214
-
215
- let entry = IndexEntry {
216
- file_ref: record.file_ref,
217
- parent_ref: record.parent_ref,
218
- name_off: n_off,
219
- name_lower_off: nl_off,
220
- name_len: n_len,
221
- name_lower_len: nl_len,
222
- flags,
223
- };
224
-
225
- let store_ptr = self as *const IndexStore;
226
- let pos = self.entries.partition_point(|e| {
227
- let s = unsafe { &*store_ptr };
228
- s.name_lower(e) < name_lower.as_str()
229
- });
230
- self.entries.insert(pos, entry);
231
- self.rebuild_ref_lookup();
232
- }
233
-
234
- pub fn remove(&mut self, file_ref: u64) {
235
- self.entries.retain(|e| e.file_ref != file_ref);
236
- self.rebuild_ref_lookup();
237
- }
238
-
239
- pub fn rename(&mut self, old_ref: u64, new_record: FileRecord) {
240
- self.remove(old_ref);
241
- self.insert(new_record);
242
- }
243
-
244
- pub fn apply_move(&mut self, file_ref: u64, new_parent_ref: u64, name: String, kind: FileKind) {
245
- self.remove(file_ref);
246
- self.insert(FileRecord { file_ref, parent_ref: new_parent_ref, name, kind });
247
- }
248
-
249
- pub fn len(&self) -> usize {
250
- self.entries.len()
251
- }
252
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/lib.rs DELETED
@@ -1,12 +0,0 @@
1
- #![allow(dead_code)]
2
-
3
- pub mod index;
4
- pub mod mft;
5
- pub mod utils;
6
-
7
- pub use index::search::{SearchResult, search, build_path};
8
- pub use index::store::{IndexStore, CacheData};
9
- pub use mft::types::*;
10
- pub use mft::reader::{MftReader, ScanResult};
11
- pub use mft::watcher::UsnWatcher;
12
- pub use utils::drives::get_ntfs_drives;
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/mft/mod.rs DELETED
@@ -1,3 +0,0 @@
1
- pub mod reader;
2
- pub mod types;
3
- pub mod watcher;
 
 
 
 
fastsearch-core/src/mft/reader.rs DELETED
@@ -1,387 +0,0 @@
1
-
2
- #![allow(dead_code)]
3
-
4
- use std::mem;
5
- use windows::{
6
- core::PCWSTR,
7
- Win32::Foundation::HANDLE,
8
- Win32::Storage::FileSystem::{
9
- CreateFileW, ReadFile, SetFilePointerEx,
10
- FILE_BEGIN, FILE_FLAG_BACKUP_SEMANTICS, FILE_FLAG_SEQUENTIAL_SCAN,
11
- FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE, OPEN_EXISTING,
12
- },
13
- Win32::System::Ioctl::{
14
- FSCTL_ENUM_USN_DATA, MFT_ENUM_DATA_V0, USN_RECORD_V2,
15
- },
16
- Win32::System::IO::DeviceIoControl,
17
- };
18
-
19
- use crate::mft::types::NtfsDrive;
20
-
21
/// Size in bytes (4 MiB) of the output buffer used by `MftReader::scan`.
const FALLBACK_BUF: usize = 4 << 20;
/// Size in bytes (4 MiB) of the read buffer used by `MftReader::scan_direct`.
const DIRECT_BUF: usize = 4 << 20;
23
-
24
/// One file or directory found by a scan.
///
/// The name is not stored inline: `name_off`/`name_len` give a range, in
/// UTF-16 code units, into the `name_data` of the owning `ScanResult`.
#[derive(Debug, Clone)]
pub struct CompactRecord {
    /// File reference number of the record.
    pub file_ref: u64,
    /// File reference number of the parent directory.
    pub parent_ref: u64,
    /// Index of the first UTF-16 unit of the name in `name_data`.
    pub name_off: u32,
    /// Length of the name in UTF-16 units.
    pub name_len: u16,
    /// `true` when the record describes a directory.
    pub is_dir: bool,
}
31
-
32
- pub struct ScanResult {
33
- pub records: Vec<CompactRecord>,
34
- pub name_data: Vec<u16>,
35
- }
36
-
37
- pub struct MftReader {
38
- handle: HANDLE,
39
- pub drive: NtfsDrive,
40
- }
41
-
42
- impl MftReader {
43
- pub fn open(drive: &NtfsDrive) -> windows::core::Result<Self> {
44
- let path: Vec<u16> = drive
45
- .device_path
46
- .encode_utf16()
47
- .chain(Some(0))
48
- .collect();
49
-
50
- let handle = unsafe {
51
- CreateFileW(
52
- PCWSTR(path.as_ptr()),
53
- 0x80000000u32,
54
- FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
55
- None,
56
- OPEN_EXISTING,
57
- FILE_FLAG_BACKUP_SEMANTICS,
58
- None,
59
- )?
60
- };
61
-
62
- Ok(Self {
63
- handle,
64
- drive: drive.clone(),
65
- })
66
- }
67
-
68
- pub fn scan_direct(&self) -> Option<ScanResult> {
69
- let record_size = self.read_mft_record_size()?;
70
-
71
- let mft_path = format!("{}$MFT", self.drive.root);
72
- let mft_wide: Vec<u16> = mft_path.encode_utf16().chain(Some(0)).collect();
73
-
74
- let mft_handle = unsafe {
75
- CreateFileW(
76
- PCWSTR(mft_wide.as_ptr()),
77
- 0x80000000u32,
78
- FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
79
- None,
80
- OPEN_EXISTING,
81
- FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_SEQUENTIAL_SCAN,
82
- None,
83
- )
84
- .ok()?
85
- };
86
-
87
- let mut records: Vec<CompactRecord> = Vec::with_capacity(3_000_000);
88
- let mut name_data: Vec<u16> = Vec::with_capacity(40_000_000);
89
- let mut buffer = vec![0u8; DIRECT_BUF];
90
- let mut mft_index: u64 = 0;
91
- let mut leftover = 0usize;
92
-
93
- loop {
94
- let mut bytes_read = 0u32;
95
- let ok = unsafe {
96
- ReadFile(
97
- mft_handle,
98
- Some(&mut buffer[leftover..]),
99
- Some(&mut bytes_read),
100
- None,
101
- )
102
- };
103
- if ok.is_err() || bytes_read == 0 {
104
- break;
105
- }
106
-
107
- let total = leftover + bytes_read as usize;
108
- let mut offset = 0usize;
109
-
110
- while offset + record_size <= total {
111
- let applied =
112
- Self::apply_fixup(&mut buffer[offset..offset + record_size], record_size);
113
-
114
- if applied {
115
- Self::parse_file_record(
116
- &buffer[offset..offset + record_size],
117
- mft_index,
118
- &mut records,
119
- &mut name_data,
120
- );
121
- }
122
-
123
- mft_index += 1;
124
- offset += record_size;
125
- }
126
-
127
- offset = total - (total % record_size);
128
-
129
- leftover = total - offset;
130
- if leftover > 0 {
131
- unsafe {
132
- std::ptr::copy(
133
- buffer.as_ptr().add(offset),
134
- buffer.as_mut_ptr(),
135
- leftover,
136
- );
137
- }
138
- }
139
- }
140
-
141
- unsafe {
142
- windows::Win32::Foundation::CloseHandle(mft_handle).ok();
143
- }
144
-
145
- Some(ScanResult { records, name_data })
146
- }
147
-
148
- pub fn scan(&self) -> ScanResult {
149
- let mut records: Vec<CompactRecord> = Vec::with_capacity(3_000_000);
150
- let mut name_data: Vec<u16> = Vec::with_capacity(40_000_000);
151
-
152
- let mut enum_data = MFT_ENUM_DATA_V0 {
153
- StartFileReferenceNumber: 0,
154
- LowUsn: 0,
155
- HighUsn: i64::MAX,
156
- };
157
-
158
- let mut buffer = vec![0u8; FALLBACK_BUF];
159
-
160
- loop {
161
- let mut bytes_returned: u32 = 0;
162
-
163
- let ok = unsafe {
164
- DeviceIoControl(
165
- self.handle,
166
- FSCTL_ENUM_USN_DATA,
167
- Some(&enum_data as *const _ as *const _),
168
- mem::size_of::<MFT_ENUM_DATA_V0>() as u32,
169
- Some(buffer.as_mut_ptr() as *mut _),
170
- FALLBACK_BUF as u32,
171
- Some(&mut bytes_returned),
172
- None,
173
- )
174
- };
175
-
176
- if let Err(e) = ok {
177
- let code = e.code().0 as u32;
178
- if code == 0x80070026 {
179
- break;
180
- }
181
- eprintln!("MFT error on {}: {:?}", self.drive.letter, e);
182
- break;
183
- }
184
-
185
- if bytes_returned <= 8 {
186
- break;
187
- }
188
-
189
- let next_ref = u64::from_ne_bytes(buffer[0..8].try_into().unwrap());
190
- enum_data.StartFileReferenceNumber = next_ref;
191
-
192
- let mut offset = 8usize;
193
- while offset + mem::size_of::<USN_RECORD_V2>() <= bytes_returned as usize {
194
- let record = unsafe {
195
- &*(buffer.as_ptr().add(offset) as *const USN_RECORD_V2)
196
- };
197
-
198
- let rec_len = record.RecordLength as usize;
199
- if rec_len == 0 || offset + rec_len > bytes_returned as usize {
200
- break;
201
- }
202
-
203
- let name_offset = record.FileNameOffset as usize;
204
- let name_len = record.FileNameLength as usize / 2;
205
- let name_ptr = unsafe {
206
- buffer.as_ptr().add(offset + name_offset) as *const u16
207
- };
208
- let name_slice = unsafe { std::slice::from_raw_parts(name_ptr, name_len) };
209
-
210
- let arena_off = name_data.len() as u32;
211
- name_data.extend_from_slice(name_slice);
212
-
213
- records.push(CompactRecord {
214
- file_ref: record.FileReferenceNumber as u64,
215
- parent_ref: record.ParentFileReferenceNumber as u64,
216
- name_off: arena_off,
217
- name_len: name_len as u16,
218
- is_dir: (record.FileAttributes & 0x10) != 0,
219
- });
220
-
221
- offset += rec_len;
222
- }
223
- }
224
-
225
- ScanResult { records, name_data }
226
- }
227
-
228
- fn read_mft_record_size(&self) -> Option<usize> {
229
- unsafe {
230
- SetFilePointerEx(self.handle, 0, None, FILE_BEGIN).ok()?;
231
- }
232
- let mut boot = [0u8; 512];
233
- let mut br = 0u32;
234
- unsafe {
235
- ReadFile(self.handle, Some(&mut boot), Some(&mut br), None).ok()?;
236
- }
237
- if br < 512 || &boot[3..7] != b"NTFS" {
238
- return None;
239
- }
240
-
241
- let bytes_per_sector = u16::from_le_bytes([boot[0x0B], boot[0x0C]]) as usize;
242
- let sectors_per_cluster = boot[0x0D] as usize;
243
- let cluster_size = bytes_per_sector * sectors_per_cluster;
244
-
245
- let raw = boot[0x40] as i8;
246
- let record_size = if raw > 0 {
247
- raw as usize * cluster_size
248
- } else {
249
- 1usize << (-(raw as i32) as usize)
250
- };
251
-
252
- Some(record_size)
253
- }
254
-
255
- fn apply_fixup(record: &mut [u8], record_size: usize) -> bool {
256
- if record.len() < 48 || &record[0..4] != b"FILE" {
257
- return false;
258
- }
259
-
260
- let fixup_off = u16::from_le_bytes([record[4], record[5]]) as usize;
261
- let fixup_cnt = u16::from_le_bytes([record[6], record[7]]) as usize;
262
-
263
- if fixup_cnt < 2 || fixup_off + fixup_cnt * 2 > record_size {
264
- return false;
265
- }
266
-
267
- let check = [record[fixup_off], record[fixup_off + 1]];
268
-
269
- for i in 1..fixup_cnt {
270
- let end = i * 512 - 2;
271
- if end + 1 >= record_size {
272
- break;
273
- }
274
- if record[end] != check[0] || record[end + 1] != check[1] {
275
- return false;
276
- }
277
- record[end] = record[fixup_off + i * 2];
278
- record[end + 1] = record[fixup_off + i * 2 + 1];
279
- }
280
-
281
- true
282
- }
283
-
284
- fn parse_file_record(
285
- record: &[u8],
286
- mft_index: u64,
287
- records: &mut Vec<CompactRecord>,
288
- name_data: &mut Vec<u16>,
289
- ) {
290
- let flags = u16::from_le_bytes([record[0x16], record[0x17]]);
291
- if flags & 0x01 == 0 {
292
- return;
293
- }
294
-
295
- let is_dir = flags & 0x02 != 0;
296
- let seq = u16::from_le_bytes([record[0x10], record[0x11]]) as u64;
297
- let file_ref = mft_index | (seq << 48);
298
-
299
- let first_attr = u16::from_le_bytes([record[0x14], record[0x15]]) as usize;
300
- let mut aoff = first_attr;
301
-
302
- let mut best_ns: u8 = 255;
303
- let mut best_name: Option<(usize, usize, u64)> = None;
304
-
305
- while aoff + 8 <= record.len() {
306
- let atype = u32::from_le_bytes(record[aoff..aoff + 4].try_into().unwrap());
307
-
308
- if atype == 0xFFFF_FFFF {
309
- break;
310
- }
311
-
312
- let alen =
313
- u32::from_le_bytes(record[aoff + 4..aoff + 8].try_into().unwrap()) as usize;
314
-
315
- if alen == 0 || aoff + alen > record.len() {
316
- break;
317
- }
318
-
319
- if atype == 0x30 && record[aoff + 8] == 0 {
320
- let vlen =
321
- u32::from_le_bytes(record[aoff + 16..aoff + 20].try_into().unwrap()) as usize;
322
-
323
- let voff =
324
- u16::from_le_bytes([record[aoff + 20], record[aoff + 21]]) as usize;
325
-
326
- let vs = aoff + voff;
327
-
328
- if vs + 66 <= record.len() && vlen >= 66 {
329
- let parent =
330
- u64::from_le_bytes(record[vs..vs + 8].try_into().unwrap());
331
-
332
- let nlen = record[vs + 64] as usize;
333
- let ns = record[vs + 65];
334
-
335
- if vs + 66 + nlen * 2 <= record.len() {
336
- if ns == 2 {
337
- continue;
338
- }
339
-
340
- let priority = match ns {
341
- 1 => 0,
342
- 3 => 1,
343
- 0 => 2,
344
- _ => 3,
345
- };
346
-
347
- if priority < best_ns {
348
- best_ns = priority;
349
- best_name = Some((vs + 66, nlen, parent));
350
-
351
- if priority == 0 {
352
- break;
353
- }
354
- }
355
- }
356
- }
357
- }
358
-
359
- aoff += alen;
360
- }
361
-
362
- if let Some((name_pos, nlen, parent)) = best_name {
363
- let arena_off = name_data.len() as u32;
364
-
365
- for i in 0..nlen {
366
- let p = name_pos + i * 2;
367
- name_data.push(u16::from_le_bytes([record[p], record[p + 1]]));
368
- }
369
-
370
- records.push(CompactRecord {
371
- file_ref,
372
- parent_ref: parent,
373
- name_off: arena_off,
374
- name_len: nlen as u16,
375
- is_dir,
376
- });
377
- }
378
- }
379
-
380
- }
381
-
382
-
383
- impl Drop for MftReader {
384
- fn drop(&mut self) {
385
- unsafe { windows::Win32::Foundation::CloseHandle(self.handle).ok() };
386
- }
387
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/mft/types.rs DELETED
@@ -1,37 +0,0 @@
1
- use serde::{Serialize, Deserialize};
2
-
3
- #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
4
- pub enum FileKind {
5
- File,
6
- Directory,
7
- }
8
-
9
- #[derive(Debug, Clone)]
10
- pub struct FileRecord {
11
- pub file_ref: u64,
12
- pub parent_ref: u64,
13
- pub name: String,
14
- pub kind: FileKind,
15
- }
16
-
17
/// Identifies one NTFS drive.
#[derive(Debug, Clone)]
pub struct NtfsDrive {
    /// Drive letter.
    pub letter: char,
    /// Root path of the drive, as reported by the system's drive list.
    pub root: String,
    /// Device path used to open the drive itself.
    pub device_path: String,
}
23
-
24
- #[derive(Debug)]
25
- pub enum IndexEvent {
26
- Created(FileRecord),
27
- Deleted(u64),
28
- Renamed { old_ref: u64, new_record: FileRecord },
29
- Moved { file_ref: u64, new_parent_ref: u64, name: String, kind: FileKind },
30
- }
31
-
32
- #[derive(Debug, Clone, Serialize, Deserialize)]
33
- pub struct JournalCheckpoint {
34
- pub next_usn: i64,
35
- pub journal_id: u64,
36
- pub drive_letter: char,
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/mft/watcher.rs DELETED
@@ -1,232 +0,0 @@
1
- use std::mem;
2
- use std::time::Duration;
3
- use crossbeam_channel::Sender;
4
- use windows::{
5
- core::PCWSTR,
6
- Win32::Foundation::HANDLE,
7
- Win32::Storage::FileSystem::{
8
- CreateFileW, FILE_FLAG_BACKUP_SEMANTICS, FILE_SHARE_DELETE,
9
- FILE_SHARE_READ, FILE_SHARE_WRITE, OPEN_EXISTING,
10
- },
11
- Win32::System::Ioctl::{
12
- FSCTL_QUERY_USN_JOURNAL, FSCTL_READ_USN_JOURNAL,
13
- READ_USN_JOURNAL_DATA_V0, USN_JOURNAL_DATA_V0, USN_RECORD_V2,
14
- USN_REASON_FILE_CREATE, USN_REASON_FILE_DELETE,
15
- USN_REASON_RENAME_NEW_NAME, USN_REASON_RENAME_OLD_NAME,
16
- },
17
- Win32::System::IO::DeviceIoControl,
18
- };
19
- use crate::mft::types::{FileKind, FileRecord, IndexEvent, JournalCheckpoint, NtfsDrive};
20
-
21
/// Size in bytes (64 KiB) of the buffer each journal read is issued with.
const BUFFER_SIZE: usize = 64 << 10;
22
-
23
- pub struct UsnWatcher {
24
- handle: HANDLE,
25
- drive: NtfsDrive,
26
- sender: Sender<IndexEvent>,
27
- pub next_usn: i64,
28
- pub journal_id: u64,
29
- }
30
-
31
- impl UsnWatcher {
32
- pub fn new(
33
- drive: &NtfsDrive,
34
- sender: Sender<IndexEvent>,
35
- ) -> windows::core::Result<Self> {
36
- Self::new_from(drive, sender, None)
37
- }
38
-
39
- pub fn new_from(
40
- drive: &NtfsDrive,
41
- sender: Sender<IndexEvent>,
42
- checkpoint: Option<&JournalCheckpoint>,
43
- ) -> windows::core::Result<Self> {
44
- let path: Vec<u16> = drive.device_path.encode_utf16().chain(Some(0)).collect();
45
-
46
- let handle = unsafe {
47
- CreateFileW(
48
- PCWSTR(path.as_ptr()),
49
- 0x0,
50
- FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
51
- None,
52
- OPEN_EXISTING,
53
- FILE_FLAG_BACKUP_SEMANTICS,
54
- None,
55
- )?
56
- };
57
-
58
- let mut journal_data: USN_JOURNAL_DATA_V0 = unsafe { mem::zeroed() };
59
- let mut bytes_returned = 0u32;
60
-
61
- unsafe {
62
- DeviceIoControl(
63
- handle,
64
- FSCTL_QUERY_USN_JOURNAL,
65
- None, 0,
66
- Some(&mut journal_data as *mut _ as *mut _),
67
- mem::size_of::<USN_JOURNAL_DATA_V0>() as u32,
68
- Some(&mut bytes_returned),
69
- None,
70
- )?;
71
- }
72
-
73
- let next_usn = if let Some(cp) = checkpoint {
74
- if cp.journal_id != journal_data.UsnJournalID {
75
- return Err(windows::core::Error::new(
76
- windows::Win32::Foundation::ERROR_JOURNAL_NOT_ACTIVE.into(),
77
- "Journal ID mismatch — rescan needed",
78
- ));
79
- }
80
- if cp.next_usn < journal_data.FirstUsn || cp.next_usn > journal_data.NextUsn {
81
- return Err(windows::core::Error::new(
82
- windows::Win32::Foundation::ERROR_JOURNAL_NOT_ACTIVE.into(),
83
- "Saved USN outside journal range — rescan needed",
84
- ));
85
- }
86
- cp.next_usn
87
- } else {
88
- journal_data.NextUsn
89
- };
90
-
91
- Ok(Self {
92
- handle,
93
- drive: drive.clone(),
94
- sender,
95
- next_usn,
96
- journal_id: journal_data.UsnJournalID,
97
- })
98
- }
99
-
100
- pub fn checkpoint(&self) -> JournalCheckpoint {
101
- JournalCheckpoint {
102
- next_usn: self.next_usn,
103
- journal_id: self.journal_id,
104
- drive_letter: self.drive.letter,
105
- }
106
- }
107
-
108
- pub fn run(&mut self) {
109
- let mut buffer = vec![0u8; BUFFER_SIZE];
110
- loop {
111
- std::thread::sleep(Duration::from_millis(500));
112
- self.poll(&mut buffer);
113
- }
114
- }
115
-
116
- pub fn run_shared(&mut self, shared: std::sync::Arc<parking_lot::Mutex<Vec<JournalCheckpoint>>>) {
117
- let mut buffer = vec![0u8; BUFFER_SIZE];
118
- loop {
119
- std::thread::sleep(Duration::from_millis(500));
120
- self.poll(&mut buffer);
121
- let mut cps = shared.lock();
122
- cps.retain(|c| c.drive_letter != self.drive.letter);
123
- cps.push(self.checkpoint());
124
- }
125
- }
126
-
127
- pub fn drain(&mut self) -> usize {
128
- let mut buffer = vec![0u8; BUFFER_SIZE];
129
- let mut count = 0;
130
- loop {
131
- let before = self.next_usn;
132
- self.poll(&mut buffer);
133
- if self.next_usn == before {
134
- break;
135
- }
136
- count += 1;
137
- }
138
- count
139
- }
140
-
141
- fn poll(&mut self, buffer: &mut Vec<u8>) {
142
- let read_data = READ_USN_JOURNAL_DATA_V0 {
143
- StartUsn: self.next_usn,
144
- ReasonMask: USN_REASON_FILE_CREATE
145
- | USN_REASON_FILE_DELETE
146
- | USN_REASON_RENAME_NEW_NAME
147
- | USN_REASON_RENAME_OLD_NAME,
148
- ReturnOnlyOnClose: 0,
149
- Timeout: 0,
150
- BytesToWaitFor: 0,
151
- UsnJournalID: self.journal_id,
152
- };
153
-
154
- let mut bytes_returned = 0u32;
155
- let ok = unsafe {
156
- DeviceIoControl(
157
- self.handle,
158
- FSCTL_READ_USN_JOURNAL,
159
- Some(&read_data as *const _ as *const _),
160
- mem::size_of::<READ_USN_JOURNAL_DATA_V0>() as u32,
161
- Some(buffer.as_mut_ptr() as *mut _),
162
- BUFFER_SIZE as u32,
163
- Some(&mut bytes_returned),
164
- None,
165
- )
166
- };
167
-
168
- if ok.is_err() || bytes_returned <= 8 {
169
- return;
170
- }
171
-
172
- self.next_usn = i64::from_ne_bytes(buffer[0..8].try_into().unwrap());
173
-
174
- let mut offset = 8usize;
175
- while offset + mem::size_of::<USN_RECORD_V2>() <= bytes_returned as usize {
176
- let record = unsafe {
177
- &*(buffer.as_ptr().add(offset) as *const USN_RECORD_V2)
178
- };
179
- if record.RecordLength == 0 { break; }
180
- self.process_record(record, buffer, offset);
181
- offset += record.RecordLength as usize;
182
- }
183
- }
184
-
185
- fn process_record(&self, record: &USN_RECORD_V2, buffer: &[u8], offset: usize) {
186
- let name_offset = record.FileNameOffset as usize;
187
- let name_len = record.FileNameLength as usize / 2;
188
- let name_ptr = unsafe {
189
- buffer.as_ptr().add(offset + name_offset) as *const u16
190
- };
191
- let name_slice = unsafe { std::slice::from_raw_parts(name_ptr, name_len) };
192
- let name = String::from_utf16_lossy(name_slice);
193
-
194
- let is_dir = (record.FileAttributes & 0x10) != 0;
195
- let file_ref = record.FileReferenceNumber as u64;
196
- let parent_ref = record.ParentFileReferenceNumber as u64;
197
- let reason = record.Reason;
198
-
199
- if reason & USN_REASON_FILE_DELETE != 0 {
200
- let _ = self.sender.send(IndexEvent::Deleted(file_ref));
201
- return;
202
- }
203
-
204
- let kind = if is_dir { FileKind::Directory } else { FileKind::File };
205
-
206
- if reason & USN_REASON_RENAME_NEW_NAME != 0 {
207
- let _ = self.sender.send(IndexEvent::Moved {
208
- file_ref,
209
- new_parent_ref: parent_ref,
210
- name: name.clone(),
211
- kind: kind.clone(),
212
- });
213
- return;
214
- }
215
-
216
- if reason & USN_REASON_FILE_CREATE != 0 {
217
- let new_record = FileRecord {
218
- file_ref,
219
- parent_ref,
220
- name,
221
- kind,
222
- };
223
- let _ = self.sender.send(IndexEvent::Created(new_record));
224
- }
225
- }
226
- }
227
-
228
- impl Drop for UsnWatcher {
229
- fn drop(&mut self) {
230
- unsafe { windows::Win32::Foundation::CloseHandle(self.handle).ok() };
231
- }
232
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/utils/drives.rs DELETED
@@ -1,59 +0,0 @@
1
- use crate::mft::types::NtfsDrive;
2
- use windows::{
3
- Win32::Storage::FileSystem::{
4
- GetLogicalDriveStringsW, GetVolumeInformationW,
5
- },
6
- };
7
-
8
- /// Returns all NTFS drives on the system
9
- pub fn get_ntfs_drives() -> Vec<NtfsDrive> {
10
- let mut drives = Vec::new();
11
-
12
- let mut buf = vec![0u16; 256];
13
- let len = unsafe { GetLogicalDriveStringsW(Some(&mut buf)) } as usize;
14
- if len == 0 {
15
- return drives;
16
- }
17
-
18
- let drive_strings: Vec<String> = buf[..len]
19
- .split(|&c| c == 0)
20
- .filter(|s| !s.is_empty())
21
- .map(|s| String::from_utf16_lossy(s))
22
- .collect();
23
-
24
- for root in drive_strings {
25
- if is_ntfs(&root) {
26
- let letter = root.chars().next().unwrap();
27
- drives.push(NtfsDrive {
28
- letter,
29
- root: root.clone(),
30
- device_path: format!("\\\\.\\{}:", letter),
31
- });
32
- }
33
- }
34
-
35
- drives
36
- }
37
-
38
- fn is_ntfs(root: &str) -> bool {
39
- let root_wide: Vec<u16> = root.encode_utf16().chain(Some(0)).collect();
40
- let mut fs_name = vec![0u16; 32];
41
-
42
- let ok = unsafe {
43
- GetVolumeInformationW(
44
- windows::core::PCWSTR(root_wide.as_ptr()),
45
- None,
46
- None,
47
- None,
48
- None,
49
- Some(&mut fs_name),
50
- )
51
- };
52
-
53
- if ok.is_err() {
54
- return false;
55
- }
56
-
57
- let fs = String::from_utf16_lossy(&fs_name);
58
- fs.starts_with("NTFS")
59
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fastsearch-core/src/utils/mod.rs DELETED
@@ -1 +0,0 @@
1
- pub mod drives;