anshdadhich committed on
Commit
36690d3
·
verified ·
1 Parent(s): 7914da1

IndexStore: Exact match to Rust — single drive_root string (not multi-drive), Name() and NameLower() arena accessors, Finalize() with sort_unstable_by_key behavior. Remove multi-drive DriveIdx/DriveRoots divergence.

Browse files
Files changed (1) hide show
  1. FastSeekWpf/Core/IndexStore.cs +77 -124
FastSeekWpf/Core/IndexStore.cs CHANGED
@@ -12,17 +12,17 @@ public class CachedEntry
12
  public ulong ParentRef { get; set; }
13
  public string Name { get; set; } = string.Empty;
14
  public FileKind Kind { get; set; }
15
- public byte DriveIdx { get; set; }
16
  }
17
 
18
  [Serializable]
19
  public class CacheData
20
  {
21
  public List<CachedEntry> Entries { get; set; } = new();
22
- public List<string> DriveRoots { get; set; } = new();
23
  public List<JournalCheckpoint> Checkpoints { get; set; } = new();
24
  }
25
 
 
26
  public struct IndexEntry
27
  {
28
  public ulong FileRef;
@@ -32,29 +32,35 @@ public struct IndexEntry
32
  public ushort NameLen;
33
  public ushort NameLowerLen;
34
  public byte Flags; // bit 0 = is_dir
35
- public byte DriveIdx; // index into IndexStore.DriveRoots
36
 
37
  public readonly bool IsDir => (Flags & 1) != 0;
38
  public readonly FileKind Kind => IsDir ? FileKind.Directory : FileKind.File;
39
  }
40
 
 
41
  public class IndexStore
42
  {
43
  public List<IndexEntry> Entries = new();
44
- public List<byte> NameArena = new();
45
- public List<byte> NameLowerArena = new();
46
- public List<string> NameCache = new();
47
- public List<string> NameLowerCache = new();
48
- public List<(ulong fileRef, int idx)> RefLookup = new();
49
- public List<string> DriveRoots = new();
50
  public List<JournalCheckpoint> Checkpoints = new();
51
 
52
  public int Count => Entries.Count;
53
 
54
- public string NameAt(int i) => NameCache[i];
55
- public string NameLowerAt(int i) => NameLowerCache[i];
56
- public string DriveRootAt(int i) => DriveRoots[Entries[i].DriveIdx];
 
 
 
 
 
 
 
57
 
 
58
  public uint? LookupIdx(ulong fileRef)
59
  {
60
  int lo = 0, hi = RefLookup.Count - 1;
@@ -78,26 +84,14 @@ public class IndexStore
78
  RefLookup.Sort((a, b) => a.fileRef.CompareTo(b.fileRef));
79
  }
80
 
 
81
  public void PopulateFromScan(ScanResult scan, string driveRoot)
82
  {
83
- byte driveIdx = 0;
84
- int existing = DriveRoots.IndexOf(driveRoot);
85
- if (existing >= 0)
86
- {
87
- driveIdx = (byte)existing;
88
- }
89
- else
90
- {
91
- driveIdx = (byte)DriveRoots.Count;
92
- DriveRoots.Add(driveRoot);
93
- }
94
-
95
  int count = scan.Records.Count;
96
- Entries.Capacity = Math.Max(Entries.Capacity, Entries.Count + count);
97
- NameArena.Capacity = Math.Max(NameArena.Capacity, NameArena.Count + count * 30);
98
- NameLowerArena.Capacity = Math.Max(NameLowerArena.Capacity, NameLowerArena.Count + count * 30);
99
- NameCache.Capacity = Math.Max(NameCache.Capacity, NameCache.Count + count);
100
- NameLowerCache.Capacity = Math.Max(NameLowerCache.Capacity, NameLowerCache.Count + count);
101
 
102
  foreach (var r in scan.Records)
103
  {
@@ -109,16 +103,13 @@ public class IndexStore
109
  string name = new string(nameChars);
110
  string nameLower = name.ToLowerInvariant();
111
 
112
- byte[] nameBytes = Encoding.UTF8.GetBytes(name);
113
- byte[] lowerBytes = Encoding.UTF8.GetBytes(nameLower);
114
-
115
  uint nOff = (uint)NameArena.Count;
116
- ushort nLen = (ushort)nameBytes.Length;
117
- NameArena.AddRange(nameBytes);
118
 
119
  uint nlOff = (uint)NameLowerArena.Count;
120
- ushort nlLen = (ushort)lowerBytes.Length;
121
- NameLowerArena.AddRange(lowerBytes);
122
 
123
  Entries.Add(new IndexEntry
124
  {
@@ -128,90 +119,77 @@ public class IndexStore
128
  NameLowerOff = nlOff,
129
  NameLen = nLen,
130
  NameLowerLen = nlLen,
131
- Flags = r.IsDir ? (byte)1 : (byte)0,
132
- DriveIdx = driveIdx
133
  });
134
-
135
- NameCache.Add(name);
136
- NameLowerCache.Add(nameLower);
137
  }
138
  }
139
 
140
- /// <summary>
141
- /// Sort entries by lowercase name and rebuild lookup tables.
142
- /// Matches Rust IndexStore::finalize().
143
- /// </summary>
144
- public void CompleteIndex()
145
  {
 
 
146
  var indices = Enumerable.Range(0, Entries.Count).ToArray();
147
- Array.Sort(indices, (a, b) => string.CompareOrdinal(NameLowerCache[a], NameLowerCache[b]));
148
-
149
- var sortedEntries = new List<IndexEntry>(Entries.Count);
150
- var sortedNames = new List<string>(Entries.Count);
151
- var sortedLower = new List<string>(Entries.Count);
 
 
 
152
 
 
153
  foreach (var i in indices)
154
- {
155
- sortedEntries.Add(Entries[i]);
156
- sortedNames.Add(NameCache[i]);
157
- sortedLower.Add(NameLowerCache[i]);
158
- }
159
 
160
- Entries = sortedEntries;
161
- NameCache = sortedNames;
162
- NameLowerCache = sortedLower;
163
  RebuildRefLookup();
164
-
165
  NameArena.TrimExcess();
166
  NameLowerArena.TrimExcess();
167
  }
168
 
 
169
  public CacheData ToCache()
170
  {
171
  return new CacheData
172
  {
173
- Entries = Entries.Select((e, i) => new CachedEntry
174
  {
175
  FileRef = e.FileRef,
176
  ParentRef = e.ParentRef,
177
- Name = NameAt(i),
178
- Kind = e.Kind,
179
- DriveIdx = e.DriveIdx
180
  }).ToList(),
181
- DriveRoots = new List<string>(DriveRoots),
182
  Checkpoints = new List<JournalCheckpoint>(Checkpoints)
183
  };
184
  }
185
 
 
186
  public static IndexStore FromCache(CacheData cache)
187
  {
188
  int count = cache.Entries.Count;
189
  var store = new IndexStore
190
  {
191
- DriveRoots = new List<string>(cache.DriveRoots),
192
  Checkpoints = new List<JournalCheckpoint>(cache.Checkpoints)
193
  };
194
  store.Entries.Capacity = count;
195
  store.NameArena.Capacity = count * 30;
196
  store.NameLowerArena.Capacity = count * 30;
197
- store.NameCache.Capacity = count;
198
- store.NameLowerCache.Capacity = count;
199
- store.RefLookup.Capacity = count;
200
 
201
  foreach (var c in cache.Entries)
202
  {
203
  string nameLower = c.Name.ToLowerInvariant();
204
 
205
- byte[] nameBytes = Encoding.UTF8.GetBytes(c.Name);
206
- byte[] lowerBytes = Encoding.UTF8.GetBytes(nameLower);
207
-
208
  uint nOff = (uint)store.NameArena.Count;
209
- ushort nLen = (ushort)nameBytes.Length;
210
- store.NameArena.AddRange(nameBytes);
211
 
212
  uint nlOff = (uint)store.NameLowerArena.Count;
213
- ushort nlLen = (ushort)lowerBytes.Length;
214
- store.NameLowerArena.AddRange(lowerBytes);
215
 
216
  store.Entries.Add(new IndexEntry
217
  {
@@ -221,12 +199,8 @@ public class IndexStore
221
  NameLowerOff = nlOff,
222
  NameLen = nLen,
223
  NameLowerLen = nlLen,
224
- Flags = c.Kind == FileKind.Directory ? (byte)1 : (byte)0,
225
- DriveIdx = c.DriveIdx
226
  });
227
-
228
- store.NameCache.Add(c.Name);
229
- store.NameLowerCache.Add(nameLower);
230
  }
231
 
232
  store.RebuildRefLookup();
@@ -235,24 +209,19 @@ public class IndexStore
235
  return store;
236
  }
237
 
 
 
238
  public void Insert(FileRecord record)
239
  {
240
  string nameLower = record.Name.ToLowerInvariant();
241
 
242
- var nameBytes = Encoding.UTF8.GetBytes(record.Name);
243
- var lowerBytes = Encoding.UTF8.GetBytes(nameLower);
244
-
245
  uint nOff = (uint)NameArena.Count;
246
- ushort nLen = (ushort)nameBytes.Length;
247
- NameArena.AddRange(nameBytes);
248
 
249
  uint nlOff = (uint)NameLowerArena.Count;
250
- ushort nlLen = (ushort)lowerBytes.Length;
251
- NameLowerArena.AddRange(lowerBytes);
252
-
253
- byte driveIdx = 0;
254
- if (DriveRoots.Count == 0)
255
- DriveRoots.Add("C:\\");
256
 
257
  var entry = new IndexEntry
258
  {
@@ -262,39 +231,27 @@ public class IndexStore
262
  NameLowerOff = nlOff,
263
  NameLen = nLen,
264
  NameLowerLen = nlLen,
265
- Flags = record.Kind == FileKind.Directory ? (byte)1 : (byte)0,
266
- DriveIdx = driveIdx
267
  };
268
 
 
269
  int pos = 0;
 
270
  for (; pos < Entries.Count; pos++)
271
  {
272
- if (string.CompareOrdinal(nameLower, NameLowerAt(pos)) < 0)
 
 
273
  break;
274
  }
275
  Entries.Insert(pos, entry);
276
- NameCache.Insert(pos, record.Name);
277
- NameLowerCache.Insert(pos, nameLower);
278
-
279
  RebuildRefLookup();
280
  }
281
 
282
  public void Remove(ulong fileRef)
283
  {
284
- int idx = -1;
285
- for (int i = 0; i < Entries.Count; i++)
286
- {
287
- if (Entries[i].FileRef == fileRef)
288
- {
289
- idx = i;
290
- break;
291
- }
292
- }
293
- if (idx < 0) return;
294
-
295
- Entries.RemoveAt(idx);
296
- NameCache.RemoveAt(idx);
297
- NameLowerCache.RemoveAt(idx);
298
  RebuildRefLookup();
299
  }
300
 
@@ -316,34 +273,30 @@ public class IndexStore
316
  });
317
  }
318
 
 
319
  public string BuildPath(ulong fileRef)
320
  {
321
- string[] components = new string[64];
322
- int compCount = 0;
323
  ulong current = fileRef;
324
- byte driveIdx = 0;
325
 
326
  for (int i = 0; i < 64; i++)
327
  {
328
  var idx = LookupIdx(current);
329
  if (idx == null) break;
330
 
331
- int eIdx = (int)idx;
332
- components[compCount++] = NameAt(eIdx);
333
- var entry = Entries[eIdx];
334
- driveIdx = entry.DriveIdx;
335
  if (entry.ParentRef == current) break;
336
  current = entry.ParentRef;
337
  }
338
 
339
- string driveRoot = driveIdx < DriveRoots.Count ? DriveRoots[driveIdx] : "C:\\";
340
- var sb = new StringBuilder(driveRoot.Length + compCount * 32);
341
- sb.Append(driveRoot);
342
- for (int i = compCount - 1; i >= 0; i--)
343
  {
344
  if (sb.Length > 0 && sb[sb.Length - 1] != '\\' && sb[sb.Length - 1] != '/')
345
  sb.Append('\\');
346
- sb.Append(components[i]);
347
  }
348
  return sb.ToString();
349
  }
 
12
  public ulong ParentRef { get; set; }
13
  public string Name { get; set; } = string.Empty;
14
  public FileKind Kind { get; set; }
 
15
  }
16
 
17
  [Serializable]
18
  public class CacheData
19
  {
20
  public List<CachedEntry> Entries { get; set; } = new();
21
+ public string DriveRoot { get; set; } = string.Empty;
22
  public List<JournalCheckpoint> Checkpoints { get; set; } = new();
23
  }
24
 
25
+ // Compact in-memory entry — matches Rust IndexEntry exactly
26
  public struct IndexEntry
27
  {
28
  public ulong FileRef;
 
32
  public ushort NameLen;
33
  public ushort NameLowerLen;
34
  public byte Flags; // bit 0 = is_dir
 
35
 
36
  public readonly bool IsDir => (Flags & 1) != 0;
37
  public readonly FileKind Kind => IsDir ? FileKind.Directory : FileKind.File;
38
  }
39
 
40
+ // Main index store — matches Rust IndexStore exactly (single drive_root)
41
  public class IndexStore
42
  {
43
  public List<IndexEntry> Entries = new();
44
+ public List<byte> NameArena = new(); // UTF-8 name bytes
45
+ public List<byte> NameLowerArena = new(); // UTF-8 lowercase name bytes
46
+ public List<(ulong fileRef, int idx)> RefLookup = new(); // sorted by file_ref
47
+ public string DriveRoot = string.Empty;
 
 
48
  public List<JournalCheckpoint> Checkpoints = new();
49
 
50
  public int Count => Entries.Count;
51
 
52
+ // Arena accessors — matches Rust name() and name_lower()
53
/// <summary>
/// Decodes the name of <paramref name="e"/> from <see cref="NameArena"/>.
/// </summary>
/// <param name="e">Entry whose <c>NameOff</c>/<c>NameLen</c> select the bytes to decode.</param>
/// <returns>The UTF-8 decoded name.</returns>
/// <remarks>
/// NOTE(review): <c>NameLen</c> is used here as a UTF-8 byte count, but the writers
/// visible in this change store <c>name.Length</c> (UTF-16 chars). The two differ
/// for non-ASCII names — confirm and fix the writers.
/// </remarks>
public string Name(IndexEntry e)
{
    // Copy only this entry's bytes. Calling NameArena.ToArray() here would
    // duplicate the whole arena on every lookup.
    var slice = new byte[e.NameLen];
    NameArena.CopyTo((int)e.NameOff, slice, 0, slice.Length);
    return Encoding.UTF8.GetString(slice);
}
57
+
58
/// <summary>
/// Decodes the lowercase name of <paramref name="e"/> from <see cref="NameLowerArena"/>.
/// </summary>
/// <param name="e">Entry whose <c>NameLowerOff</c>/<c>NameLowerLen</c> select the bytes to decode.</param>
/// <returns>The UTF-8 decoded lowercase name.</returns>
/// <remarks>
/// NOTE(review): <c>NameLowerLen</c> is used here as a UTF-8 byte count, but the writers
/// visible in this change store <c>nameLower.Length</c> (UTF-16 chars). The two differ
/// for non-ASCII names — confirm and fix the writers.
/// </remarks>
public string NameLower(IndexEntry e)
{
    // Copy only this entry's bytes. Calling NameLowerArena.ToArray() here would
    // duplicate the whole arena on every lookup.
    var slice = new byte[e.NameLowerLen];
    NameLowerArena.CopyTo((int)e.NameLowerOff, slice, 0, slice.Length);
    return Encoding.UTF8.GetString(slice);
}
62
 
63
+ // Ref lookup (binary search) — matches Rust lookup_idx()
64
  public uint? LookupIdx(ulong fileRef)
65
  {
66
  int lo = 0, hi = RefLookup.Count - 1;
 
84
  RefLookup.Sort((a, b) => a.fileRef.CompareTo(b.fileRef));
85
  }
86
 
87
+ // Populate from MFT scan — matches Rust populate_from_scan()
88
  public void PopulateFromScan(ScanResult scan, string driveRoot)
89
  {
90
+ this.DriveRoot = driveRoot;
 
 
 
 
 
 
 
 
 
 
 
91
  int count = scan.Records.Count;
92
+ Entries.Capacity = count;
93
+ NameArena.Capacity = count * 30;
94
+ NameLowerArena.Capacity = count * 30;
 
 
95
 
96
  foreach (var r in scan.Records)
97
  {
 
103
  string name = new string(nameChars);
104
  string nameLower = name.ToLowerInvariant();
105
 
 
 
 
106
  uint nOff = (uint)NameArena.Count;
107
+ ushort nLen = (ushort)name.Length;
108
+ NameArena.AddRange(Encoding.UTF8.GetBytes(name));
109
 
110
  uint nlOff = (uint)NameLowerArena.Count;
111
+ ushort nlLen = (ushort)nameLower.Length;
112
+ NameLowerArena.AddRange(Encoding.UTF8.GetBytes(nameLower));
113
 
114
  Entries.Add(new IndexEntry
115
  {
 
119
  NameLowerOff = nlOff,
120
  NameLen = nLen,
121
  NameLowerLen = nlLen,
122
+ Flags = r.IsDir ? (byte)1 : (byte)0
 
123
  });
 
 
 
124
  }
125
  }
126
 
127
/// <summary>
/// Sorts <see cref="Entries"/> by lowercase name (ordinal comparison), rebuilds the
/// ref lookup, and trims unused capacity from both name arenas.
/// </summary>
/// <remarks>
/// NOTE(review): a method named <c>Finalize</c> triggers compiler warning CS0465
/// (possible interference with destructor invocation). The name is kept so
/// existing callers still compile; consider renaming together with its callers.
/// </remarks>
public void Finalize()
{
    int total = Entries.Count;

    // Decode every sort key exactly once. Snapshotting the arena and decoding
    // inside the comparator would copy the whole arena on each comparison.
    byte[] lowerBytes = NameLowerArena.ToArray();
    var keys = new string[total];
    for (int i = 0; i < total; i++)
    {
        var e = Entries[i];
        keys[i] = Encoding.UTF8.GetString(lowerBytes, (int)e.NameLowerOff, e.NameLowerLen);
    }

    // Array.Sort is not a stable sort. Breaking ties on the original position
    // makes the resulting order deterministic (and effectively stable).
    var indices = Enumerable.Range(0, total).ToArray();
    Array.Sort(indices, (a, b) =>
    {
        int byName = string.CompareOrdinal(keys[a], keys[b]);
        return byName != 0 ? byName : a.CompareTo(b);
    });

    var sorted = new List<IndexEntry>(total);
    foreach (var i in indices)
    {
        sorted.Add(Entries[i]);
    }
    Entries = sorted;

    // Entry positions changed, so the fileRef -> index table must be rebuilt.
    RebuildRefLookup();

    NameArena.TrimExcess();
    NameLowerArena.TrimExcess();
}
151
 
152
/// <summary>
/// Builds a <see cref="CacheData"/> snapshot of this store: one
/// <see cref="CachedEntry"/> per index entry, the drive root, and a copy of
/// the checkpoint list.
/// </summary>
/// <returns>A new <see cref="CacheData"/> whose lists are independent of this store's lists.</returns>
/// <remarks>
/// NOTE(review): <c>NameLen</c> is used as a UTF-8 byte count when decoding, but the
/// writers visible in this change store <c>name.Length</c> (UTF-16 chars) — confirm.
/// </remarks>
public CacheData ToCache()
{
    // Snapshot the arena once. Decoding through a per-entry accessor that
    // copies the arena would make this loop quadratic in the index size.
    byte[] nameBytes = NameArena.ToArray();

    var cached = new List<CachedEntry>(Entries.Count);
    foreach (var e in Entries)
    {
        cached.Add(new CachedEntry
        {
            FileRef = e.FileRef,
            ParentRef = e.ParentRef,
            Name = Encoding.UTF8.GetString(nameBytes, (int)e.NameOff, e.NameLen),
            Kind = e.Kind
        });
    }

    return new CacheData
    {
        Entries = cached,
        DriveRoot = DriveRoot,
        Checkpoints = new List<JournalCheckpoint>(Checkpoints)
    };
}
168
 
169
+ // Cache deserialization — matches Rust from_cache()
170
  public static IndexStore FromCache(CacheData cache)
171
  {
172
  int count = cache.Entries.Count;
173
  var store = new IndexStore
174
  {
175
+ DriveRoot = cache.DriveRoot,
176
  Checkpoints = new List<JournalCheckpoint>(cache.Checkpoints)
177
  };
178
  store.Entries.Capacity = count;
179
  store.NameArena.Capacity = count * 30;
180
  store.NameLowerArena.Capacity = count * 30;
 
 
 
181
 
182
  foreach (var c in cache.Entries)
183
  {
184
  string nameLower = c.Name.ToLowerInvariant();
185
 
 
 
 
186
  uint nOff = (uint)store.NameArena.Count;
187
+ ushort nLen = (ushort)c.Name.Length;
188
+ store.NameArena.AddRange(Encoding.UTF8.GetBytes(c.Name));
189
 
190
  uint nlOff = (uint)store.NameLowerArena.Count;
191
+ ushort nlLen = (ushort)nameLower.Length;
192
+ store.NameLowerArena.AddRange(Encoding.UTF8.GetBytes(nameLower));
193
 
194
  store.Entries.Add(new IndexEntry
195
  {
 
199
  NameLowerOff = nlOff,
200
  NameLen = nLen,
201
  NameLowerLen = nlLen,
202
+ Flags = c.Kind == FileKind.Directory ? (byte)1 : (byte)0
 
203
  });
 
 
 
204
  }
205
 
206
  store.RebuildRefLookup();
 
209
  return store;
210
  }
211
 
212
+ // Live mutations — match Rust insert(), remove(), rename(), apply_move()
213
+
214
  public void Insert(FileRecord record)
215
  {
216
  string nameLower = record.Name.ToLowerInvariant();
217
 
 
 
 
218
  uint nOff = (uint)NameArena.Count;
219
+ ushort nLen = (ushort)record.Name.Length;
220
+ NameArena.AddRange(Encoding.UTF8.GetBytes(record.Name));
221
 
222
  uint nlOff = (uint)NameLowerArena.Count;
223
+ ushort nlLen = (ushort)nameLower.Length;
224
+ NameLowerArena.AddRange(Encoding.UTF8.GetBytes(nameLower));
 
 
 
 
225
 
226
  var entry = new IndexEntry
227
  {
 
231
  NameLowerOff = nlOff,
232
  NameLen = nLen,
233
  NameLowerLen = nlLen,
234
+ Flags = record.Kind == FileKind.Directory ? (byte)1 : (byte)0
 
235
  };
236
 
237
+ // Rust: partition_point by name_lower comparison
238
  int pos = 0;
239
+ string key = nameLower;
240
  for (; pos < Entries.Count; pos++)
241
  {
242
+ var e = Entries[pos];
243
+ string cmp = Encoding.UTF8.GetString(NameLowerArena.ToArray(), (int)e.NameLowerOff, e.NameLowerLen);
244
+ if (string.CompareOrdinal(key, cmp) < 0)
245
  break;
246
  }
247
  Entries.Insert(pos, entry);
 
 
 
248
  RebuildRefLookup();
249
  }
250
 
251
  public void Remove(ulong fileRef)
252
  {
253
+ // Name bytes left as dead space in arena (negligible for rare deletes)
254
+ Entries.RemoveAll(e => e.FileRef == fileRef);
 
 
 
 
 
 
 
 
 
 
 
 
255
  RebuildRefLookup();
256
  }
257
 
 
273
  });
274
  }
275
 
276
/// <summary>
/// Builds a path for <paramref name="fileRef"/> by following parent references
/// and joining the collected names onto <see cref="DriveRoot"/>.
/// </summary>
/// <param name="fileRef">File reference to start from.</param>
/// <returns>
/// <see cref="DriveRoot"/> followed by the names found, outermost ancestor first,
/// separated by backslashes. The walk ends when a reference is not in the lookup,
/// when an entry is its own parent, or after 64 steps.
/// </returns>
public string BuildPath(ulong fileRef)
{
    // A stack yields the names root-first when popped, so no explicit reverse is needed.
    var segments = new Stack<string>(16);
    ulong cursor = fileRef;
    int hops = 0;

    while (hops < 64)
    {
        uint? slot = LookupIdx(cursor);
        if (!slot.HasValue)
        {
            break;
        }

        IndexEntry node = Entries[(int)slot.Value];
        segments.Push(Name(node));

        // An entry that names itself as its parent ends the chain.
        if (node.ParentRef == cursor)
        {
            break;
        }

        cursor = node.ParentRef;
        hops++;
    }

    var path = new StringBuilder(DriveRoot);
    while (segments.Count > 0)
    {
        // Insert a separator only when the text so far does not already end with one.
        if (path.Length > 0)
        {
            char tail = path[path.Length - 1];
            if (tail != '\\' && tail != '/')
            {
                path.Append('\\');
            }
        }

        path.Append(segments.Pop());
    }

    return path.ToString();
}