Delphi 版 everything、光速搜索代码

近日没啥事情,研究了一下 everything、光速搜索原理。花了一个礼拜时间,终于搞定。

废话不多说,直接上代码:

unit uMFTSearchFile;
{
  dbyoung@sina.com
  2018-04-23
}

interface

uses Windows, System.Classes, Generics.Collections;

{ 获取磁盘所有文件列表 }
function GetLogicalDiskAllFiles(const chrLogiclDiskName: Char; var FileList: TStringList; const bSort: Boolean = False): Boolean;

implementation

type
  PFileInfo = ^TFileInfo;

  TFileInfo = record
    strFileName: String;               // 文件名称
    FileReferenceNumber: UInt64;       // 文件的ID
    ParentFileReferenceNumber: UInt64; // 文件的父ID
  end;

const
  PARTITION_IFS          = $07;
  BUF_LEN                = 500 * 1024;
  USN_DELETE_FLAG_DELETE = $00000001;
  c_UInt64Root           = 1407374883553285;

type
  PARTITION_INFORMATION = record
    StartingOffset: LARGE_INTEGER;
    PartitionLength: LARGE_INTEGER;
    HiddenSectors: Cardinal;
    PartitionNumber: Cardinal;
    PartitionType: Byte;
    BootIndicator: Boolean;
    RecognizedPartition: Boolean;
    RewritePartition: Boolean;
  end;

  CREATE_USN_JOURNAL_DATA = record
    MaximumSize: UInt64;
    AllocationDelta: UInt64;
  end;

  USN_JOURNAL_DATA = record
    UsnJournalID: UInt64;
    FirstUsn: Int64;
    NextUsn: Int64;
    LowestValidUsn: Int64;
    MaxUsn: Int64;
    MaximumSize: UInt64;
    AllocationDelta: UInt64;
  end;

  MFT_ENUM_DATA = record
    StartFileReferenceNumber: UInt64;
    LowUsn: Int64;
    HighUsn: Int64;
  end;

  PUSN = ^USN;

  USN = record
    RecordLength: Cardinal;
    MajorVersion: Word;
    MinorVersion: Word;
    FileReferenceNumber: UInt64;
    ParentFileReferenceNumber: UInt64;
    USN: Int64;
    TimeStamp: LARGE_INTEGER;
    Reason: Cardinal;
    SourceInfo: Cardinal;
    SecurityId: Cardinal;
    FileAttributes: Cardinal;
    FileNameLength: Word;
    FileNameOffset: Word;
    FileName: PWideChar;
  end;

  DELETE_USN_JOURNAL_DATA = record
    UsnJournalID: UInt64;
    DeleteFlags: Cardinal;
  end;

{ TStringList 按数值排序 }
function Int64Sort(List: TStringList; Index1, Index2: Integer): Integer;
var
  Int64A, Int64B: Int64;
begin
  Int64A := PFileInfo(List.Objects[Index1])^.FileReferenceNumber;
  Int64B := PFileInfo(List.Objects[Index2])^.FileReferenceNumber;
  if Int64A < Int64B then
    Result := -1
  else if Int64A = Int64B then
    Result := 0
  else
    Result := 1;
end;

{ 简化的 MOVE 函数,也可以用 MOVE 函数来替代 }
procedure MyMove(const Source; var Dest; Count: NativeInt); assembler;
asm
  FILD    QWORD PTR [EAX]
  FISTP   QWORD PTR [EDX]
end;

{ 获取文件全路径,包含路径和文件名 }
procedure GetFullFileName(var FileList: TStringList; const chrLogiclDiskName: Char; const bSort: Boolean = False);
var
  UInt64List: TArray<UInt64>;
  III       : Integer;
  UPID      : UInt64;
  intIndex  : Integer;
begin
  { 将 FileList 按 FileReferenceNumber 数值排序 }
  FileList.Sorted := False;
  FileList.CustomSort(Int64Sort);

  { 将排序好的 FileReferenceNumber 复制到 UInt64 数组列表中,便于下面进行快速查找 <TArray.BinarySearch 为高效的折半查找> }
  SetLength(UInt64List, FileList.Count);
  for III := 0 to FileList.Count - 1 do
  begin
    UInt64List[III] := PFileInfo(FileList.Objects[III])^.FileReferenceNumber;
  end;

  { 获取每一个文件全路径名称 }
  for III := 0 to FileList.Count - 1 do
  begin
    UPID := PFileInfo(FileList.Objects[III])^.ParentFileReferenceNumber;
    while TArray.BinarySearch(UInt64List, UPID, intIndex) do
    begin
      UPID                  := PFileInfo(FileList.Objects[intIndex])^.ParentFileReferenceNumber;
      FileList.Strings[III] := PFileInfo(FileList.Objects[intIndex])^.strFileName + '\' + FileList.Strings[III];
    end;
    FileList.Strings[III] := (chrLogiclDiskName + ':\' + FileList.Strings[III]);
  end;

  { 将所有文件按文件名排序 }
  if bSort then
    FileList.Sort;
end;

{ 获取磁盘所有文件列表 }
function GetLogicalDiskAllFiles(const chrLogiclDiskName: Char; var FileList: TStringList; const bSort: Boolean = False): Boolean;
var
  Info       : PARTITION_INFORMATION;
  bStatus    : Boolean;
  hRootHandle: THandle;
  hTempHandle: Cardinal;
  cujd       : CREATE_USN_JOURNAL_DATA;
  ujd        : USN_JOURNAL_DATA;
  med        : MFT_ENUM_DATA;
  dujd       : DELETE_USN_JOURNAL_DATA;
  dwRet      : DWORD;
  Buffer     : array [0 .. BUF_LEN - 1] of Char;
  UsnRecord  : PUSN;
  strFileName: String;
  int64Size  : Integer;
  pfi        : PFileInfo;
  III        : Integer;

begin
  Result := False;

  { 打开磁盘 需要管理员权限 }
  hTempHandle := 0;
  hRootHandle := CreateFile(PChar('\\.\' + chrLogiclDiskName + ':'), GENERIC_READ or GENERIC_WRITE, FILE_SHARE_READ or FILE_SHARE_WRITE, nil, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, hTempHandle);
  if hRootHandle = INVALID_HANDLE_VALUE then
    Exit;

  try
    { 是否是NTFS磁盘格式 }
    if not DeviceIoControl(hRootHandle, IOCTL_DISK_GET_PARTITION_INFO, nil, 0, @Info, Sizeof(Info), dwRet, nil) then
      Exit;

    if not Info.RecognizedPartition then
      Exit;

    if Info.PartitionType <> PARTITION_IFS then
      Exit;

    { 初始化USN日志文件 }
    bStatus := DeviceIoControl(hRootHandle, FSCTL_CREATE_USN_JOURNAL, @cujd, Sizeof(cujd), nil, 0, dwRet, nil);
    if not bStatus then
      Exit;

    { 获取USN日志基本信息 }
    bStatus := DeviceIoControl(hRootHandle, FSCTL_QUERY_USN_JOURNAL, nil, 0, @ujd, Sizeof(ujd), dwRet, nil);
    if not bStatus then
      Exit;

    { 枚举USN日志文件中的所有记录 }
    med.StartFileReferenceNumber := 0;
    med.LowUsn                   := 0;
    med.HighUsn                  := ujd.NextUsn;
    int64Size                    := Sizeof(Int64);
    while DeviceIoControl(hRootHandle, FSCTL_ENUM_USN_DATA, @med, Sizeof(med), @Buffer, BUF_LEN, dwRet, nil) do
    begin
      { 找到第一个 USN 记录 }
      UsnRecord := PUSN(Integer(@(Buffer)) + int64Size);
      while dwRet > 60 do
      begin
        { 获取文件名称 }
        strFileName := PWideChar(Integer(UsnRecord) + UsnRecord^.FileNameOffset);
        strFileName := Copy(strFileName, 1, UsnRecord^.FileNameLength div 2);

        { 将文件信息添加到列表中 }
        pfi                            := AllocMem(Sizeof(TFileInfo)); // 不要忘记释放内存
        pfi^.strFileName               := strFileName;
        pfi^.FileReferenceNumber       := UsnRecord^.FileReferenceNumber;
        pfi^.ParentFileReferenceNumber := UsnRecord^.ParentFileReferenceNumber;
        FileList.AddObject(strFileName, TObject(pfi));

        { 获取下一个 USN 记录 }
        if UsnRecord.RecordLength > 0 then
          Dec(dwRet, UsnRecord.RecordLength)
        else
          Break;
        UsnRecord := PUSN(Cardinal(UsnRecord) + UsnRecord.RecordLength);
      end;
      MyMove(Buffer, med, int64Size);
    end;

    { 获取文件全路径,包含路径和文件名 }
    GetFullFileName(FileList, chrLogiclDiskName, bSort);

    { 释放内存 }
    for III := 0 to FileList.Count - 1 do
    begin
      FreeMem(PFileInfo(FileList.Objects[III]));
    end;

    { 删除USN日志文件信息 }
    dujd.UsnJournalID := ujd.UsnJournalID;
    dujd.DeleteFlags  := USN_DELETE_FLAG_DELETE;
    DeviceIoControl(hRootHandle, FSCTL_DELETE_USN_JOURNAL, @dujd, Sizeof(dujd), nil, 0, dwRet, nil);
  finally
    CloseHandle(hRootHandle);
  end;
end;

end.

Delphi 10.2 环境下开发。

调用:

var FileList : TStringList;

begin

  FileList     := TStringList.Create;

  GetLogicalDiskAllFiles('C', FileList, False);

  FileList.SaveToFile('d:\temp.txt', TEncoding.UTF8);

  FileList.Free;

end;

在我的机器上100万个文件,耗时7秒左右。200万个文件,耗时15秒左右。

相关文章