{********************************************************************}
{                                                                    }
{ written by TMS Software                                            }
{            copyright (c) 2025                                      }
{            Email : info@tmssoftware.com                            }
{            Web : http://www.tmssoftware.com                        }
{                                                                    }
{ The source code is given as is. The author is not responsible      }
{ for any possible damage done due to the use of this code.          }
{ The complete source code remains property of the author and may    }
{ not be distributed, published, given or sold in any form as such.  }
{ No parts of the source code can be included in any other component }
{ or application without written authorization of the author.        }
{********************************************************************}

unit PDFExtract;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes;

{ Opens the PDF at AFileName through PDFium and returns the text of all of its
  pages as a single string.

  AFileName - path of the PDF file; it is converted to UTF-8 before being
              handed to PDFium.
  APassword - optional document password (default: empty string), also passed
              as UTF-8.

  Returns the page texts in page order. Two line breaks (sLineBreak) are
  inserted before a page's text whenever text has already been collected.
  The result is '' when the document reports no pages or no page yields text.

  Raises Exception when PDFium cannot open the document, load a page, or load
  a page's text layer. }
function ExtractPdfText(const AFileName: string; const APassword: string = ''): string;


implementation

type
  // Handle types used by the PDFium imports in this unit. They are plain
  // untyped pointers here: this unit only receives them from PDFium, compares
  // them against nil, and passes them back; it never dereferences them.
  FPDF_DOCUMENT = Pointer;  // returned by FPDF_LoadDocument
  FPDF_PAGE     = Pointer;  // returned by FPDF_LoadPage
  FPDF_TEXTPAGE = Pointer;  // returned by FPDFText_LoadPage

const
  // Library name every 'external' import in this unit is bound to. No path is
  // given, so the file must be discoverable at load time; the original note
  // asks for the 64-bit pdfium.dll to be placed beside the executable.
  PDFIUM_DLL = 'pdfium.dll'; // 64-bit dll beside your exe

// ---------------------------------------------------------------------------
// PDFium imports.
//
// These are static 'external' bindings (no 'delayed' directive), so
// PDFIUM_DLL has to be resolvable when the module that contains this unit is
// loaded.
//
// NOTE(review): every import is declared cdecl. On Win64 the calling
// convention directive makes no difference, which matches the "64-bit dll"
// note on PDFIUM_DLL. 32-bit Windows builds of PDFium are commonly exported
// as stdcall - confirm against the DLL actually shipped before building this
// unit for Win32.
// ---------------------------------------------------------------------------

// Library lifetime. This unit calls these from its initialization and
// finalization sections.
procedure FPDF_InitLibrary; cdecl; external PDFIUM_DLL;
procedure FPDF_DestroyLibrary; cdecl; external PDFIUM_DLL;

// Document lifetime. file_path and password are zero-terminated byte strings;
// the caller in this unit supplies UTF-8 for both. A nil result means failure.
// Every non-nil document must be released with FPDF_CloseDocument.
function FPDF_LoadDocument(file_path: PAnsiChar; password: PAnsiChar): FPDF_DOCUMENT; cdecl; external PDFIUM_DLL;
procedure FPDF_CloseDocument(document: FPDF_DOCUMENT); cdecl; external PDFIUM_DLL;

// Page access. page_index is zero-based (the caller iterates 0..count-1).
// A nil page means failure; every non-nil page must be released with
// FPDF_ClosePage.
function FPDF_GetPageCount(document: FPDF_DOCUMENT): Integer; cdecl; external PDFIUM_DLL;
function FPDF_LoadPage(document: FPDF_DOCUMENT; page_index: Integer): FPDF_PAGE; cdecl; external PDFIUM_DLL;
procedure FPDF_ClosePage(page: FPDF_PAGE); cdecl; external PDFIUM_DLL;

// Text layer of a loaded page. A nil text page means failure; every non-nil
// text page must be released with FPDFText_ClosePage.
// FPDFText_GetText writes UTF-16 into result_utf16. The caller in this unit
// sizes that buffer as count + 1 characters and treats the return value as
// the number of characters written INCLUDING a trailing #0 (this follows the
// PDFium header documentation for FPDFText_GetText).
function FPDFText_LoadPage(page: FPDF_PAGE): FPDF_TEXTPAGE; cdecl; external PDFIUM_DLL;
procedure FPDFText_ClosePage(text_page: FPDF_TEXTPAGE); cdecl; external PDFIUM_DLL;
function FPDFText_CountChars(text_page: FPDF_TEXTPAGE): Integer; cdecl; external PDFIUM_DLL;
function FPDFText_GetText(text_page: FPDF_TEXTPAGE; start_index, count: Integer; result_utf16: PWideChar): Integer; cdecl; external PDFIUM_DLL;

// Numeric error code of the most recent failed PDFium call; used in this unit
// only to enrich exception messages.
function FPDF_GetLastError: Cardinal; cdecl; external PDFIUM_DLL;

{ Returns the text of one page of an already opened document.

  ADoc       - document handle obtained from FPDF_LoadDocument.
  APageIndex - zero-based page index.

  Returns '' when PDFium reports no characters for the page (a non-positive
  count) or writes nothing into the buffer.

  Raises Exception when the page or its text layer cannot be loaded. The page
  and text-page handles are always released, also when an exception escapes. }
function ExtractPageText(ADoc: FPDF_DOCUMENT; APageIndex: Integer): string;
var
  Page     : FPDF_PAGE;
  TextPage : FPDF_TEXTPAGE;
  CharCount: Integer;
  Written  : Integer;
begin
  Result := '';

  Page := FPDF_LoadPage(ADoc, APageIndex);       // requires prior FPDF_InitLibrary
  if Page = nil then
    // NOTE(review): the PDFium headers only promise a meaningful
    // FPDF_GetLastError value for calls that document it (e.g.
    // FPDF_LoadDocument); the code reported here may be stale - confirm.
    raise Exception.CreateFmt('Failed to load page %d. Error: %d', [APageIndex, FPDF_GetLastError]);
  try
    TextPage := FPDFText_LoadPage(Page);
    if TextPage = nil then
      raise Exception.CreateFmt('Failed to load text for page %d. Error: %d', [APageIndex, FPDF_GetLastError]);
    try
      CharCount := FPDFText_CountChars(TextPage);
      if CharCount <= 0 then
        Exit;                                    // nothing to read; Result is ''

      // Let PDFium write directly into the string's own storage instead of a
      // separately allocated scratch buffer. SetLength yields a uniquely
      // owned buffer; the extra character is room for the #0 terminator that
      // FPDFText_GetText appends.
      SetLength(Result, CharCount + 1);
      Written := FPDFText_GetText(TextPage, 0, CharCount, PWideChar(Result));

      // Written counts the terminator, so the payload is Written - 1 chars.
      if Written > 0 then
        SetLength(Result, Written - 1)
      else
        Result := '';
    finally
      FPDFText_ClosePage(TextPage);
    end;
  finally
    FPDF_ClosePage(Page);
  end;
end;

{ Opens the PDF at AFileName through PDFium and returns the text of all of its
  pages as a single string.

  AFileName - path of the PDF file; converted to UTF-8 for PDFium.
  APassword - optional document password (default ''), also passed as UTF-8.

  Returns the page texts in page order. Two line breaks are inserted before a
  page's text whenever text has already been collected, so empty pages ahead
  of the first non-empty page add no separators, while empty pages after it
  still do. The result is '' when the document reports no pages or no page
  yields text.

  Raises Exception when the document cannot be opened (the message carries
  the FPDF_GetLastError code) or when a page or its text layer fails to load.
  The document handle is always closed, also when an exception escapes. }
function ExtractPdfText(const AFileName: string; const APassword: string = ''): string;
var
  Doc      : FPDF_DOCUMENT;
  PathUTF8 : UTF8String;
  PassUTF8 : UTF8String;
  PageCount: Integer;
  PageIndex: Integer;
  PageText : string;
begin
  Result := '';

  // UTF-8 bytes for path/password (avoid codepage conversions). An empty
  // password converts to an empty UTF8String, and PAnsiChar() of an empty
  // string still points at a #0 byte, so no special case is required.
  PathUTF8 := UTF8String(AFileName);
  PassUTF8 := UTF8String(APassword);

  Doc := FPDF_LoadDocument(PAnsiChar(PathUTF8), PAnsiChar(PassUTF8));
  if Doc = nil then
    raise Exception.CreateFmt('PDFium failed to open document. Error code: %d', [FPDF_GetLastError]);

  try
    // A zero or negative count makes the loop body run zero times, leaving
    // Result as ''.
    PageCount := FPDF_GetPageCount(Doc);
    for PageIndex := 0 to PageCount - 1 do
    begin
      PageText := ExtractPageText(Doc, PageIndex);

      if Result <> '' then
        Result := Result + sLineBreak + sLineBreak;
      Result := Result + PageText;
    end;
  finally
    FPDF_CloseDocument(Doc);
  end;
end;

// PDFium is initialised once when this unit is initialised and torn down when
// the unit is finalised, so ExtractPdfText can be called without any explicit
// setup by the caller.
// NOTE(review): if this unit is ever compiled into a DLL, these calls would
// run during library load/unload - confirm PDFium tolerates that before
// doing so.
initialization
  FPDF_InitLibrary;

finalization
  FPDF_DestroyLibrary;


end.
