如果你需要完全免費的 PDF 處理庫,可以考慮PdfPig方案:
以下是一個使用 PdfPig 提取 PDF 表格的示例代碼(示例目前僅將提取結果輸出到控制台,寫入 Excel 文件的部分需另行實現):
(1) 安裝 NuGet 包
Install-Package PdfPig
Install-Package EPPlus
(2) 示例代碼
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;
using OfficeOpenXml;
/// <summary>
/// Console sample that reads the words of every page of a PDF with PdfPig,
/// groups them into rows and columns by position, and prints each row to
/// the console.
/// </summary>
class Program
{
    /// <summary>
    /// Maximum vertical distance between the bottom edge of a word and the
    /// bottom edge of the first word of a line for both to be treated as
    /// part of the same line. Expressed in the same units as
    /// <c>BoundingBox.Bottom</c> (presumably PDF points; tune for the
    /// document's font size and line spacing).
    /// </summary>
    private const double LineTolerance = 2.0;

    /// <summary>
    /// Entry point: extracts rows from "input.pdf" and prints them.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string pdfPath = "input.pdf";
        string excelPath = "output.xlsx";

        var tableData = ExtractTableFromPdf(pdfPath);
        SaveTableToText(tableData, excelPath);

        // NOTE(review): nothing is written to excelPath yet; the rows are only
        // printed to the console by SaveTableToText.
        Console.WriteLine("PDF 表格已成功轉換為 Excel 文件。");
    }

    /// <summary>
    /// Reads every page of the PDF and returns its words as rows
    /// (top to bottom within a page) of cell texts (left to right).
    /// </summary>
    /// <param name="pdfPath">Path of the PDF file to open.</param>
    /// <returns>One inner list per detected text line, across all pages in page order.</returns>
    static List<List<string>> ExtractTableFromPdf(string pdfPath)
    {
        var tableData = new List<List<string>>();

        using (var pdfDocument = PdfDocument.Open(pdfPath))
        {
            foreach (var page in pdfDocument.GetPages())
            {
                foreach (var line in GroupWordsIntoLines(page.GetWords()))
                {
                    var columns = line.OrderBy(w => w.BoundingBox.Left)
                                      .Select(w => w.Text)
                                      .ToList();
                    tableData.Add(columns);
                }
            }
        }

        return tableData;
    }

    /// <summary>
    /// Clusters words into lines, ordered from the highest bottom edge to the lowest.
    /// </summary>
    /// <remarks>
    /// Grouping on the exact <c>BoundingBox.Bottom</c> value would split a
    /// visual row whenever two words differ by a fraction of a unit, so a
    /// word joins the current line when its bottom edge is within
    /// <see cref="LineTolerance"/> of the line's first word. Anchoring on the
    /// first word prevents a chain of small differences from merging
    /// distinct lines.
    /// </remarks>
    /// <param name="words">Words of a single page.</param>
    /// <returns>Lines in top-to-bottom order; words inside a line are not yet sorted horizontally.</returns>
    private static List<List<Word>> GroupWordsIntoLines(IEnumerable<Word> words)
    {
        var lines = new List<List<Word>>();
        double anchorBottom = 0;

        foreach (var word in words.OrderByDescending(w => w.BoundingBox.Bottom))
        {
            double bottom = word.BoundingBox.Bottom;

            if (lines.Count == 0 || anchorBottom - bottom > LineTolerance)
            {
                lines.Add(new List<Word>());
                anchorBottom = bottom;
            }

            lines[lines.Count - 1].Add(word);
        }

        return lines;
    }

    /// <summary>
    /// Prints every row to the console, preceded by a separator line, with
    /// each cell prefixed by '|'.
    /// </summary>
    /// <param name="tableData">Rows of cell texts to print.</param>
    /// <param name="excelPath">
    /// Intended output path. Currently unused: this method writes to the
    /// console only and creates no file.
    /// </param>
    static void SaveTableToText(List<List<string>> tableData, string excelPath)
    {
        foreach (var row in tableData)
        {
            Console.WriteLine("----------------------");
            // Single concatenation instead of repeated string += in a loop.
            Console.WriteLine(string.Concat(row.Select(cell => "|" + cell)));
        }
    }
}
(3) 代碼說明
效果說明:
下面是原始PDF文件樣式:

下面是解碼結果:
總結
該文章在 2025/2/20 16:43:20 編輯過