Skip to content

表单数据提取

PDF Oxide 可从 PDF 文档中提取交互式表单字段(AcroForms),包括文本框、复选框、单选按钮、下拉列表和签名字段。提取的表单数据可导出为 FDF 或 XFDF 格式,便于与其他应用程序交换数据。同时支持对 XFA 表单(XML Forms Architecture)进行解析和转换。

快速示例

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()
for field in fields:
    print(f"{field.name} ({field.field_type}): {field.value}")

Node.js

const { PdfDocument } = require("pdf-oxide");

const doc = new PdfDocument("form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
  console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
doc.close();

Go

import pdfoxide "github.com/yfedoseev/pdf_oxide/go"

doc, _ := pdfoxide.Open("form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
    fmt.Printf("%s (%s): %s\n", field.Name, field.FieldType, field.Value)
}

C#

using PdfOxide.Core;

using var doc = PdfDocument.Open("form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
    Console.WriteLine($"{field.Name} ({field.FieldType}): {field.Value}");
}

WASM

const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
    console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
    println!("{} ({:?}): {:?}", field.full_name, field.field_type, field.value);
}

Java

import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;

try (PdfDocument doc = PdfDocument.open("form.pdf")) {
    for (FormField field : doc.formFields()) {
        System.out.printf("%s (%s): %s%n",
            field.name(), field.type(), field.value().orElse(""));
    }
}

Ruby

require 'pdf_oxide'

PdfOxide::PdfDocument.open('form.pdf') do |doc|
  doc.form_fields.each do |field|
    puts "#{field[:name]} (#{field[:type]}): #{field[:value]}"
  end
end

C++

#include <pdf_oxide/pdf_oxide.hpp>

auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
    std::cout << field.name << " (" << field.type << "): " << field.value << "\n";
}

Swift

import PdfOxide

let doc = try Document.open("form.pdf")
for field in try doc.formFields() {
    print("\(field.name) (\(field.type)): \(field.value)")
}

Kotlin

import fyi.oxide.pdf.PdfDocument

PdfDocument.open("form.pdf").use { doc ->
    doc.formFields().forEach { field ->
        println("${field.name()} (${field.type()}): ${field.valueOrNull() ?: ""}")
    }
}

Dart

import 'package:pdf_oxide/pdf_oxide.dart';

final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
  print('${field.name} (${field.type}): ${field.value}');
}

R

library(pdfoxide)

doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
  cat(sprintf("%s (%s): %s\n", field$name, field$type, field$value))
}

Julia

using PdfOxide

doc = open_document("form.pdf")
for field in get_form_fields(doc)
    println("$(field.name) ($(field.type)): $(field.value)")
end

Zig

const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;

var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const name = try fields.getName(a, i);
    const ftype = try fields.getType(a, i);
    const value = try fields.getValue(a, i);
    std.debug.print("{s} ({s}): {s}\n", .{ name, ftype, value });
}

Scala

import fyi.oxide.pdf.{PdfDocument, valueOption}
import scala.util.Using

Using.resource(PdfDocument.open("form.pdf")) { doc =>
  doc.formFieldsSeq.foreach { field =>
    println(s"${field.name} (${field.`type`}): ${field.valueOption.getOrElse("")}")
  }
}

Clojure

(require '[pdf-oxide.core :as pdf])

(with-open [doc (pdf/open "form.pdf")]
  (doseq [field (pdf/form-fields doc)]
    (println (format "%s (%s): %s"
                     (.name field) (.type field) (.orElse (.value field) "")))))

Objective-C

#import "POXPdfOxide.h"
NSError *err = nil;

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    NSLog(@"%@ (%@): %@", field.name, field.type, field.value);
}

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
  IO.puts("#{field.name} (#{field.type}): #{field.value}")
end)

从 PyMuPDF 的 get_form_fields() 迁移

如果你正在从 PyMuPDF 迁移,两者 API 结构相似,但 PDF Oxide 返回更丰富的数据结构,并且支持 XFA 表单。

PyMuPDF:

import fitz

doc = fitz.open("form.pdf")
# Returns dict of {field_name: field_value} — loses type info
fields = doc.get_form_fields()

# Or iterate widgets for more detail
for page in doc:
    for widget in page.widgets():
        print(widget.field_name, widget.field_value)

PDF Oxide:

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
# Returns structured objects with name, value, type, options, rect
fields = doc.get_form_fields()
for field in fields:
    print(f"{field.name} ({field.field_type}): {field.value}")

# Also handles XFA forms that PyMuPDF cannot read
xfa = doc.has_xfa()

主要区别:

  • PDF Oxide 返回结构化的字段对象(而非字典)
  • 包含字段类型、边界矩形以及下拉字段的选项列表
  • 支持 XFA 表单——PyMuPDF 的 get_form_fields() 对纯 XFA PDF 返回空结果
  • 支持导出为 FDF/XFDF 格式,便于跨系统数据交换

涵盖 PyMuPDF、pypdf、pdfplumber 和 pdfminer 的完整迁移指南,请参阅“迁移到 PDF Oxide”。


读取表单字段

获取所有字段

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("tax-form.pdf")
fields = doc.get_form_fields()

for field in fields:
    print(f"Name: {field.name}")
    print(f"  Type: {field.field_type}")
    print(f"  Value: {field.value}")
    print(f"  Required: {field.is_required}")
    print(f"  Read-only: {field.is_readonly}")
    if field.max_length:
        print(f"  Max length: {field.max_length}")

Node.js

const doc = new PdfDocument("tax-form.pdf");
const fields = doc.getFormFields();

for (const field of fields) {
  console.log(`Name: ${field.name}`);
  console.log(`  Type: ${field.fieldType}`);
  console.log(`  Value: ${field.value}`);
}
doc.close();

Go

doc, _ := pdfoxide.Open("tax-form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()

for _, field := range fields {
    fmt.Printf("Name: %s\n", field.Name)
    fmt.Printf("  Type: %s\n", field.FieldType)
    fmt.Printf("  Value: %s\n", field.Value)
}

C#

using var doc = PdfDocument.Open("tax-form.pdf");
var fields = doc.GetFormFields();

foreach (var field in fields)
{
    Console.WriteLine($"Name: {field.Name}");
    Console.WriteLine($"  Type: {field.FieldType}");
    Console.WriteLine($"  Value: {field.Value}");
}

WASM

const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();

for (const field of fields) {
    console.log(`Name: ${field.name}`);
    console.log(`  Type: ${field.fieldType}`);
    console.log(`  Value: ${field.value}`);
    console.log(`  Flags: ${field.flags}`);
}

Rust

use pdf_oxide::extractors::{FormExtractor, FieldType};
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("tax-form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

for field in &fields {
    let type_str = match &field.field_type {
        FieldType::Button => "Button",
        FieldType::Text => "Text",
        FieldType::Choice => "Choice",
        FieldType::Signature => "Signature",
        FieldType::Unknown(s) => s.as_str(),
    };

    println!("[{}] {} = {:?}", type_str, field.full_name, field.value);

    if let Some(tooltip) = &field.tooltip {
        println!("  Tooltip: {}", tooltip);
    }
    if let Some(bounds) = &field.bounds {
        println!("  Bounds: [{:.1}, {:.1}, {:.1}, {:.1}]",
            bounds[0], bounds[1], bounds[2], bounds[3]);
    }
}

Java

import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;

try (PdfDocument doc = PdfDocument.open("tax-form.pdf")) {
    for (FormField field : doc.formFields()) {
        System.out.println("Name: " + field.name());
        System.out.println("  Type: " + field.type());
        System.out.println("  Value: " + field.value().orElse(""));
    }
}

Ruby

PdfOxide::PdfDocument.open('tax-form.pdf') do |doc|
  doc.form_fields.each do |field|
    puts "Name: #{field[:name]}"
    puts "  Type: #{field[:type]}"
    puts "  Value: #{field[:value]}"
  end
end

C++

auto doc = pdf_oxide::Document::open("tax-form.pdf");
for (const auto& field : doc.get_form_fields()) {
    std::cout << "Name: " << field.name << "\n";
    std::cout << "  Type: " << field.type << "\n";
    std::cout << "  Value: " << field.value << "\n";
    std::cout << "  Read-only: " << field.readonly << "\n";
    std::cout << "  Required: " << field.required << "\n";
}

Swift

let doc = try Document.open("tax-form.pdf")
for field in try doc.formFields() {
    print("Name: \(field.name)")
    print("  Type: \(field.type)")
    print("  Value: \(field.value)")
    print("  Read-only: \(field.readonly)")
    print("  Required: \(field.required)")
}

Kotlin

PdfDocument.open("tax-form.pdf").use { doc ->
    doc.formFields().forEach { field ->
        println("Name: ${field.name()}")
        println("  Type: ${field.type()}")
        println("  Value: ${field.valueOrNull() ?: ""}")
    }
}

Dart

final doc = PdfDocument.open('tax-form.pdf');
for (final field in doc.getFormFields()) {
  print('Name: ${field.name}');
  print('  Type: ${field.type}');
  print('  Value: ${field.value}');
  print('  Read-only: ${field.readonly}');
  print('  Required: ${field.required}');
}

R

doc <- pdf_open("tax-form.pdf")
for (field in pdf_get_form_fields(doc)) {
  cat(sprintf("Name: %s\n", field$name))
  cat(sprintf("  Type: %s\n", field$type))
  cat(sprintf("  Value: %s\n", field$value))
  cat(sprintf("  Read-only: %s\n", field$readonly))
  cat(sprintf("  Required: %s\n", field$required))
}

Julia

doc = open_document("tax-form.pdf")
for field in get_form_fields(doc)
    println("Name: $(field.name)")
    println("  Type: $(field.type)")
    println("  Value: $(field.value)")
    println("  Read-only: $(field.readonly)")
    println("  Required: $(field.required)")
end

Zig

var doc = try pdf_oxide.Document.open("tax-form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const name = try fields.getName(a, i);
    const ftype = try fields.getType(a, i);
    const value = try fields.getValue(a, i);
    std.debug.print("Name: {s}\n  Type: {s}\n  Value: {s}\n", .{ name, ftype, value });
    std.debug.print("  Read-only: {}\n  Required: {}\n", .{
        try fields.isReadonly(i), try fields.isRequired(i),
    });
}

Scala

Using.resource(PdfDocument.open("tax-form.pdf")) { doc =>
  doc.formFieldsSeq.foreach { field =>
    println(s"Name: ${field.name}")
    println(s"  Type: ${field.`type`}")
    println(s"  Value: ${field.valueOption.getOrElse("")}")
  }
}

Clojure

(with-open [doc (pdf/open "tax-form.pdf")]
  (doseq [field (pdf/form-fields doc)]
    (println "Name:" (.name field))
    (println "  Type:" (.type field))
    (println "  Value:" (.orElse (.value field) ""))))

Objective-C

POXDocument *doc = [POXDocument openPath:@"tax-form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    NSLog(@"Name: %@", field.name);
    NSLog(@"  Type: %@", field.type);
    NSLog(@"  Value: %@", field.value);
    NSLog(@"  Read-only: %d", field.readonly);
    NSLog(@"  Required: %d", field.required);
}

Elixir

{:ok, doc} = PdfOxide.open("tax-form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
  IO.puts("Name: #{field.name}")
  IO.puts("  Type: #{field.type}")
  IO.puts("  Value: #{field.value}")
  IO.puts("  Read-only: #{field.read_only}")
  IO.puts("  Required: #{field.required}")
end)

获取指定字段的值

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

name = doc.get_form_field_value("employee_name")
ssn = doc.get_form_field_value("ssn")
agreed = doc.get_form_field_value("agree_to_terms")

print(f"Name: {name}")       # "John Doe"
print(f"SSN: {ssn}")         # "123-45-6789"
print(f"Agreed: {agreed}")   # True

WASM

const doc = new WasmPdfDocument(bytes);

const name = doc.getFormFieldValue("employee_name");
const ssn = doc.getFormFieldValue("ssn");
const agreed = doc.getFormFieldValue("agree_to_terms");

console.log(`Name: ${name}`);     // "John Doe"
console.log(`SSN: ${ssn}`);       // "123-45-6789"
console.log(`Agreed: ${agreed}`); // true

Rust

use pdf_oxide::editor::{DocumentEditor, EditableDocument};

let mut editor = DocumentEditor::open("form.pdf")?;

if let Some(value) = editor.get_form_field_value("employee_name")? {
    println!("Name: {:?}", value);
}

填写表单

设置字段值

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Set text fields
doc.set_form_field_value("full_name", "Jane Doe")
doc.set_form_field_value("email", "jane@example.com")

# Set checkboxes
doc.set_form_field_value("agree_to_terms", True)

# Save the filled form
doc.save("filled_form.pdf")

WASM

const doc = new WasmPdfDocument(bytes);

// Set text fields
doc.setFormFieldValue("full_name", "Jane Doe");
doc.setFormFieldValue("email", "jane@example.com");

// Set checkboxes
doc.setFormFieldValue("agree_to_terms", true);

// Save the filled form
const filledBytes = doc.save();

Rust

use pdf_oxide::editor::{DocumentEditor, EditableDocument, FormFieldValue};

let mut editor = DocumentEditor::open("form.pdf")?;

// Set text fields
editor.set_form_field_value("full_name", FormFieldValue::Text("Jane Doe".into()))?;
editor.set_form_field_value("email", FormFieldValue::Text("jane@example.com".into()))?;

// Set checkboxes
editor.set_form_field_value("agree_to_terms", FormFieldValue::Boolean(true))?;

// Set choice fields
editor.set_form_field_value("state", FormFieldValue::Choice("California".into()))?;

editor.save("filled_form.pdf")?;

Java

import fyi.oxide.pdf.DocumentEditor;

try (DocumentEditor editor = DocumentEditor.open("form.pdf")) {
    // Set text fields
    editor.setFormField("full_name", "Jane Doe");
    editor.setFormField("email", "jane@example.com");

    // Set checkboxes
    editor.setFormField("agree_to_terms", true);

    editor.saveTo(java.nio.file.Path.of("filled_form.pdf"));
}

Ruby

PdfOxide::DocumentEditor.open('form.pdf') do |editor|
  # Set text fields
  editor.set_form_field('full_name', 'Jane Doe')
  editor.set_form_field('email', 'jane@example.com')

  # Set checkboxes
  editor.set_form_field('agree_to_terms', true)

  editor.save_to('filled_form.pdf')
end

C++

auto editor = pdf_oxide::DocumentEditor::open("form.pdf");

// Set text fields
editor.set_form_field_value("full_name", "Jane Doe");
editor.set_form_field_value("email", "jane@example.com");

// Set checkboxes
editor.set_form_field_value("agree_to_terms", "true");

editor.save("filled_form.pdf");

Swift

let editor = try DocumentEditor.openEditor("form.pdf")

// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe")
try editor.setFormFieldValue("email", "jane@example.com")

// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true")

try editor.save("filled_form.pdf")

Kotlin

import fyi.oxide.pdf.DocumentEditor

DocumentEditor.open("form.pdf").use { editor ->
    // Set text fields
    editor.setFormField("full_name", "Jane Doe")
    editor.setFormField("email", "jane@example.com")

    // Set checkboxes
    editor.setFormField("agree_to_terms", true)

    editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}

Dart

final editor = DocumentEditor.open('form.pdf');

// Set text fields
editor.setFormFieldValue('full_name', 'Jane Doe');
editor.setFormFieldValue('email', 'jane@example.com');

// Set checkboxes
editor.setFormFieldValue('agree_to_terms', 'true');

editor.save('filled_form.pdf');

R

editor <- pdf_editor_open("form.pdf")

# Set text fields
pdf_editor_set_form_field_value(editor, "full_name", "Jane Doe")
pdf_editor_set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
pdf_editor_set_form_field_value(editor, "agree_to_terms", "true")

pdf_editor_save(editor, "filled_form.pdf")

Julia

editor = open_editor("form.pdf")

# Set text fields
set_form_field_value(editor, "full_name", "Jane Doe")
set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
set_form_field_value(editor, "agree_to_terms", "true")

save(editor, "filled_form.pdf")

Zig

var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();

// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe");
try editor.setFormFieldValue("email", "jane@example.com");

// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true");

try editor.save("filled_form.pdf");

Scala

import fyi.oxide.pdf.DocumentEditor
import scala.util.Using

Using.resource(DocumentEditor.open("form.pdf")) { editor =>
  // Set text fields
  editor.setFormField("full_name", "Jane Doe")
  editor.setFormField("email", "jane@example.com")

  // Set checkboxes
  editor.setFormField("agree_to_terms", true)

  editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}

Clojure

(with-open [editor (pdf/editor "form.pdf")]
  ;; Set text fields
  (.setFormField editor "full_name" "Jane Doe")
  (.setFormField editor "email" "jane@example.com")

  ;; Set checkboxes
  (.setFormField editor "agree_to_terms" true)

  (.saveTo editor (java.nio.file.Path/of "filled_form.pdf" (make-array String 0))))

Objective-C

POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];

// Set text fields
[editor setFormField:@"full_name" value:@"Jane Doe" error:&err];
[editor setFormField:@"email" value:@"jane@example.com" error:&err];

// Set checkboxes
[editor setFormField:@"agree_to_terms" value:@"true" error:&err];

[editor saveToPath:@"filled_form.pdf" error:&err];

Elixir

{:ok, editor} = PdfOxide.open_editor("form.pdf")

# Set text fields
PdfOxide.set_form_field_value(editor, "full_name", "Jane Doe")
PdfOxide.set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
PdfOxide.set_form_field_value(editor, "agree_to_terms", "true")

PdfOxide.editor_save(editor, "filled_form.pdf")

导出表单数据

将表单字段数据导出为 FDF 或 XFDF 格式,以便与其他应用程序共享。

FDF 导出

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.fdf")

WASM

const doc = new WasmPdfDocument(bytes);
const fdfBytes = doc.exportFormData("fdf");
// fdfBytes is a Uint8Array

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let fdf_bytes = FormExtractor::export_fdf(&mut doc, fields)?;
std::fs::write("form_data.fdf", &fdf_bytes)?;

C++

auto doc = pdf_oxide::Document::open("form.pdf");
auto fdf = doc.export_form_data_to_bytes(0); // 0 = FDF
std::ofstream("form_data.fdf", std::ios::binary)
    .write(reinterpret_cast<const char*>(fdf.data()), fdf.size());

Swift

let doc = try Document.open("form.pdf")
let fdf = try doc.exportFormData(formatType: 0) // 0 = FDF
try Data(fdf).write(to: URL(fileURLWithPath: "form_data.fdf"))

Dart

final doc = PdfDocument.open('form.pdf');
final fdf = doc.exportFormDataToBytes(0); // 0 = FDF
File('form_data.fdf').writeAsBytesSync(fdf);

R

doc <- pdf_open("form.pdf")
fdf <- pdf_export_form_data_to_bytes(doc, format_type = 0L) # 0 = FDF
writeBin(fdf, "form_data.fdf")

Julia

doc = open_document("form.pdf")
fdf = export_form_data_to_bytes(doc, 0) # 0 = FDF
write("form_data.fdf", fdf)

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
const fdf = try doc.exportFormDataToBytes(a, 0); // 0 = FDF
defer a.free(fdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.fdf", .data = fdf });

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *fdf = [doc exportFormDataToBytes:0 error:&err]; // 0 = FDF
[fdf writeToFile:@"form_data.fdf" atomically:YES];

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fdf} = PdfOxide.export_form_data_to_bytes(doc, 0) # 0 = FDF
File.write!("form_data.fdf", fdf)

XFDF 导出

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.xfdf", format="xfdf")

WASM

const doc = new WasmPdfDocument(bytes);
const xfdfBytes = doc.exportFormData("xfdf");

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let xfdf = FormExtractor::export_xfdf(&mut doc, fields)?;
std::fs::write("form_data.xfdf", &xfdf)?;

C++

auto doc = pdf_oxide::Document::open("form.pdf");
auto xfdf = doc.export_form_data_to_bytes(1); // 1 = XFDF
std::ofstream("form_data.xfdf", std::ios::binary)
    .write(reinterpret_cast<const char*>(xfdf.data()), xfdf.size());

Swift

let doc = try Document.open("form.pdf")
let xfdf = try doc.exportFormData(formatType: 1) // 1 = XFDF
try Data(xfdf).write(to: URL(fileURLWithPath: "form_data.xfdf"))

Dart

final doc = PdfDocument.open('form.pdf');
final xfdf = doc.exportFormDataToBytes(1); // 1 = XFDF
File('form_data.xfdf').writeAsBytesSync(xfdf);

R

doc <- pdf_open("form.pdf")
xfdf <- pdf_export_form_data_to_bytes(doc, format_type = 1L) # 1 = XFDF
writeBin(xfdf, "form_data.xfdf")

Julia

doc = open_document("form.pdf")
xfdf = export_form_data_to_bytes(doc, 1) # 1 = XFDF
write("form_data.xfdf", xfdf)

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
const xfdf = try doc.exportFormDataToBytes(a, 1); // 1 = XFDF
defer a.free(xfdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.xfdf", .data = xfdf });

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *xfdf = [doc exportFormDataToBytes:1 error:&err]; // 1 = XFDF
[xfdf writeToFile:@"form_data.xfdf" atomically:YES];

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, xfdf} = PdfOxide.export_form_data_to_bytes(doc, 1) # 1 = XFDF
File.write!("form_data.xfdf", xfdf)

关于数据导入

导出是数据往返流程的读取端;关于将 FDF/XFDF 数据写回 PDF 的写入端,请参阅“编辑文档”。C ABI 和 Swift 绑定中声明了 pdf_editor_import_fdf_bytes、pdf_editor_import_xfdf_bytes、pdf_form_import_from_file 和 pdf_document_import_form_data,但在 v0.3.69 中,这些函数返回 Unsupported 状态。目前推荐的可移植方案是手动解析 FDF/XFDF,再调用 set_form_field_value。完整的可运行导入示例,请参阅“表单字段编辑 → 导入表单数据”。


Markdown 与 HTML 中的表单字段

默认情况下,转换为 Markdown 或 HTML 时会包含表单字段的值。可通过 include_form_fields 参数控制此行为。

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Include form field values (default)
md = doc.to_markdown(0, include_form_fields=True)

# Exclude form fields
md = doc.to_markdown(0, include_form_fields=False)

WASM

const doc = new WasmPdfDocument(bytes);

// Include form fields (default: true)
const md = doc.toMarkdown(0, true, true, true);

// Exclude form fields (4th parameter)
const md2 = doc.toMarkdown(0, true, true, false);

Rust

use pdf_oxide::PdfDocument;
use pdf_oxide::converters::ConversionOptions;

let doc = PdfDocument::open("form.pdf")?;
let options = ConversionOptions {
    include_form_fields: true,
    ..Default::default()
};
let md = doc.to_markdown(0, &options)?;

展平表单

将表单字段合并到页面内容中,使其变为不可编辑状态。适用于生成最终归档版 PDF。

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Flatten all form fields
doc.flatten_forms()
doc.save("flattened.pdf")

# Or flatten a single page
doc2 = PdfDocument("form.pdf")
doc2.flatten_forms_on_page(0)
doc2.save("flattened_page0.pdf")

WASM

const doc = new WasmPdfDocument(bytes);

// Flatten all form fields
doc.flattenForms();
const flattened = doc.save();

// Or flatten a single page
const doc2 = new WasmPdfDocument(bytes);
doc2.flattenFormsOnPage(0);
const flattened2 = doc2.save();

Rust

use pdf_oxide::Pdf;

let mut pdf = Pdf::open("form.pdf")?;

// Mark a specific page for flattening
pdf.flatten_page_annotations(0);
pdf.save("flattened.pdf")?;

// Or flatten all pages
let mut pdf2 = Pdf::open("form.pdf")?;
pdf2.flatten_all_annotations();
pdf2.save("flattened_all.pdf")?;

C++

auto editor = pdf_oxide::DocumentEditor::open("form.pdf");

// Flatten all form fields
editor.flatten_forms();
editor.save("flattened.pdf");

// Or flatten a single page
auto editor2 = pdf_oxide::DocumentEditor::open("form.pdf");
editor2.flatten_forms_on_page(0);
editor2.save("flattened_page0.pdf");

Swift

let editor = try DocumentEditor.openEditor("form.pdf")

// Flatten all form fields
try editor.flattenForms()
try editor.save("flattened.pdf")

// Or flatten a single page
let editor2 = try DocumentEditor.openEditor("form.pdf")
try editor2.flattenFormsOnPage(0)
try editor2.save("flattened_page0.pdf")

Dart

final editor = DocumentEditor.open('form.pdf');

// Flatten all form fields
editor.flattenForms();
editor.save('flattened.pdf');

// Or flatten a single page
final editor2 = DocumentEditor.open('form.pdf');
editor2.flattenFormsOnPage(0);
editor2.save('flattened_page0.pdf');

R

editor <- pdf_editor_open("form.pdf")

# Flatten all form fields
pdf_editor_flatten_forms(editor)
pdf_editor_save(editor, "flattened.pdf")

# Or flatten a single page
editor2 <- pdf_editor_open("form.pdf")
pdf_editor_flatten_forms_on_page(editor2, 0)
pdf_editor_save(editor2, "flattened_page0.pdf")

Julia

editor = open_editor("form.pdf")

# Flatten all form fields
flatten_forms(editor)
save(editor, "flattened.pdf")

# Or flatten a single page
editor2 = open_editor("form.pdf")
flatten_forms_on_page(editor2, 0)
save(editor2, "flattened_page0.pdf")

Zig

var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();

// Flatten all form fields
try editor.flattenForms();
try editor.save("flattened.pdf");

// Or flatten a single page
var editor2 = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor2.deinit();
try editor2.flattenFormsOnPage(0);
try editor2.save("flattened_page0.pdf");

Objective-C

POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];

// Flatten all form fields
[editor flattenForms:&err];
[editor saveToPath:@"flattened.pdf" error:&err];

// Or flatten a single page
POXDocumentEditor *editor2 = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
[editor2 flattenFormsOnPage:0 error:&err];
[editor2 saveToPath:@"flattened_page0.pdf" error:&err];

Elixir

{:ok, editor} = PdfOxide.open_editor("form.pdf")

# Flatten all form fields
PdfOxide.flatten_forms(editor)
PdfOxide.editor_save(editor, "flattened.pdf")

# Or flatten a single page
{:ok, editor2} = PdfOxide.open_editor("form.pdf")
PdfOxide.flatten_forms_on_page(editor2, 0)
PdfOxide.editor_save(editor2, "flattened_page0.pdf")

XFA 表单

解析 XFA(XML Forms Architecture)表单内容。XFA 表单使用基于 XML 的模板,而非 AcroForm 字段,在政府机构和企业表单中十分常见。

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("xfa-form.pdf")
if doc.has_xfa():
    print("This document contains an XFA form")
    fields = doc.get_form_fields()  # Extracts AcroForm fallback fields
    for field in fields:
        print(f"  {field.name}: {field.value}")

Node.js

const doc = new PdfDocument("xfa-form.pdf");
if (doc.hasXFA()) {
  console.log("This document contains an XFA form");
  const fields = doc.getFormFields();
  for (const field of fields) {
    console.log(`  ${field.name}: ${field.value}`);
  }
}
doc.close();

Go

doc, _ := pdfoxide.Open("xfa-form.pdf")
defer doc.Close()
if doc.HasXfa() {
    fmt.Println("This document contains an XFA form")
    fields, _ := doc.FormFields()
    for _, field := range fields {
        fmt.Printf("  %s: %s\n", field.Name, field.Value)
    }
}

C#

using var doc = PdfDocument.Open("xfa-form.pdf");
if (doc.HasXfa)
{
    Console.WriteLine("This document contains an XFA form");
    var fields = doc.GetFormFields();
    foreach (var field in fields)
    {
        Console.WriteLine($"  {field.Name}: {field.Value}");
    }
}

WASM

const doc = new WasmPdfDocument(bytes);
if (doc.hasXfa()) {
    console.log("This document contains an XFA form");
    const fields = doc.getFormFields(); // AcroForm fallback fields
    for (const field of fields) {
        console.log(`  ${field.name}: ${field.value}`);
    }
}

Rust

use pdf_oxide::xfa::analyze_xfa_document;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("xfa-form.pdf")?;
let analysis = analyze_xfa_document(&mut doc)?;
println!("XFA form detected: {} fields", analysis.fields.len());
for field in &analysis.fields {
    println!("  {} ({:?})", field.name, field.field_type);
}

C++

auto doc = pdf_oxide::Document::open("xfa-form.pdf");
if (doc.has_xfa()) {
    std::cout << "This document contains an XFA form\n";
    for (const auto& field : doc.get_form_fields()) { // AcroForm fallback
        std::cout << "  " << field.name << ": " << field.value << "\n";
    }
}

Swift

let doc = try Document.open("xfa-form.pdf")
if try doc.hasXfa() {
    print("This document contains an XFA form")
    for field in try doc.formFields() { // AcroForm fallback
        print("  \(field.name): \(field.value)")
    }
}

Dart

final doc = PdfDocument.open('xfa-form.pdf');
if (doc.hasXfa()) {
  print('This document contains an XFA form');
  for (final field in doc.getFormFields()) { // AcroForm fallback
    print('  ${field.name}: ${field.value}');
  }
}

R

doc <- pdf_open("xfa-form.pdf")
if (pdf_has_xfa(doc)) {
  cat("This document contains an XFA form\n")
  for (field in pdf_get_form_fields(doc)) { # AcroForm fallback
    cat(sprintf("  %s: %s\n", field$name, field$value))
  }
}

Julia

doc = open_document("xfa-form.pdf")
if has_xfa(doc)
    println("This document contains an XFA form")
    for field in get_form_fields(doc) # AcroForm fallback
        println("  $(field.name): $(field.value)")
    end
end

Zig

var doc = try pdf_oxide.Document.open("xfa-form.pdf");
if (doc.hasXfa()) {
    std.debug.print("This document contains an XFA form\n", .{});
    var fields = try doc.formFields(); // AcroForm fallback
    defer fields.deinit();
    const count = try fields.count();
    var i: i32 = 0;
    while (i < count) : (i += 1) {
        const name = try fields.getName(a, i);
        const value = try fields.getValue(a, i);
        std.debug.print("  {s}: {s}\n", .{ name, value });
    }
}

Objective-C

POXDocument *doc = [POXDocument openPath:@"xfa-form.pdf" error:&err];
if ([doc hasXfa]) {
    NSLog(@"This document contains an XFA form");
    for (POXFormField *field in [doc formFieldsWithError:&err]) { // AcroForm fallback
        NSLog(@"  %@: %@", field.name, field.value);
    }
}

Elixir

{:ok, doc} = PdfOxide.open("xfa-form.pdf")
if PdfOxide.has_xfa?(doc) do
  IO.puts("This document contains an XFA form")
  {:ok, fields} = PdfOxide.form_fields(doc) # AcroForm fallback
  Enum.each(fields, fn field -> IO.puts("  #{field.name}: #{field.value}") end)
end

API 参考

Python API

方法 说明
doc.get_form_fields() 获取所有表单字段,返回 FormField 对象列表
doc.get_form_field_value(name) 按名称获取指定字段的值
doc.set_form_field_value(name, value) 设置表单字段的值
doc.export_form_data(path, format="fdf") 将表单数据导出为 FDF 或 XFDF 文件
doc.has_xfa() 检查文档是否包含 XFA 表单
doc.flatten_forms() 将所有表单字段展平到页面内容中
doc.flatten_forms_on_page(page) 展平指定页面上的表单字段

Python FormField 属性

属性 类型 说明
name str 字段名称
field_type str 字段类型(text、checkbox、radio、choice、signature)
value str | bool | None 当前字段值
is_required bool 是否为必填字段
is_readonly bool 是否为只读字段
max_length int | None 文本字段的最大长度

JavaScript API

方法 说明
doc.getFormFields() 获取所有表单字段
doc.getFormFieldValue(name) 按名称获取指定字段的值
doc.setFormFieldValue(name, value) 设置表单字段的值
doc.exportFormData(format?) 导出为 FDF(默认)或 XFDF,返回 Uint8Array
doc.hasXfa() 检查文档是否包含 XFA 表单
doc.flattenForms() 将所有表单字段展平到页面内容中
doc.flattenFormsOnPage(pageIndex) 展平指定页面上的表单字段

JavaScript FormField 属性

属性 类型 说明
name string 字段名称
fieldType string 字段类型
value string | boolean | null 当前值
flags number 字段标志位

Rust API

函数 说明
FormExtractor::extract_fields(doc) 从 AcroForm 字典中提取所有表单字段
FormExtractor::export_fdf(doc, fields) 导出为 FDF 字节数据
FormExtractor::export_xfdf(doc, fields) 导出为 XFDF 字符串
analyze_xfa_document(doc) 分析 XFA 表单结构
editor.get_form_fields() 通过 DocumentEditor 获取字段
editor.get_form_field_value(name) 按名称获取字段值
editor.set_form_field_value(name, value) 设置字段值

FormField 字段(Rust)

字段 类型 说明
name String 来自 /T 键的字段名称
full_name String 完全限定名称(点号分隔)
field_type FieldType Button、Text、Choice、Signature、Unknown
value FieldValue 当前字段值
tooltip Option<String> 来自 /TU 键的工具提示
bounds Option<[f64; 4]> 边界框 [x1, y1, x2, y2]
flags Option<u32> 字段标志(ReadOnly、Required、NoExport)
default_value Option<FieldValue> 来自 /DV 键的默认值
max_length Option<u32> 文本字段的最大长度

FieldType 变体

变体 说明
Button 复选框、单选按钮或按钮(/Btn)
Text 单行或多行文本框(/Tx)
Choice 列表框或下拉框(/Ch)
Signature 数字签名字段(/Sig)
Unknown(String) 无法识别的字段类型

FieldValue 变体

变体 说明
Text(String) 文本字符串值
Boolean(bool) 布尔值(复选框)
Name(String) 名称值(单选按钮、下拉字段)
Array(Vec<String>) 多个值(多选列表框)
None 无值

进阶:检查必填字段

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()

missing = [f for f in fields if f.is_required and not f.value]
if missing:
    print("Missing required fields:")
    for f in missing:
        print(f"  - {f.name}")

Rust

use pdf_oxide::extractors::{FormExtractor, FieldValue};
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

let required_empty: Vec<_> = fields.iter()
    .filter(|f| {
        f.flags.map_or(false, |flags| flags & 0x02 != 0)
            && matches!(f.value, FieldValue::None | FieldValue::Text(ref s) if s.is_empty())
    })
    .collect();

if !required_empty.is_empty() {
    println!("Missing required fields:");
    for f in &required_empty {
        println!("  - {}", f.full_name);
    }
}

C++

auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
    if (field.required && field.value.empty()) {
        std::cout << "Missing required field: " << field.name << "\n";
    }
}

Swift

let doc = try Document.open("form.pdf")
for field in try doc.formFields() where field.required && field.value.isEmpty {
    print("Missing required field: \(field.name)")
}

Dart

final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
  if (field.required && field.value.isEmpty) {
    print('Missing required field: ${field.name}');
  }
}

R

doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
  if (field$required && field$value == "") {
    cat(sprintf("Missing required field: %s\n", field$name))
  }
}

Julia

doc = open_document("form.pdf")
for field in get_form_fields(doc)
    if field.required && isempty(field.value)
        println("Missing required field: $(field.name)")
    end
end

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const value = try fields.getValue(a, i);
    if (try fields.isRequired(i) and value.len == 0) {
        const name = try fields.getName(a, i);
        std.debug.print("Missing required field: {s}\n", .{name});
    }
}

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    if (field.required && field.value.length == 0) {
        NSLog(@"Missing required field: %@", field.name);
    }
}

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
fields
|> Enum.filter(fn f -> f.required and f.value in [nil, ""] end)
|> Enum.each(fn f -> IO.puts("Missing required field: #{f.name}") end)

相关页面